Simple code to split a big text file into multiple text files based on a delimiter or a string.
package com.fiam.gcr.batch.bean;
import java.util.List;
/**
 * Mutable holder for one piece of the split input.
 *
 * <p>It carries the lines belonging to the piece, the name the piece should be
 * written under, and an optional count. All three properties start out as
 * {@code null} and are stored and returned as-is (no copying, no validation).
 */
public class SplitFile {

    /** Name the piece is written under (without extension). */
    private String fileName;

    /** Lines that make up the piece, in input order. */
    private List<String> fileLines;

    /** Optional count associated with the piece. */
    private Integer fileCount;

    /** @return the stored file count, or {@code null} if none was set */
    public Integer getFileCount() {
        return this.fileCount;
    }

    /** @param fileCount the count to store; may be {@code null} */
    public void setFileCount(Integer fileCount) {
        this.fileCount = fileCount;
    }

    /** @return the stored list of lines (same instance that was set), or {@code null} */
    public List<String> getFileLines() {
        return this.fileLines;
    }

    /** @param fileLines the lines to store; the list is kept by reference */
    public void setFileLines(List<String> fileLines) {
        this.fileLines = fileLines;
    }

    /** @return the stored file name, or {@code null} if none was set */
    public String getFileName() {
        return this.fileName;
    }

    /** @param fileName the file name to store */
    public void setFileName(String fileName) {
        this.fileName = fileName;
    }
}
GcrTextFileProcessor.java
package com.fiam.gcr.batch.processor;
import java.util.List;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.annotation.BeforeStep;
import org.springframework.batch.item.ItemProcessor;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Scope;
import org.springframework.stereotype.Component;
import com.fiam.gcr.batch.bean.SplitFile;
/**
 * Names each {@link SplitFile} after characters 5..8 of its first line.
 *
 * <p>The incoming item is updated in place and returned; no copy is made.
 */
public class GcrTextFileProcessor implements ItemProcessor<SplitFile, SplitFile> {

    /** Index (inclusive) of the first character of the name within the first line. */
    private static final int NAME_START = 5;

    /** Index (exclusive) just past the last character of the name within the first line. */
    private static final int NAME_END = 9;

    /**
     * Sets the item's file name from its first line.
     *
     * @param inputSplitFile the item to name; its line list may be {@code null} or empty,
     *        in which case the item is returned unchanged
     * @return the same {@code inputSplitFile} instance
     * @throws IllegalArgumentException if the first line is {@code null} or shorter than
     *         {@value #NAME_END} characters, so no name can be extracted
     */
    @Override
    public SplitFile process(SplitFile inputSplitFile) throws Exception {
        List<String> lines = inputSplitFile.getFileLines();
        if (lines == null || lines.isEmpty()) {
            // Nothing to derive a name from; pass the item through untouched.
            return inputSplitFile;
        }

        String firstLine = lines.get(0);
        if (firstLine == null || firstLine.length() < NAME_END) {
            // Fail with a descriptive message instead of a bare StringIndexOutOfBoundsException.
            throw new IllegalArgumentException(
                    "First line is too short to contain a file name (need at least "
                            + NAME_END + " characters): " + firstLine);
        }

        String name = firstLine.substring(NAME_START, NAME_END);
        System.out.println("Processing- "+name);
        inputSplitFile.setFileName(name);
        return inputSplitFile;
    }
}
GcrTextFileReader.java
package com.fiam.gcr.batch.reader;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.List;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.annotation.AfterStep;
import org.springframework.batch.core.annotation.BeforeJob;
import org.springframework.batch.core.annotation.BeforeRead;
import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.ParseException;
import org.springframework.batch.item.UnexpectedInputException;
import com.fiam.gcr.batch.bean.SplitFile;
import com.fiam.gcr.batch.util.Constants;
/**
 * Reads the input file named by {@code Constants.inputFolder + Constants.inputFile} and
 * hands it out one section at a time.
 *
 * <p>A section starts at a line beginning with {@code FDXXX} and runs up to (not including)
 * the next such line or the end of the file. Lines that appear before the first
 * {@code FDXXX} line are discarded when that first header is met.
 *
 * <p>The instance is stateful (it keeps the open reader and the lines collected so far)
 * and is not written for concurrent use.
 */
public class GcrTextFileReader implements ItemReader<SplitFile>{

    /** Prefix that marks the first line of a section. */
    private static final String SECTION_PREFIX = "FDXXX";

    private final BufferedReader bin;

    /** Number of section headers seen plus sections emitted at end of file; 0 until the first header. */
    private int count;

    /** Lines collected for the section currently being assembled. */
    private List<String> allLines = new ArrayList<String>();

    /** Set once end of input has been reported and the underlying reader closed. */
    private boolean exhausted;

    /**
     * Opens the input file.
     *
     * @throws IllegalStateException if the input file does not exist; failing here avoids
     *         a later {@code NullPointerException} from {@link #read()}
     */
    public GcrTextFileReader(){
        String path = Constants.inputFolder+Constants.inputFile;
        try {
            this.bin = new BufferedReader(new FileReader(path));
        } catch (FileNotFoundException e) {
            throw new IllegalStateException("Input file not found: " + path, e);
        }
        this.count = 0;
    }

    /**
     * Returns the next section of the input.
     *
     * @return a {@link SplitFile} whose lines are the next section, or {@code null} when the
     *         input is exhausted (the underlying reader is closed at that point and every
     *         later call also returns {@code null})
     * @throws Exception if reading from or closing the underlying file fails
     */
    @Override
    public SplitFile read() throws Exception, UnexpectedInputException,
            ParseException {
        System.out.println("Reading");
        if (exhausted) {
            return null;
        }

        String line;
        while ((line = bin.readLine()) != null) {
            if (!line.startsWith(SECTION_PREFIX)) {
                allLines.add(line);
                continue;
            }
            if (count == 0) {
                // First header: start collecting afresh, dropping anything read before it.
                allLines = new ArrayList<String>();
                allLines.add(line);
                count++;
                continue;
            }
            // A later header closes the current section; keep the header for the next call.
            SplitFile finished = new SplitFile();
            finished.setFileLines(allLines);
            allLines = new ArrayList<String>();
            allLines.add(line);
            count++;
            return finished;
        }

        if (!allLines.isEmpty()) {
            // End of file with a section still pending: emit it.
            SplitFile last = new SplitFile();
            last.setFileLines(allLines);
            allLines = new ArrayList<String>();
            count++;
            return last;
        }

        count = 0;
        exhausted = true;
        bin.close();
        return null;
    }
}
App.java
package com.fiam.gcr.batch.runnable;
import java.io.File;
import org.apache.commons.io.FileUtils;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParameter;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import com.fiam.gcr.batch.util.Constants;
/**
 * Command-line entry point: loads the Spring configuration, runs the {@code clean_split}
 * job once and prints the values the job stored in its execution context.
 */
public class App {

    /**
     * Runs the job and prints its outcome.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String[] springConfig =
            {
                "spring/batch/config/applicationContext.xml",
                "spring/batch/config/jobConfig.xml"
            };

        // Typed as the concrete class so the context can be closed when the run is over.
        ClassPathXmlApplicationContext context =
                new ClassPathXmlApplicationContext(springConfig);
        try {
            JobLauncher jobLauncher = (JobLauncher) context.getBean("jobLauncher");
            Job job = (Job) context.getBean("clean_split");

            try {
                JobExecution execution = jobLauncher.run(job, new JobParameters());
                System.out.println("Delete Status : " + execution.getExecutionContext().getString("deleteStatus"));
                System.out.println("Total Files created : " + execution.getExecutionContext().getInt("totalFilesCreated"));
                System.out.println("Validation : " + execution.getExecutionContext().getString("validationStatus"));
                System.out.println("Exit Status : " + execution.getStatus());
            } catch (Exception e) {
                e.printStackTrace();
            }
        } finally {
            // Release the beans and resources held by the application context.
            context.close();
        }

        System.out.println("Done");
    }
}
CleanDirectory.java
package com.fiam.gcr.batch.tasklet;
import java.io.File;
import org.apache.commons.io.FileUtils;
import org.springframework.batch.core.StepContribution;
import org.springframework.batch.core.scope.context.ChunkContext;
import org.springframework.batch.core.step.tasklet.Tasklet;
import org.springframework.batch.repeat.RepeatStatus;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.core.io.Resource;
import org.springframework.util.Assert;
import com.fiam.gcr.batch.util.Constants;
/**
 * Tasklet that empties the output folder and records {@code deleteStatus=SUCCESS}
 * in the job execution context.
 *
 * <p>NOTE(review): the {@code directory} property is required by
 * {@link #afterPropertiesSet()}, but {@link #execute} cleans
 * {@code Constants.outputFolder} and never reads the property. Confirm which
 * location is meant to be authoritative.
 */
public class CleanDirectory implements Tasklet, InitializingBean{

    private Resource directory;

    public void setDirectory(Resource directory) {
        this.directory = directory;
    }

    public Resource getDirectory() {
        return this.directory;
    }

    /** Rejects a configuration in which {@code directory} was not supplied. */
    @Override
    public void afterPropertiesSet() throws Exception {
        Assert.notNull(this.directory, "directory must be set");
    }

    /**
     * Deletes everything inside {@code Constants.outputFolder}, then stores the
     * delete status on the job execution context.
     *
     * @return {@link RepeatStatus#FINISHED}, so the tasklet runs once
     * @throws Exception whatever {@code FileUtils.cleanDirectory} raises
     */
    @Override
    public RepeatStatus execute(StepContribution contribution,
            ChunkContext chunkContext) throws Exception {
        FileUtils.cleanDirectory(new File(Constants.outputFolder));

        chunkContext.getStepContext().getStepExecution().getJobExecution()
                .getExecutionContext().put("deleteStatus", "SUCCESS");

        System.out.println("Deleted all existing files.");
        return RepeatStatus.FINISHED;
    }
}
SplitValidator.java
package com.fiam.gcr.batch.tasklet;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.springframework.batch.core.StepContribution;
import org.springframework.batch.core.scope.context.ChunkContext;
import org.springframework.batch.core.step.tasklet.Tasklet;
import org.springframework.batch.repeat.RepeatStatus;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.util.Assert;
import com.fiam.gcr.batch.util.Constants;
/**
 * Tasklet that checks the result of the split: the number of distinct accounts found in
 * the input file must equal the number of regular files present in the output folder.
 *
 * <p>The outcome is stored as {@code validationStatus} ({@code SUCCESS} or {@code FAILED})
 * in the job execution context. Only the counts are compared, not the names.
 */
public class SplitValidator implements Tasklet, InitializingBean{

    /** Prefix that marks a header line in the input file. */
    private static final String HEADER_PREFIX = "FDXXX";

    /** Index (inclusive) where the account starts within a header line. */
    private static final int ACCOUNT_START = 5;

    /** Index (exclusive) where the account ends within a header line. */
    private static final int ACCOUNT_END = 9;

    private String outputFolder;
    private String inputFolder;
    private String inputFileName;

    /** Rejects a configuration in which any of the three path properties is missing. */
    @Override
    public void afterPropertiesSet() throws Exception {
        Assert.notNull(outputFolder, "outputFolder value must be set");
        Assert.notNull(inputFolder, "inputFolder value must be set");
        Assert.notNull(inputFileName, "inputFileName value must be set");
    }

    /**
     * Compares the distinct accounts in the input with the files in the output folder and
     * records the verdict on the job execution context.
     *
     * @return {@link RepeatStatus#FINISHED}, so the tasklet runs once
     * @throws Exception if the input file cannot be opened or read
     */
    @Override
    public RepeatStatus execute(StepContribution contribution,
            ChunkContext chunkContext) throws Exception {
        System.out.println("Validating...");

        Set<String> allAccounts = readAccounts();
        System.out.println("Accounts to be processed : "+allAccounts);

        List<String> filesCreated = listCreatedFiles();
        System.out.println("Files created : "+filesCreated);

        String status = allAccounts.size()==filesCreated.size() ? "SUCCESS" : "FAILED";
        chunkContext
                .getStepContext()
                .getStepExecution()
                .getJobExecution()
                .getExecutionContext()
                .put("validationStatus", status);
        return RepeatStatus.FINISHED;
    }

    /** Collects the distinct accounts from the header lines of the input file. */
    private Set<String> readAccounts() throws java.io.IOException {
        Set<String> accounts = new HashSet<String>();
        BufferedReader bin = new BufferedReader(new FileReader(inputFolder+inputFileName));
        try {
            String line;
            while ((line = bin.readLine()) != null) {
                // Headers too short to hold an account are ignored.
                if (line.startsWith(HEADER_PREFIX) && line.length() >= ACCOUNT_END) {
                    accounts.add(line.substring(ACCOUNT_START, ACCOUNT_END));
                }
            }
        } finally {
            bin.close();
        }
        return accounts;
    }

    /** Lists the extension-less names of the regular files in the output folder. */
    private List<String> listCreatedFiles() {
        List<String> names = new ArrayList<String>();
        File[] files = new File(outputFolder).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read;
            // treat that as "no files created" rather than failing with a NullPointerException.
            return names;
        }
        for (File file : files) {
            if (file.isFile()) {
                names.add(FilenameUtils.removeExtension(file.getName()));
            }
        }
        return names;
    }

    public void setOutputFolder(String outputFolder) {
        this.outputFolder = outputFolder;
    }

    public void setInputFolder(String inputFolder) {
        this.inputFolder = inputFolder;
    }

    public void setInputFileName(String inputFileName) {
        this.inputFileName = inputFileName;
    }
}
Constants.java
package com.fiam.gcr.batch.util;
/**
 * Fixed file-system locations used by the batch job.
 *
 * <p>This is a constants holder only and cannot be instantiated.
 */
public final class Constants {

    /** Folder containing the file to split (with trailing separator). */
    public static final String inputFolder ="F:\\files\\GCR\\input\\";

    /** Name of the file to split, relative to {@link #inputFolder}. */
    public static final String inputFile ="simplet.txt";

    /** Folder receiving the split files (with trailing separator). */
    public static final String outputFolder ="F:\\files\\GCR\\output\\";

    private Constants() {
        // no instances
    }
}
GcrTextFileWriter.java
package com.fiam.gcr.batch.writer;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.util.List;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.annotation.AfterStep;
import org.springframework.batch.core.annotation.BeforeStep;
import org.springframework.batch.item.ItemWriter;
import com.fiam.gcr.batch.bean.SplitFile;
import com.fiam.gcr.batch.util.Constants;
/**
 * Writes each {@link SplitFile} to {@code Constants.outputFolder + fileName + ".txt"} and
 * publishes the number of files it created as {@code totalFilesCreated} in the job
 * execution context.
 *
 * <p>An item whose target file already exists is skipped without being written or counted.
 * NOTE(review): that means a second section resolving to the same file name is dropped
 * silently; confirm whether it should be appended or reported instead.
 */
public class GcrTextFileWriter implements ItemWriter<SplitFile> {

    /** Files created during the current step. */
    private int totalFilesCreated=0;

    /** Resets the counter and seeds {@code totalFilesCreated} with 0 before the step runs. */
    @BeforeStep
    public void BeforeStep(StepExecution stepExecution){
        // Start from zero so a re-run on the same bean instance does not carry over a count.
        totalFilesCreated = 0;
        stepExecution
                .getJobExecution()
                .getExecutionContext()
                .putInt("totalFilesCreated", 0);
        System.out.println("Splitting process started");
    }

    /** Publishes the final count once the step is over. */
    @AfterStep
    public void AfterStep(StepExecution stepExecution) {
        stepExecution
                .getJobExecution()
                .getExecutionContext()
                .putInt("totalFilesCreated", totalFilesCreated);
        System.out.println("Splitting process ended.");
    }

    /**
     * Writes every item of the chunk whose target file does not exist yet.
     *
     * @param splitFiles the chunk to write; an empty chunk is a no-op
     * @throws Exception if a file cannot be created or written
     */
    @Override
    public void write(List<? extends SplitFile> splitFiles) throws Exception {
        if (splitFiles.isEmpty()) {
            return;
        }
        for (SplitFile splitFile : splitFiles) {
            File file = new File(Constants.outputFolder
                    + splitFile.getFileName() + ".txt");
            if (file.exists()) {
                // Existing files are left untouched.
                continue;
            }
            writeLines(file, splitFile.getFileLines());
            totalFilesCreated++;
        }
        System.out.println("__________"+splitFiles.size()+"__________");
    }

    /** Writes the lines to a new file, closing the writer even when a write fails. */
    private void writeLines(File file, List<String> lines) throws java.io.IOException {
        BufferedWriter bw = new BufferedWriter(new FileWriter(file.getAbsoluteFile()));
        try {
            for (String line : lines) {
                bw.write(line);
                bw.newLine();
            }
        } finally {
            bw.close();
        }
    }
}
applicationContext.xml
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="
http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-3.2.xsd">
<!-- Shared Spring Batch infrastructure: transaction manager, job launcher, job repository and an abstract parent for job definitions. -->
<!-- Transaction manager that is not backed by any transactional resource. -->
<bean id="transactionManager" class="org.springframework.batch.support.transaction.ResourcelessTransactionManager"/>
<!-- Launcher looked up by App under the name "jobLauncher". -->
<bean id="jobLauncher" class="org.springframework.batch.core.launch.support.SimpleJobLauncher">
<property name="jobRepository" ref="jobRepository"/>
</bean>
<!-- Map-based job repository; no DataSource is configured in this file. -->
<bean id="jobRepository" class="org.springframework.batch.core.repository.support.MapJobRepositoryFactoryBean">
<property name="transactionManager" ref="transactionManager"/>
</bean>
<!-- Abstract template: jobs that declare parent="simpleJob" inherit the jobRepository reference. -->
<bean id="simpleJob" class="org.springframework.batch.core.job.SimpleJob" abstract="true">
<property name="jobRepository" ref="jobRepository" />
</bean>
</beans>
jobConfig.xml
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:batch="http://www.springframework.org/schema/batch"
xmlns:task="http://www.springframework.org/schema/task"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.springframework.org/schema/batch
http://www.springframework.org/schema/batch/spring-batch-2.2.xsd
http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-3.2.xsd">
<!-- Defines the "clean_split" job (clean output folder, split input file, validate result) and the beans it uses. -->
<!-- NOTE(review): App also lists applicationContext.xml explicitly, so that file is referenced twice; confirm one of the two can go. -->
<import resource="applicationContext.xml"/>
<!-- Reader, processor and writer of the chunk-oriented "splitFiles" step. -->
<bean id="gcrTextFileReader" class="com.fiam.gcr.batch.reader.GcrTextFileReader" />
<bean id="gcrTextFileProcessor" class="com.fiam.gcr.batch.processor.GcrTextFileProcessor" scope="step"/>
<bean id="gcrTextFileWriter" class="com.fiam.gcr.batch.writer.GcrTextFileWriter" />
<!--
<batch:job id="testJob" job-repository="jobRepository" parent="simpleJob">
<batch:step id="step1">
<batch:tasklet transaction-manager="transactionManager">
<batch:chunk reader="gcrTextFileReader" processor="gcrTextFileProcessor" writer="gcrTextFileWriter" commit-interval="2"/>
</batch:tasklet>
</batch:step>
</batch:job>
-->
<!-- <batch:job id="testJob" job-repository="jobRepository" parent="simpleJob">
<batch:step id="step1">
<batch:tasklet transaction-manager="transactionManager">
<batch:chunk reader="gcrTextFileReader" processor="gcrTextFileProcessor" writer="gcrTextFileWriter" commit-interval="2"/>
</batch:tasklet>
</batch:step>
</batch:job>
-->
<!-- Three steps run in sequence: cleanDir, then splitFiles, then validateSplit. -->
<job id="clean_split" job-repository="jobRepository" parent="simpleJob" xmlns="http://www.springframework.org/schema/batch">
<step id="cleanDir" next="splitFiles">
<tasklet ref="cleanDirTasklet" />
</step>
<step id="splitFiles" next="validateSplit">
<tasklet transaction-manager="transactionManager">
<!-- commit-interval="3": items are handed to the writer in chunks of up to three. -->
<chunk reader="gcrTextFileReader" processor="gcrTextFileProcessor" writer="gcrTextFileWriter" commit-interval="3" />
</tasklet>
</step>
<step id="validateSplit">
<tasklet ref="validateSplitTasklet" />
</step>
</job>
<!-- NOTE(review): CleanDirectory only null-checks "directory"; its execute method cleans Constants.outputFolder instead. -->
<!-- NOTE(review): XML does not treat the backslash as an escape character, so the doubled backslashes below reach the beans literally; confirm that is intended. -->
<bean id="cleanDirTasklet" class="com.fiam.gcr.batch.tasklet.CleanDirectory" >
<property name="directory" value="F:\\files\\GCR\\output\\" />
</bean>
<!-- These values repeat the locations hard-coded in Constants; keep the two in step. -->
<bean id="validateSplitTasklet" class="com.fiam.gcr.batch.tasklet.SplitValidator" >
<property name="outputFolder" value="F:\\files\\GCR\\output\\" />
<property name="inputFolder" value="F:\\files\\GCR\\input\\" />
<property name="inputFileName" value="simplet.txt" />
</bean>
</beans>
pom.xml
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!-- Maven build for the file-splitting Spring Batch example; produces spring-batch.jar. -->
<modelVersion>4.0.0</modelVersion>
<groupId>com.mkyong</groupId>
<artifactId>SpringBatchExample</artifactId>
<packaging>jar</packaging>
<version>1.0-SNAPSHOT</version>
<name>SpringBatchExample</name>
<url>http://maven.apache.org</url>
<properties>
<!-- Source/target level passed to the compiler plugin below. -->
<jdk.version>1.6</jdk.version>
<!-- NOTE(review): confirm that this Spring Framework version is one the chosen Spring Batch version supports. -->
<spring.version>3.2.2.RELEASE</spring.version>
<spring.batch.version>3.0.6.RELEASE</spring.batch.version>
<mysql.driver.version>5.1.25</mysql.driver.version>
</properties>
<dependencies>
<!-- Spring Core -->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-core</artifactId>
<version>${spring.version}</version>
</dependency>
<!-- Spring JDBC -->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-jdbc</artifactId>
<version>${spring.version}</version>
</dependency>
<!-- Spring Batch dependencies -->
<dependency>
<groupId>org.springframework.batch</groupId>
<artifactId>spring-batch-core</artifactId>
<version>${spring.batch.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.batch</groupId>
<artifactId>spring-batch-infrastructure</artifactId>
<version>${spring.batch.version}</version>
</dependency>
<!-- MySQL database driver -->
<!-- NOTE(review): the accompanying Spring configuration declares no DataSource; confirm this driver is still needed. -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>${mysql.driver.version}</version>
</dependency>
<!-- Apache Commons IO: provides the FileUtils and FilenameUtils classes used by the tasklets. -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.4</version>
</dependency>
</dependencies>
<build>
<finalName>spring-batch</finalName>
<plugins>
<!-- Eclipse project generation: attach sources, skip Javadoc. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-eclipse-plugin</artifactId>
<version>2.9</version>
<configuration>
<downloadSources>true</downloadSources>
<downloadJavadocs>false</downloadJavadocs>
</configuration>
</plugin>
<!-- Compile for the Java level set in jdk.version. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<source>${jdk.version}</source>
<target>${jdk.version}</target>
</configuration>
</plugin>
</plugins>
</build>
</project>
Source file can be downloaded from here
Hi Kishore, can't download can u share purushothcs@gmail.com , plz help
Reply | Delete
Hi Kishore, I am also not able to download the entire project. Could you please share the code with sumadhuradapa@gmail.com? Please help.
ReplyDeleteThanks for sharing Kishore!
ReplyDeleteGambling in the U.S. - MapYRO
ReplyDeleteWelcome to our page 경산 출장마사지 of gambling 공주 출장마사지 in the U.S. We'll update this page when it becomes available. 보령 출장마사지 Gambling is legal in several states in 김제 출장샵 the 도레미시디 출장샵 U.S. in