A simple Spring Batch example that splits a large text file into multiple smaller text files, starting a new output file whenever a line begins with a given delimiter string.
SplitFile.java
package com.fiam.gcr.batch.bean;
import java.util.List;
/**
 * Mutable bean describing one section of the input file.
 *
 * <p>It carries the section's lines, a name for the section and an optional
 * count. Every property starts out as {@code null}; the class itself performs
 * no validation and no copying of the values it is given.
 */
public class SplitFile {

    /** Name associated with this section. */
    private String fileName;

    /** Lines belonging to this section, in the order they were supplied. */
    private List<String> fileLines;

    /** Optional counter; this class only stores it. */
    private Integer fileCount;

    /**
     * @return the stored line list (the same instance that was set), or
     *         {@code null} if none was set
     */
    public List<String> getFileLines() {
        return this.fileLines;
    }

    /**
     * @param fileLines the lines of this section; stored by reference
     */
    public void setFileLines(List<String> fileLines) {
        this.fileLines = fileLines;
    }

    /**
     * @return the section name, or {@code null} if none was set
     */
    public String getFileName() {
        return this.fileName;
    }

    /**
     * @param fileName the section name
     */
    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    /**
     * @return the stored count, or {@code null} if none was set
     */
    public Integer getFileCount() {
        return this.fileCount;
    }

    /**
     * @param fileCount the count to store
     */
    public void setFileCount(Integer fileCount) {
        this.fileCount = fileCount;
    }
}
GcrTextFileProcessor.java
package com.fiam.gcr.batch.processor;
import java.util.List;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.annotation.BeforeStep;
import org.springframework.batch.item.ItemProcessor;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Scope;
import org.springframework.stereotype.Component;
import com.fiam.gcr.batch.bean.SplitFile;
/**
 * Names each {@link SplitFile} after characters 5..8 of its first line.
 *
 * <p>The item is modified in place and the same instance is returned; no copy
 * is made.
 */
public class GcrTextFileProcessor implements ItemProcessor<SplitFile, SplitFile> {

    /** Index (inclusive) of the first character of the name in the first line. */
    private static final int NAME_START = 5;

    /** Index (exclusive) just past the last character of the name. */
    private static final int NAME_END = 9;

    /**
     * Sets the file name of the given item from its first line.
     *
     * @param inputSplitFile the item to name; its line list may be {@code null}
     *        or empty, in which case the item is returned unchanged
     * @return the same {@code inputSplitFile} instance
     * @throws IllegalArgumentException if the first line is {@code null} or has
     *         fewer than 9 characters, so no name can be extracted
     */
    @Override
    public SplitFile process(SplitFile inputSplitFile) throws Exception {
        List<String> lines = inputSplitFile.getFileLines();
        if (lines == null || lines.isEmpty()) {
            // Nothing to derive a name from; pass the item through untouched.
            return inputSplitFile;
        }

        String header = lines.get(0);
        if (header == null || header.length() < NAME_END) {
            // Fail with a message that identifies the offending line instead of
            // a bare index error from substring().
            throw new IllegalArgumentException(
                    "Cannot derive file name: first line must have at least "
                    + NAME_END + " characters but was: " + header);
        }

        // Extract once and reuse for both the log line and the bean.
        String name = header.substring(NAME_START, NAME_END);
        System.out.println("Processing- " + name);
        inputSplitFile.setFileName(name);
        return inputSplitFile;
    }
}
GcrTextFileReader.java
package com.fiam.gcr.batch.reader;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.List;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.annotation.AfterStep;
import org.springframework.batch.core.annotation.BeforeJob;
import org.springframework.batch.core.annotation.BeforeRead;
import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.ParseException;
import org.springframework.batch.item.UnexpectedInputException;
import com.fiam.gcr.batch.bean.SplitFile;
import com.fiam.gcr.batch.util.Constants;
/**
 * Reads the input file named by {@link Constants#inputFolder} +
 * {@link Constants#inputFile} and hands it out one section at a time.
 *
 * <p>A section starts at a line beginning with {@code "FDXXX"} and runs up to
 * (not including) the next such line, or to end of file. Lines that appear
 * before the very first marker are discarded when that marker is reached; if
 * the file contains no marker at all, every line is returned as one section.
 *
 * <p>The reader keeps state between calls and is not safe for concurrent use.
 */
public class GcrTextFileReader implements ItemReader<SplitFile> {

    /** Prefix that marks the first line of a section. */
    private static final String SECTION_MARKER = "FDXXX";

    /** Open input, or {@code null} once exhausted (or if it could not be opened). */
    private BufferedReader bin;

    /** Number of sections started/returned so far; 0 means no marker seen yet. */
    private Integer count;

    /** Lines collected for the section currently being assembled. */
    private List<String> allLines = new ArrayList<String>();

    /** Why the input could not be opened, or {@code null} if it was opened. */
    private FileNotFoundException openFailure;

    /**
     * Opens the input file. A missing file does not fail construction; the
     * failure is remembered and reported by the first call to {@link #read()}.
     */
    public GcrTextFileReader() {
        this.count = 0;
        try {
            this.bin = new BufferedReader(
                    new FileReader(Constants.inputFolder + Constants.inputFile));
        } catch (FileNotFoundException e) {
            // Keep the cause so read() can fail with a meaningful message
            // rather than a NullPointerException on the unopened reader.
            this.openFailure = e;
        }
    }

    /**
     * Returns the next section of the input file.
     *
     * @return a {@link SplitFile} whose lines are the next section, or
     *         {@code null} when the input is exhausted
     * @throws IllegalStateException if the input file could not be opened
     * @throws Exception if reading or closing the input fails
     */
    @Override
    public SplitFile read() throws Exception, UnexpectedInputException,
            ParseException {
        System.out.println("Reading");
        if (openFailure != null) {
            throw new IllegalStateException("Input file could not be opened: "
                    + Constants.inputFolder + Constants.inputFile, openFailure);
        }

        if (bin != null) {
            String line;
            while ((line = bin.readLine()) != null) {
                if (!line.startsWith(SECTION_MARKER)) {
                    allLines.add(line);
                } else if (count == 0) {
                    // First marker in the file: drop anything collected before
                    // it and start the first section.
                    allLines = new ArrayList<String>();
                    allLines.add(line);
                    count++;
                } else {
                    // A new marker ends the current section. Hand that section
                    // out and keep the marker line as the start of the next.
                    SplitFile finished = new SplitFile();
                    finished.setFileLines(allLines);
                    allLines = new ArrayList<String>();
                    allLines.add(line);
                    count++;
                    return finished;
                }
            }
            // End of file: release the handle now; later calls skip the loop.
            closeInput();
        }

        if (!allLines.isEmpty()) {
            // Whatever is still buffered is the final section.
            SplitFile last = new SplitFile();
            last.setFileLines(allLines);
            allLines = new ArrayList<String>();
            count++;
            return last;
        }

        count = 0;
        return null;
    }

    /** Closes the input and forgets it, even if closing fails. */
    private void closeInput() throws java.io.IOException {
        try {
            bin.close();
        } finally {
            bin = null;
        }
    }
}
App.java
package com.fiam.gcr.batch.runnable;
import java.io.File;
import org.apache.commons.io.FileUtils;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParameter;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import com.fiam.gcr.batch.util.Constants;
/**
 * Command-line entry point: loads the Spring configuration, runs the
 * {@code clean_split} job once and prints what the steps recorded in the job
 * execution context.
 */
public class App {

    /**
     * Runs the job and prints a summary.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String[] springConfig = {
            "spring/batch/config/applicationContext.xml",
            "spring/batch/config/jobConfig.xml"
        };

        ClassPathXmlApplicationContext context =
                new ClassPathXmlApplicationContext(springConfig);
        try {
            JobLauncher jobLauncher = (JobLauncher) context.getBean("jobLauncher");
            Job job = (Job) context.getBean("clean_split");

            try {
                JobExecution execution = jobLauncher.run(job, new JobParameters());
                // Use the defaulting accessors: a step that failed or never ran
                // leaves its key unset, and the plain getters would throw before
                // the exit status below could be printed.
                System.out.println("Delete Status : "
                        + execution.getExecutionContext().getString("deleteStatus", "N/A"));
                System.out.println("Total Files created : "
                        + execution.getExecutionContext().getInt("totalFilesCreated", 0));
                System.out.println("Validation : "
                        + execution.getExecutionContext().getString("validationStatus", "N/A"));
                System.out.println("Exit Status : " + execution.getStatus());
            } catch (Exception e) {
                e.printStackTrace();
            }
            System.out.println("Done");
        } finally {
            // Release the beans and resources held by the application context.
            context.close();
        }
    }
}
CleanDirectory.java
package com.fiam.gcr.batch.tasklet;
import java.io.File;
import org.apache.commons.io.FileUtils;
import org.springframework.batch.core.StepContribution;
import org.springframework.batch.core.scope.context.ChunkContext;
import org.springframework.batch.core.step.tasklet.Tasklet;
import org.springframework.batch.repeat.RepeatStatus;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.core.io.Resource;
import org.springframework.util.Assert;
import com.fiam.gcr.batch.util.Constants;
/**
 * Tasklet that empties the output folder and records the outcome in the job
 * execution context under the key {@code "deleteStatus"}.
 *
 * <p>The folder that is emptied is {@link Constants#outputFolder}. The
 * {@code directory} property must be set (see {@link #afterPropertiesSet()})
 * but is not consulted when choosing what to clean.
 */
public class CleanDirectory implements Tasklet, InitializingBean {

    /** Injected directory; only checked for presence. */
    private Resource directory;

    /**
     * @return the injected directory resource
     */
    public Resource getDirectory() {
        return directory;
    }

    /**
     * @param directory the directory resource to store
     */
    public void setDirectory(Resource directory) {
        this.directory = directory;
    }

    /**
     * Verifies that the {@code directory} property was supplied.
     */
    @Override
    public void afterPropertiesSet() throws Exception {
        Assert.notNull(directory, "directory must be set");
    }

    /**
     * Cleans {@link Constants#outputFolder} and stores
     * {@code deleteStatus=SUCCESS} in the job execution context.
     *
     * @return {@link RepeatStatus#FINISHED}, so the tasklet runs once
     * @throws Exception whatever {@code FileUtils.cleanDirectory} throws
     */
    @Override
    public RepeatStatus execute(StepContribution contribution,
            ChunkContext chunkContext) throws Exception {
        // The target comes from Constants, not from the injected field.
        FileUtils.cleanDirectory(new File(Constants.outputFolder));

        chunkContext.getStepContext()
                .getStepExecution()
                .getJobExecution()
                .getExecutionContext()
                .put("deleteStatus", "SUCCESS");

        System.out.println("Deleted all existing files.");
        return RepeatStatus.FINISHED;
    }
}
SplitValidator.java
package com.fiam.gcr.batch.tasklet;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.springframework.batch.core.StepContribution;
import org.springframework.batch.core.scope.context.ChunkContext;
import org.springframework.batch.core.step.tasklet.Tasklet;
import org.springframework.batch.repeat.RepeatStatus;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.util.Assert;
import com.fiam.gcr.batch.util.Constants;
/**
 * Tasklet that checks the result of the split: the number of distinct account
 * codes found in the input file must equal the number of regular files present
 * in the output folder.
 *
 * <p>An account code is characters 5..8 of any input line that starts with
 * {@code "FDXXX"} and is at least 9 characters long. The outcome is stored in
 * the job execution context under {@code "validationStatus"} as
 * {@code "SUCCESS"} or {@code "FAILED"}.
 */
public class SplitValidator implements Tasklet, InitializingBean {

    /** Prefix of the lines that carry an account code. */
    private static final String ACCOUNT_MARKER = "FDXXX";

    /** Index (inclusive) where the account code starts. */
    private static final int ACCOUNT_START = 5;

    /** Index (exclusive) where the account code ends. */
    private static final int ACCOUNT_END = 9;

    private String outputFolder;
    private String inputFolder;
    private String inputFileName;

    /**
     * Verifies that every property used by {@link #execute} was supplied.
     */
    @Override
    public void afterPropertiesSet() throws Exception {
        Assert.notNull(outputFolder, "outputFolder value must be set");
        // Both are concatenated into the input path; a null would silently
        // become the text "null" in that path.
        Assert.notNull(inputFolder, "inputFolder value must be set");
        Assert.notNull(inputFileName, "inputFileName value must be set");
    }

    /**
     * Compares the distinct accounts in the input with the files in the output
     * folder and records the verdict.
     *
     * @return {@link RepeatStatus#FINISHED}, so the tasklet runs once
     * @throws Exception if the input file cannot be read
     */
    @Override
    public RepeatStatus execute(StepContribution contribution,
            ChunkContext chunkContext) throws Exception {
        System.out.println("Validating...");

        Set<String> allAccounts = readAccounts();
        System.out.println("Accounts to be processed : " + allAccounts);

        // listFiles() returns null when the path is not a directory or cannot
        // be listed; report that as a failed validation.
        File[] files = new File(outputFolder).listFiles();
        if (files == null) {
            System.out.println("Output folder cannot be listed : " + outputFolder);
            recordStatus(chunkContext, "FAILED");
            return RepeatStatus.FINISHED;
        }

        List<String> filesCreated = new ArrayList<String>();
        for (File file : files) {
            if (file.isFile()) {
                filesCreated.add(FilenameUtils.removeExtension(file.getName()));
            }
        }
        System.out.println("Files created : " + filesCreated);

        boolean countsMatch = allAccounts.size() == filesCreated.size();
        recordStatus(chunkContext, countsMatch ? "SUCCESS" : "FAILED");
        return RepeatStatus.FINISHED;
    }

    /** Collects the distinct account codes from the input file, closing it afterwards. */
    private Set<String> readAccounts() throws java.io.IOException {
        Set<String> accounts = new HashSet<String>();
        BufferedReader bin = new BufferedReader(new FileReader(inputFolder + inputFileName));
        try {
            String line;
            while ((line = bin.readLine()) != null) {
                if (line.startsWith(ACCOUNT_MARKER) && line.length() >= ACCOUNT_END) {
                    accounts.add(line.substring(ACCOUNT_START, ACCOUNT_END));
                }
            }
        } finally {
            bin.close();
        }
        return accounts;
    }

    /** Stores the verdict in the job execution context under "validationStatus". */
    private void recordStatus(ChunkContext chunkContext, String status) {
        chunkContext.getStepContext()
                .getStepExecution()
                .getJobExecution()
                .getExecutionContext()
                .put("validationStatus", status);
    }

    /**
     * @param outputFolder folder whose files are counted
     */
    public void setOutputFolder(String outputFolder) {
        this.outputFolder = outputFolder;
    }

    /**
     * @param inputFolder folder prefix of the input file; concatenated
     *        directly with the file name
     */
    public void setInputFolder(String inputFolder) {
        this.inputFolder = inputFolder;
    }

    /**
     * @param inputFileName name of the input file inside the input folder
     */
    public void setInputFileName(String inputFileName) {
        this.inputFileName = inputFileName;
    }
}
Constants.java
package com.fiam.gcr.batch.util;
/**
 * Hard-coded file-system locations used by the batch job.
 *
 * <p>The folder values end with a path separator so that a file name can be
 * appended directly.
 */
public class Constants {

    /** Name of the file to split, relative to {@link #inputFolder}. */
    public static final String inputFile = "simplet.txt";

    /** Folder containing the file to split; ends with a separator. */
    public static final String inputFolder = "F:\\files\\GCR\\input\\";

    /** Folder that receives the generated files; ends with a separator. */
    public static final String outputFolder = "F:\\files\\GCR\\output\\";
}
GcrTextFileWriter.java
package com.fiam.gcr.batch.writer;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.util.List;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.annotation.AfterStep;
import org.springframework.batch.core.annotation.BeforeStep;
import org.springframework.batch.item.ItemWriter;
import com.fiam.gcr.batch.bean.SplitFile;
import com.fiam.gcr.batch.util.Constants;
/**
 * Writes each {@link SplitFile} to {@code <outputFolder><fileName>.txt} and
 * publishes the number of files it created in the job execution context under
 * {@code "totalFilesCreated"}.
 *
 * <p>A target file that already exists is left untouched and the item's lines
 * are not written.
 * NOTE(review): this drops a later section that maps to the same file name;
 * confirm that is intended.
 */
public class GcrTextFileWriter implements ItemWriter<SplitFile> {

    /** Files created during the current step. */
    private int totalFilesCreated = 0;

    /**
     * Resets the counter and publishes an initial count of 0.
     *
     * @param stepExecution the step about to run
     */
    @BeforeStep
    public void BeforeStep(StepExecution stepExecution) {
        // Reset so a second run of the step on this bean starts from zero
        // rather than continuing the previous run's total.
        totalFilesCreated = 0;
        stepExecution
                .getJobExecution()
                .getExecutionContext()
                .putInt("totalFilesCreated", 0);
        System.out.println("Splitting process started");
    }

    /**
     * Publishes the final number of files created.
     *
     * @param stepExecution the step that just finished
     */
    @AfterStep
    public void AfterStep(StepExecution stepExecution) {
        stepExecution
                .getJobExecution()
                .getExecutionContext()
                .putInt("totalFilesCreated", totalFilesCreated);
        System.out.println("Splitting process ended.");
    }

    /**
     * Writes every item of the chunk whose target file does not exist yet.
     *
     * @param splitFiles the chunk to write; an empty chunk is a no-op
     * @throws Exception if a file cannot be created or written
     */
    @Override
    public void write(List<? extends SplitFile> splitFiles) throws Exception {
        if (splitFiles.isEmpty()) {
            return;
        }
        for (SplitFile splitFile : splitFiles) {
            File file = new File(Constants.outputFolder
                    + splitFile.getFileName() + ".txt");
            if (file.exists()) {
                continue;
            }
            writeLines(file, splitFile.getFileLines());
            totalFilesCreated++;
        }
        System.out.println("__________" + splitFiles.size() + "__________");
    }

    /** Writes the lines to the file, one per line, always closing the writer. */
    private void writeLines(File file, List<String> lines) throws java.io.IOException {
        // FileWriter creates the file itself, so no separate createNewFile().
        BufferedWriter bw = new BufferedWriter(new FileWriter(file.getAbsoluteFile()));
        try {
            for (String line : lines) {
                bw.write(line);
                bw.newLine();
            }
        } finally {
            bw.close();
        }
    }
}
applicationContext.xml
<!-- Shared batch infrastructure beans: transaction manager, job launcher, job repository,
     and the abstract parent bean that job definitions inherit from. -->
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="
http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-3.2.xsd">
<!-- Transaction manager handed to the job repository below. -->
<bean id="transactionManager" class="org.springframework.batch.support.transaction.ResourcelessTransactionManager"/>
<!-- Job launcher; App looks this bean up by the id "jobLauncher". -->
<bean id="jobLauncher" class="org.springframework.batch.core.launch.support.SimpleJobLauncher">
<property name="jobRepository" ref="jobRepository"/>
</bean>
<!-- Job repository produced by a factory bean; no data source is configured for it here. -->
<bean id="jobRepository" class="org.springframework.batch.core.repository.support.MapJobRepositoryFactoryBean">
<property name="transactionManager" ref="transactionManager"/>
</bean>
<!-- Abstract template: a job declaring parent="simpleJob" inherits the jobRepository property. -->
<bean id="simpleJob" class="org.springframework.batch.core.job.SimpleJob" abstract="true">
<property name="jobRepository" ref="jobRepository" />
</bean>
</beans>
jobConfig.xml
<!-- Job definition for the file split: reader, processor and writer beans, the three-step
     "clean_split" job, and the two tasklet beans it references.
     NOTE(review): the "task" namespace prefix is declared below but no element in this file uses it. -->
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:batch="http://www.springframework.org/schema/batch"
xmlns:task="http://www.springframework.org/schema/task"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.springframework.org/schema/batch
http://www.springframework.org/schema/batch/spring-batch-2.2.xsd
http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-3.2.xsd">
<!-- Pulls in the infrastructure beans (jobRepository, transactionManager, simpleJob). -->
<import resource="applicationContext.xml"/>
<!-- Chunk components used by the splitFiles step. Only the processor is declared with scope="step". -->
<bean id="gcrTextFileReader" class="com.fiam.gcr.batch.reader.GcrTextFileReader" />
<bean id="gcrTextFileProcessor" class="com.fiam.gcr.batch.processor.GcrTextFileProcessor" scope="step"/>
<bean id="gcrTextFileWriter" class="com.fiam.gcr.batch.writer.GcrTextFileWriter" />
<!-- The two commented-out blocks that follow are disabled definitions of a single-step job
     named testJob; they have the same content and differ only in layout. -->
<!--
<batch:job id="testJob" job-repository="jobRepository" parent="simpleJob">
<batch:step id="step1">
<batch:tasklet transaction-manager="transactionManager">
<batch:chunk reader="gcrTextFileReader" processor="gcrTextFileProcessor" writer="gcrTextFileWriter" commit-interval="2"/>
</batch:tasklet>
</batch:step>
</batch:job>
-->
<!-- <batch:job id="testJob" job-repository="jobRepository" parent="simpleJob">
<batch:step id="step1">
<batch:tasklet transaction-manager="transactionManager">
<batch:chunk reader="gcrTextFileReader" processor="gcrTextFileProcessor" writer="gcrTextFileWriter" commit-interval="2"/>
</batch:tasklet>
</batch:step>
</batch:job>
-->
<!-- Active job, looked up by App under the id "clean_split".
     Step order: cleanDir, then splitFiles, then validateSplit. -->
<job id="clean_split" job-repository="jobRepository" parent="simpleJob" xmlns="http://www.springframework.org/schema/batch">
<!-- Step 1: run the cleanDirTasklet bean. -->
<step id="cleanDir" next="splitFiles">
<tasklet ref="cleanDirTasklet" />
</step>
<!-- Step 2: chunk-oriented read, process, write with a commit interval of 3 items. -->
<step id="splitFiles" next="validateSplit">
<tasklet transaction-manager="transactionManager">
<chunk reader="gcrTextFileReader" processor="gcrTextFileProcessor" writer="gcrTextFileWriter" commit-interval="3" />
</tasklet>
</step>
<!-- Step 3: run the validateSplitTasklet bean; last step, so no "next". -->
<step id="validateSplit">
<tasklet ref="validateSplitTasklet" />
</step>
</job>
<!-- NOTE(review): CleanDirectory only checks that "directory" is set; the folder it actually
     cleans is Constants.outputFolder. Keep the two locations in sync. -->
<bean id="cleanDirTasklet" class="com.fiam.gcr.batch.tasklet.CleanDirectory" >
<property name="directory" value="F:\\files\\GCR\\output\\" />
</bean>
<!-- SplitValidator builds the input path as inputFolder + inputFileName, so inputFolder must
     end with a separator.
     NOTE(review): backslashes are written doubled in these values; confirm the resulting
     paths resolve as intended. -->
<bean id="validateSplitTasklet" class="com.fiam.gcr.batch.tasklet.SplitValidator" >
<property name="outputFolder" value="F:\\files\\GCR\\output\\" />
<property name="inputFolder" value="F:\\files\\GCR\\input\\" />
<property name="inputFileName" value="simplet.txt" />
</bean>
</beans>
pom.xml
<!-- Maven build for the file-split example: Spring core and JDBC, Spring Batch,
     a MySQL driver and Commons IO, packaged as a jar named "spring-batch". -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.mkyong</groupId>
<artifactId>SpringBatchExample</artifactId>
<packaging>jar</packaging>
<version>1.0-SNAPSHOT</version>
<name>SpringBatchExample</name>
<url>http://maven.apache.org</url>
<!-- Versions referenced by the dependencies and the compiler plugin below. -->
<properties>
<!-- Used as both source and target level by maven-compiler-plugin. -->
<jdk.version>1.6</jdk.version>
<spring.version>3.2.2.RELEASE</spring.version>
<spring.batch.version>3.0.6.RELEASE</spring.batch.version>
<mysql.driver.version>5.1.25</mysql.driver.version>
</properties>
<dependencies>
<!-- Spring Core -->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-core</artifactId>
<version>${spring.version}</version>
</dependency>
<!-- Spring JDBC -->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-jdbc</artifactId>
<version>${spring.version}</version>
</dependency>
<!-- Spring Batch dependencies -->
<dependency>
<groupId>org.springframework.batch</groupId>
<artifactId>spring-batch-core</artifactId>
<version>${spring.batch.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.batch</groupId>
<artifactId>spring-batch-infrastructure</artifactId>
<version>${spring.batch.version}</version>
</dependency>
<!-- MySQL database driver.
     NOTE(review): none of the Java sources shown with this project reference MySQL;
     presumably a leftover, verify before removing. -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>${mysql.driver.version}</version>
</dependency>
<!-- Commons IO: provides the FileUtils and FilenameUtils classes imported by the tasklets. -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.4</version>
</dependency>
</dependencies>
<build>
<!-- Base name of the built artifact. -->
<finalName>spring-batch</finalName>
<plugins>
<!-- Eclipse project generation: download sources, skip javadocs. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-eclipse-plugin</artifactId>
<version>2.9</version>
<configuration>
<downloadSources>true</downloadSources>
<downloadJavadocs>false</downloadJavadocs>
</configuration>
</plugin>
<!-- Compile at the language level given by the jdk.version property. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<source>${jdk.version}</source>
<target>${jdk.version}</target>
</configuration>
</plugin>
</plugins>
</build>
</project>
Source file can be downloaded from here