import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Minimal regular-expression based parser for one line of comma-separated values.
 *
 * <p>A field is either bare (a run of characters containing no comma) or wrapped in
 * double quotes, in which case it may contain commas and doubled quotes ({@code ""}),
 * each standing for one literal quote character. Empty fields are reported as
 * {@code null}.
 *
 * <p>Instances carry no state; the compiled pattern is shared by all instances.
 *
 * @author Rizwan Sharif
 */
public class CsvParser {

    /**
     * Regular expression matching one field of a CSV line.
     *
     * <p>Without Java string escaping it reads
     * {@code ("(([^"]|"")+)"|([^,]+)|,)(?=(,|$))} and consists of four parts:
     * <ul>
     *   <li>{@code "(([^"]|"")+)"} - a quoted value: an opening quote, then one or more
     *       occurrences of either a non-quote character or a doubled quote, then a
     *       closing quote;</li>
     *   <li>{@code ([^,]+)} - a simple value: one or more characters other than a comma;</li>
     *   <li>{@code ,} - a lone comma, which stands for an empty field;</li>
     *   <li>{@code (?=(,|$))} - whichever of the three matched must be followed by a
     *       comma or by the end of the line.</li>
     * </ul>
     */
    public static final String CSV_PATTERN = "(\"(([^\"]|\"\")+)\"|([^,]+)|,)(?=(,|$))";

    /** {@link #CSV_PATTERN} compiled once and reused by every call to {@link #parseLine}. */
    private static final Pattern CSV_REGEX = Pattern.compile(CSV_PATTERN);

    /** Creates a parser. All instances behave identically. */
    public CsvParser() {
    }

    /**
     * Splits one CSV line into its fields.
     *
     * <p>Surrounding quotes are removed from quoted fields and doubled quotes inside them
     * are collapsed to a single quote. A token that merely starts with a quote but is not
     * closed by one is returned unchanged.
     *
     * @param line the line to parse, without relying on any particular line terminator
     * @return the fields in order of appearance, with {@code null} standing for an empty
     *         field; an empty list when {@code line} is the empty string
     * @throws NullPointerException if {@code line} is {@code null}
     */
    public List<String> parseLine(String line) {
        if (line == null) {
            throw new NullPointerException("line");
        }
        List<String> fields = new ArrayList<String>();

        // The pattern recognises an empty field only as a comma that is followed by
        // another comma or by the end of the line, so an empty FIRST field (a line that
        // begins with a comma) is never matched. Add it here to keep columns aligned.
        if (line.startsWith(",")) {
            fields.add(null);
        }

        Matcher matcher = CSV_REGEX.matcher(line);
        while (matcher.find()) {
            fields.add(toField(matcher.group()));
        }
        return fields;
    }

    /** Converts one raw regex match into a field value, or {@code null} if it is empty. */
    private static String toField(String token) {
        // A lone comma is how the pattern reports an empty field.
        if (token.equals(",")) {
            return null;
        }
        String value = token;
        if (isQuoted(value)) {
            // Drop the enclosing quotes, then turn each escaped "" back into ".
            value = value.substring(1, value.length() - 1).replace("\"\"", "\"");
        }
        return value.length() == 0 ? null : value;
    }

    /** Tells whether the token has both an opening and a distinct closing double quote. */
    private static boolean isQuoted(String token) {
        return token.length() >= 2 && token.startsWith("\"") && token.endsWith("\"");
    }
}
Thursday, September 1, 2011
My CSV Parser for Java
Subscribe to:
Comments (Atom)