import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Minimal regex-based parser for a single line of comma-separated values.
 *
 * <p>Fields may be wrapped in double quotes, in which case they may contain
 * commas and doubled quotes ({@code ""}) standing for one literal quote.
 * Empty fields are reported as {@code null}.
 *
 * @author Rizwan Sharif
 */
public class CsvParser {

    /**
     * Original regex without escaped quotes:
     * {@code ("(([^"]|"")+)"|([^,]+)|,)(?=(,|$))}
     *
     * <p>Regex explained:
     * <ul>
     *   <li>First part {@code "(([^"]|"")+)"}: a value that starts with a quote is
     *       followed, one or more times, by either a non-quote character or a
     *       doubled quote, and then by a closing quote.</li>
     *   <li>Second part {@code ([^,]+)}: a simple value, any run of characters
     *       other than a comma.</li>
     *   <li>Third part {@code ,}: a bare comma, which stands for the empty field
     *       that follows that comma.</li>
     *   <li>Fourth part {@code (?=(,|$))}: each of the three parts above must be
     *       followed by a comma or by the end of the line.</li>
     * </ul>
     */
    public static final String CSV_PATTERN = "(\"(([^\"]|\"\")+)\"|([^,]+)|,)(?=(,|$))";

    /** Compiled once; {@link Pattern} is immutable and safe to share between instances. */
    private static final Pattern CSV_RE = Pattern.compile(CSV_PATTERN);

    /** Creates a parser. The parser holds no per-instance state. */
    public CsvParser() {
    }

    /**
     * Splits one CSV line into its fields.
     *
     * @param line the line to parse, without its line terminator; must not be {@code null}
     * @return the fields in order; an empty field is represented by {@code null},
     *         and an empty line yields an empty list
     * @throws NullPointerException if {@code line} is {@code null}
     */
    public List<String> parseLine(String line) {
        List<String> fields = new ArrayList<String>();

        // The bare-comma alternative of the pattern only accounts for the empty
        // field AFTER a comma, so an empty field before the very first comma
        // would otherwise be dropped and shift every following column.
        if (line.startsWith(",")) {
            fields.add(null);
        }

        Matcher matcher = CSV_RE.matcher(line);
        while (matcher.find()) {
            fields.add(toFieldValue(matcher.group()));
        }
        return fields;
    }

    /** Converts one raw regex match into the field value, or {@code null} when the field is empty. */
    private static String toFieldValue(String token) {
        // A bare comma is the marker for an empty field.
        if (token.equals(",")) {
            return null;
        }

        String value = token;
        // Strip quotes only when the token is really enclosed on both sides; a lone
        // or unterminated quote is kept verbatim instead of losing characters.
        if (value.length() >= 2 && value.startsWith("\"") && value.endsWith("\"")) {
            value = value.substring(1, value.length() - 1);
            // Inside a quoted field a doubled quote encodes one literal quote.
            value = value.replace("\"\"", "\"");
        }
        return value.length() == 0 ? null : value;
    }
}
Thursday, September 1, 2011
My Csv Parser For Java
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment