CSVFileParser.java
/*
* $Source$
* $Revision$
*
* Copyright (C) 2001 Myles Chippendale
*
* Part of Melati (http://melati.org), a framework for the rapid
* development of clean, maintainable web applications.
*
* Melati is free software; Permission is granted to copy, distribute
* and/or modify this software under the terms either:
*
* a) the GNU General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version,
*
* or
*
* b) any version of the Melati Software License, as published
* at http://melati.org
*
* You should have received a copy of the GNU General Public License and
* the Melati Software License along with this program;
* if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA to obtain the
* GNU General Public License and visit http://melati.org to obtain the
* Melati Software License.
*
* Feel free to contact the Developers of Melati (http://melati.org),
* if you would like to work out a different arrangement than the options
* outlined here. It is our intention to allow Melati to be used by as
* wide an audience as possible.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* Contact details for copyright holder:
*
* Myles Chippendale <mylesc At paneris.org>
*/
package org.melati.poem.csv;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.NoSuchElementException;
/**
 * A utility for tokenising a file made up of comma-separated
 * variables. We allow for fields having returns in them.
 *
 * <PRE>
 *   foo, bar om,,"baz, ,oof",xyz,   ->
 *     "foo", " bar om", "", "baz, ,oof", "xyz", ""
 *
 *   foo,"bar
 *   bar
 *   bar",baz   ->
 *     "foo", "bar\nbar\nbar", "baz"
 * </PRE>
 *
 * A field is only treated as quoted if the quote is its very first
 * character. A quoted field ends at the first quote which is followed
 * by a comma, or failing that at a quote which is the last character
 * of the line; any other quote is taken to be part of the field.
 * The physical lines of a multi-line field are joined with "\n".
 *
 * Each record (which is usually a line, unless some fields have
 * a line break in them) is accessed one at a time by calling
 * <code>nextRecord()</code>. Within each record
 * <code>recordHasMoreFields()</code> and <code>nextField()</code>
 * can be used like an Enumeration to iterate through the fields.
 *
 * @author mylesc, based heavily on
 *         original CSVStringEnumeration williamc
 */
public class CSVFileParser {

  /** Source of lines; closed by this parser once its end is reached. */
  private BufferedReader reader = null;

  /** Number of physical lines read so far. The first line will be line '1'. */
  int lineNo = 0;

  /** The physical line currently being tokenised; never null. */
  private String line = "";

  /** Set when the line ended in a comma, so one more empty field is pending. */
  private boolean emptyLastField = false;

  /** Index within <code>line</code> at which the next field starts. */
  int p = 0;

  /** Set once the reader has been exhausted and closed. */
  private boolean finished = false;

  /**
   * Constructor.
   * @param reader file reader
   */
  public CSVFileParser(BufferedReader reader) {
    this.reader = reader;
  }

  /**
   * Move on to the next record, discarding any unread fields of the
   * current one. Once the end of the input has been reached the reader
   * is closed and every subsequent call returns <code>false</code>.
   *
   * @return whether there is another line
   * @throws IOException if the reader cannot be read or closed
   */
  public boolean nextRecord() throws IOException {
    return nextLine();
  }

  /**
   * Read the next physical line and reset the per-line state.
   *
   * @return false if the end of the input has been reached
   * @throws IOException if the reader cannot be read or closed
   */
  private boolean nextLine() throws IOException {
    emptyLastField = false;
    p = 0;
    if (finished) {
      line = "";
      return false;
    }
    // readLine() returning null is the only reliable end-of-input signal;
    // ready() merely says whether a read would block.
    String next = reader.readLine();
    if (next == null) {
      finished = true;
      // Keep line non-null so the field accessors stay usable after the end
      line = "";
      reader.close();
      return false;
    }
    line = next;
    lineNo++;
    return true;
  }

  /**
   * Return the line number.
   *
   * @return the current lineNo
   */
  public int getLineNo() {
    return lineNo;
  }

  /**
   * Are there any more tokens to come?
   * @return whether there are more fields
   */
  public boolean recordHasMoreFields() {
    return emptyLastField || p < line.length();
  }

  /**
   * @return the next token as a String
   * @throws NoSuchElementException if the record has no more fields
   * @throws IllegalArgumentException if a quoted field is still open
   *         when the input ends
   * @throws IOException if a continuation line cannot be read
   */
  public String nextField() throws IOException {
    if (emptyLastField) {
      emptyLastField = false;
      return "";
    }
    if (p >= line.length())
      throw new NoSuchElementException();
    if (line.charAt(p) == '"') {
      p++; // step over the opening quote
      return quotedField();
    }
    return plainField();
  }

  /**
   * Read an unquoted field, which runs up to the next comma or,
   * failing that, to the end of the line.
   */
  private String plainField() {
    int comma = line.indexOf(',', p);
    if (comma == -1) {
      String rest = line.substring(p);
      p = line.length();
      return rest;
    }
    String field = line.substring(p, comma);
    // A comma in last position means an empty field is still to come
    emptyLastField = comma == line.length() - 1;
    p = comma + 1;
    return field;
  }

  /**
   * Read the remainder of a quoted field whose opening quote has
   * already been consumed, pulling in further lines as required.
   */
  private String quotedField() throws IOException {
    StringBuilder sofar = new StringBuilder();
    int q = closingQuote();
    // If we don't find the end quote try reading in more lines
    // since fields can have \n in them (including blank lines)
    while (q == -1) {
      sofar.append(line.substring(p)).append('\n');
      if (!nextLine())
        throw new IllegalArgumentException("Unclosed quotes on line "
                                           + lineNo);
      q = closingQuote();
    }
    sofar.append(line.substring(p, q));
    // The closing quote is followed either by a comma or by the end of
    // the line, so the next field starts two characters further on
    int comma = q + 1;
    p = comma + 1;
    if (comma == line.length() - 1)
      emptyLastField = true;
    return sofar.toString();
  }

  /**
   * Find the quote which closes the quoted field we are in.
   *
   * @return the index of the closing quote within the current line,
   *         or -1 if the field continues on the next line
   */
  private int closingQuote() {
    // we need to allow for quotes inside quoted fields, so test for ",
    int q = line.indexOf("\",", p);
    if (q == -1) {
      // otherwise the field is closed only by a quote ending the line
      int last = line.length() - 1;
      if (last >= p && line.charAt(last) == '"')
        q = last;
    }
    return q;
  }

  /**
   * Test harness: prints every field of every record of a file.
   * @param args arguments, the first of which is the name of the file
   * @throws Exception if anything fails
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 1) {
      System.err.println("Usage: java " + CSVFileParser.class.getName()
                         + " <csv file>");
      return;
    }
    System.out.println("***** Reading file " + args[0]);
    BufferedReader reader = new BufferedReader(
                                new FileReader(new File(args[0])));
    try {
      CSVFileParser toks = new CSVFileParser(reader);
      int recordCount = 0;
      while (toks.nextRecord()) {
        System.out.println("*** Record " + ++recordCount);
        int i = 0;
        while (toks.recordHasMoreFields()) {
          System.out.println("Field " + ++i + ":" + toks.nextField());
        }
      }
    } finally {
      // The parser closes the reader at end of input, but not on failure
      reader.close();
    }
  }
}