Skip to content

Commit e991e6d

Browse files
committed
Add and use CSVParser.Builder and builder()
1 parent 9dcc633 commit e991e6d

File tree

12 files changed

+195
-47
lines changed

12 files changed

+195
-47
lines changed

src/changes/changes.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
<!-- FIX -->
4545
<!-- ADD -->
4646
<action type="add" issue="CSV-313" dev="ggregory" due-to="Gary Gregory">Add CSVPrinter.getRecordCount().</action>
47+
<action type="add" dev="ggregory" due-to="Gary Gregory">Add and use CSVParser.Builder and builder().</action>
4748
<!-- UPDATE -->
4849
<action type="update" dev="ggregory" due-to="Gary Gregory, Dependabot">Bump org.apache.commons:commons-parent from 76 to 78 #486, #495.</action>
4950
<action type="update" dev="ggregory" due-to="Gary Gregory, Dependabot">Bump org.codehaus.mojo:taglist-maven-plugin from 3.1.0 to 3.2.1 #493.</action>

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1370,6 +1370,15 @@ private static boolean containsLineBreak(final String source) {
13701370
return contains(source, Constants.CR) || contains(source, Constants.LF);
13711371
}
13721372

1373+
/**
1374+
* Creates a null-safe copy of the given instance.
1375+
*
1376+
* @return a copy of the given instance or null if the input is null.
1377+
*/
1378+
static CSVFormat copy(final CSVFormat format) {
1379+
return format != null ? format.copy() : null;
1380+
}
1381+
13731382
static boolean isBlank(final String value) {
13741383
return value == null || value.trim().isEmpty();
13751384
}

src/main/java/org/apache/commons/csv/CSVParser.java

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import java.util.stream.Stream;
4848
import java.util.stream.StreamSupport;
4949

50+
import org.apache.commons.io.build.AbstractStreamBuilder;
5051
import org.apache.commons.io.function.Uncheck;
5152

5253
/**
@@ -142,6 +143,65 @@
142143
*/
143144
public final class CSVParser implements Iterable<CSVRecord>, Closeable {
144145

146+
/**
147+
* Builds a new {@link CSVParser}.
148+
*
149+
* @since 1.13.0
150+
*/
151+
public static class Builder extends AbstractStreamBuilder<CSVParser, Builder> {
152+
153+
private CSVFormat format;
154+
private long characterOffset;
155+
private long recordNumber;
156+
157+
/**
158+
* Constructs a new instance.
159+
*/
160+
protected Builder() {
161+
// empty
162+
}
163+
164+
@SuppressWarnings("resource")
165+
@Override
166+
public CSVParser get() throws IOException {
167+
return new CSVParser(getReader(), format != null ? format : CSVFormat.DEFAULT, characterOffset, recordNumber);
168+
}
169+
170+
/**
171+
* Sets the lexer offset when the parser does not start parsing at the beginning of the source.
172+
*
173+
* @param characterOffset the lexer offset.
174+
* @return this instance.
175+
*/
176+
public Builder setCharacterOffset(final long characterOffset) {
177+
this.characterOffset = characterOffset;
178+
return asThis();
179+
}
180+
181+
/**
182+
* Sets the CSV format. A copy of the given format is kept.
183+
*
184+
* @param format the CSV format, null is equivalent to {@link CSVFormat#DEFAULT}.
185+
* @return this instance.
186+
*/
187+
public Builder setFormat(final CSVFormat format) {
188+
this.format = CSVFormat.copy(format);
189+
return asThis();
190+
}
191+
192+
/**
193+
* Sets the next record number to assign.
194+
*
195+
* @param recordNumber the next record number to assign.
196+
* @return this instance.
197+
*/
198+
public Builder setRecordNumber(final long recordNumber) {
199+
this.recordNumber = recordNumber;
200+
return asThis();
201+
}
202+
203+
}
204+
145205
final class CSVRecordIterator implements Iterator<CSVRecord> {
146206
private CSVRecord current;
147207

@@ -190,7 +250,6 @@ public void remove() {
190250
throw new UnsupportedOperationException();
191251
}
192252
}
193-
194253
/**
195254
* Header information based on name and position.
196255
*/
@@ -212,6 +271,16 @@ private static final class Headers {
212271
}
213272
}
214273

274+
/**
275+
* Creates a new builder.
276+
*
277+
* @return a new builder.
278+
* @since 1.13.0
279+
*/
280+
public static Builder builder() {
281+
return new Builder();
282+
}
283+
215284
/**
216285
* Creates a parser for the given {@link File}.
217286
*
@@ -427,7 +496,7 @@ public CSVParser(final Reader reader, final CSVFormat format) throws IOException
427496
* @param characterOffset
428497
* Lexer offset when the parser does not start parsing at the beginning of the source.
429498
* @param recordNumber
430-
* The next record number to assign
499+
* The next record number to assign.
431500
* @throws IllegalArgumentException
432501
* If the parameters of the format are inconsistent or if either the reader or format is null.
433502
* @throws IOException

src/test/java/org/apache/commons/csv/CSVFileParserTest.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ private String readTestData(final BufferedReader reader) throws IOException {
5858
@ParameterizedTest
5959
@MethodSource("generateData")
6060
public void testCSVFile(final File testFile) throws Exception {
61-
try (FileReader fr = new FileReader(testFile); BufferedReader testData = new BufferedReader(fr)) {
62-
String line = readTestData(testData);
61+
try (FileReader fr = new FileReader(testFile); BufferedReader testDataReader = new BufferedReader(fr)) {
62+
String line = readTestData(testDataReader);
6363
// JUnit 5 takes the value under test first and the failure message last.
assertNotNull(line, "file must contain config line");
6464
final String[] split = line.split(" ");
6565
assertTrue(split.length >= 1, testFile.getName() + " require 1 param");
@@ -81,7 +81,7 @@ public void testCSVFile(final File testFile) throws Exception {
8181
fail(testFile.getName() + " unexpected option: " + option);
8282
}
8383
}
84-
line = readTestData(testData); // get string version of format
84+
line = readTestData(testDataReader); // get string version of format
8585
assertEquals(line, format.toString(), testFile.getName() + " Expected format ");
8686

8787
// Now parse the file and compare against the expected results
@@ -94,7 +94,7 @@ public void testCSVFile(final File testFile) throws Exception {
9494
parsed += "#" + comment.replace("\n", "\\n");
9595
}
9696
final int count = record.size();
97-
assertEquals(readTestData(testData), count + ":" + parsed, testFile.getName());
97+
assertEquals(readTestData(testDataReader), count + ":" + parsed, testFile.getName());
9898
}
9999
}
100100
}

src/test/java/org/apache/commons/csv/CSVParserTest.java

Lines changed: 89 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,15 @@
6969
*/
7070
public class CSVParserTest {
7171

72+
private static final CSVFormat EXCEL_WITH_HEADER = CSVFormat.EXCEL.withHeader();
73+
7274
private static final Charset UTF_8 = StandardCharsets.UTF_8;
7375

7476
private static final String UTF_8_NAME = UTF_8.name();
7577

7678
private static final String CSV_INPUT = "a,b,c,d\n" + " a , b , 1 2 \n" + "\"foo baar\", b,\n" +
77-
// + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n";
78-
" \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping
79+
// + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n";
80+
" \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping
7981

8082
private static final String CSV_INPUT_1 = "a,b,c,d";
8183

@@ -220,48 +222,54 @@ public void testBackslashEscapingOld() throws IOException {
220222
@Disabled("CSV-107")
221223
public void testBOM() throws IOException {
222224
final URL url = ClassLoader.getSystemClassLoader().getResource("org/apache/commons/csv/CSVFileParser/bom.csv");
223-
try (final CSVParser parser = CSVParser.parse(url, StandardCharsets.UTF_8, CSVFormat.EXCEL.withHeader())) {
225+
try (final CSVParser parser = CSVParser.parse(url, StandardCharsets.UTF_8, EXCEL_WITH_HEADER)) {
224226
parser.forEach(record -> assertNotNull(record.get("Date")));
225227
}
226228
}
227229

228230
@Test
229231
public void testBOMInputStreamParserWithInputStream() throws IOException {
230232
try (final BOMInputStream inputStream = createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv");
231-
final CSVParser parser = CSVParser.parse(inputStream, UTF_8, CSVFormat.EXCEL.withHeader())) {
233+
final CSVParser parser = CSVParser.parse(inputStream, UTF_8, EXCEL_WITH_HEADER)) {
232234
parser.forEach(record -> assertNotNull(record.get("Date")));
233235
}
234236
}
235237

236238
@Test
237239
public void testBOMInputStreamParserWithReader() throws IOException {
238240
try (final Reader reader = new InputStreamReader(createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"), UTF_8_NAME);
239-
final CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader())) {
241+
final CSVParser parser = CSVParser.builder()
242+
.setReader(reader)
243+
.setFormat(EXCEL_WITH_HEADER)
244+
.get()) {
240245
parser.forEach(record -> assertNotNull(record.get("Date")));
241246
}
242247
}
243248

244249
@Test
245250
public void testBOMInputStreamParseWithReader() throws IOException {
246251
try (final Reader reader = new InputStreamReader(createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"), UTF_8_NAME);
247-
final CSVParser parser = CSVParser.parse(reader, CSVFormat.EXCEL.withHeader())) {
252+
final CSVParser parser = CSVParser.builder()
253+
.setReader(reader)
254+
.setFormat(EXCEL_WITH_HEADER)
255+
.get()) {
248256
parser.forEach(record -> assertNotNull(record.get("Date")));
249257
}
250258
}
251259

252260
@Test
253261
public void testCarriageReturnEndings() throws IOException {
254-
final String code = "foo\rbaar,\rhello,world\r,kanu";
255-
try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
262+
final String string = "foo\rbaar,\rhello,world\r,kanu";
263+
try (final CSVParser parser = CSVParser.builder().setCharSequence(string).get()) {
256264
final List<CSVRecord> records = parser.getRecords();
257265
assertEquals(4, records.size());
258266
}
259267
}
260268

261269
@Test
262270
public void testCarriageReturnLineFeedEndings() throws IOException {
263-
final String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
264-
try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
271+
final String string = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
272+
try (final CSVParser parser = CSVParser.builder().setCharSequence(string).get()) {
265273
final List<CSVRecord> records = parser.getRecords();
266274
assertEquals(4, records.size());
267275
}
@@ -569,7 +577,7 @@ public void testExcelFormat2() throws Exception {
569577
@Test
570578
public void testExcelHeaderCountLessThanData() throws Exception {
571579
final String code = "A,B,C,,\r\na,b,c,d,e\r\n";
572-
try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL.withHeader())) {
580+
try (final CSVParser parser = CSVParser.parse(code, EXCEL_WITH_HEADER)) {
573581
parser.getRecords().forEach(record -> {
574582
assertEquals("a", record.get("A"));
575583
assertEquals("b", record.get("B"));
@@ -783,7 +791,10 @@ public void testGetOneLine() throws IOException {
783791
public void testGetOneLineOneParser() throws IOException {
784792
final CSVFormat format = CSVFormat.DEFAULT;
785793
try (final PipedWriter writer = new PipedWriter();
786-
final CSVParser parser = new CSVParser(new PipedReader(writer), format)) {
794+
final CSVParser parser = CSVParser.builder()
795+
.setReader(new PipedReader(writer))
796+
.setFormat(format)
797+
.get()) {
787798
writer.append(CSV_INPUT_1);
788799
writer.append(format.getRecordSeparator());
789800
final CSVRecord record1 = parser.nextRecord();
@@ -1232,35 +1243,68 @@ public void testNotValueCSV() throws IOException {
12321243
public void testParse() throws Exception {
12331244
final ClassLoader loader = ClassLoader.getSystemClassLoader();
12341245
final URL url = loader.getResource("org/apache/commons/csv/CSVFileParser/test.csv");
1235-
final CSVFormat format = CSVFormat.DEFAULT.withHeader("A", "B", "C", "D");
1246+
final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader("A", "B", "C", "D").build();
12361247
final Charset charset = StandardCharsets.UTF_8;
1237-
1238-
try (@SuppressWarnings("resource") // CSVParser closes the input resource
1239-
final CSVParser parser = CSVParser.parse(new InputStreamReader(url.openStream(), charset), format)) {
1248+
// Reader
1249+
try (final CSVParser parser = CSVParser.parse(new InputStreamReader(url.openStream(), charset), format)) {
12401250
parseFully(parser);
12411251
}
1242-
try (final CSVParser parser = CSVParser.parse(new String(Files.readAllBytes(Paths.get(url.toURI())), charset), format)) {
1252+
try (final CSVParser parser = CSVParser.builder().setReader(new InputStreamReader(url.openStream(), charset)).setFormat(format).get()) {
12431253
parseFully(parser);
12441254
}
1245-
try (final CSVParser parser = CSVParser.parse(new File(url.toURI()), charset, format)) {
1255+
// String
1256+
final Path path = Paths.get(url.toURI());
1257+
final String string = new String(Files.readAllBytes(path), charset);
1258+
try (final CSVParser parser = CSVParser.parse(string, format)) {
12461259
parseFully(parser);
12471260
}
1248-
try (@SuppressWarnings("resource") // CSVParser closes the input resource
1249-
final CSVParser parser = CSVParser.parse(url.openStream(), charset, format)) {
1261+
try (final CSVParser parser = CSVParser.builder().setCharSequence(string).setFormat(format).get()) {
12501262
parseFully(parser);
12511263
}
1252-
try (final CSVParser parser = CSVParser.parse(Paths.get(url.toURI()), charset, format)) {
1264+
// File
1265+
final File file = new File(url.toURI());
1266+
try (final CSVParser parser = CSVParser.parse(file, charset, format)) {
12531267
parseFully(parser);
12541268
}
1269+
try (final CSVParser parser = CSVParser.builder().setFile(file).setCharset(charset).setFormat(format).get()) {
1270+
parseFully(parser);
1271+
}
1272+
// InputStream
1273+
try (final CSVParser parser = CSVParser.parse(url.openStream(), charset, format)) {
1274+
parseFully(parser);
1275+
}
1276+
try (final CSVParser parser = CSVParser.builder().setInputStream(url.openStream()).setCharset(charset).setFormat(format).get()) {
1277+
parseFully(parser);
1278+
}
1279+
// Path
1280+
try (final CSVParser parser = CSVParser.parse(path, charset, format)) {
1281+
parseFully(parser);
1282+
}
1283+
try (final CSVParser parser = CSVParser.builder().setPath(path).setCharset(charset).setFormat(format).get()) {
1284+
parseFully(parser);
1285+
}
1286+
// URL
12551287
try (final CSVParser parser = CSVParser.parse(url, charset, format)) {
12561288
parseFully(parser);
12571289
}
1290+
try (final CSVParser parser = CSVParser.builder().setURI(url.toURI()).setCharset(charset).setFormat(format).get()) {
1291+
parseFully(parser);
1292+
}
1293+
// InputStreamReader
12581294
try (final CSVParser parser = new CSVParser(new InputStreamReader(url.openStream(), charset), format)) {
12591295
parseFully(parser);
12601296
}
1297+
try (final CSVParser parser = CSVParser.builder().setReader(new InputStreamReader(url.openStream(), charset)).setFormat(format).get()) {
1298+
parseFully(parser);
1299+
}
1300+
// InputStreamReader with longs
12611301
try (final CSVParser parser = new CSVParser(new InputStreamReader(url.openStream(), charset), format, /* characterOffset= */0, /* recordNumber= */1)) {
12621302
parseFully(parser);
12631303
}
1304+
try (final CSVParser parser = CSVParser.builder().setReader(new InputStreamReader(url.openStream(), charset)).setFormat(format).setCharacterOffset(0)
1305+
.setRecordNumber(0).get()) {
1306+
parseFully(parser);
1307+
}
12641308
}
12651309

12661310
@Test
@@ -1380,7 +1424,10 @@ public void testParsingPrintedEmptyFirstColumn(final CSVFormat.Predefined format
13801424
try (CSVPrinter printer = new CSVPrinter(buf, format.getFormat())) {
13811425
printer.printRecords(Stream.of(lines));
13821426
}
1383-
try (CSVParser csvRecords = new CSVParser(new StringReader(buf.toString()), format.getFormat())) {
1427+
try (CSVParser csvRecords = CSVParser.builder()
1428+
.setReader(new StringReader(buf.toString()))
1429+
.setFormat(format.getFormat())
1430+
.get()) {
13841431
for (final String[] line : lines) {
13851432
assertArrayEquals(line, csvRecords.nextRecord().values());
13861433
}
@@ -1654,6 +1701,26 @@ private void validateRecordPosition(final String lineSeparator) throws IOExcepti
16541701
assertEquals(code.indexOf("EOF"), record.getCharacterPosition());
16551702
}
16561703
// now try to read starting at record 3
1704+
try (CSVParser parser = CSVParser.builder()
1705+
.setReader(new StringReader(code.substring((int) positionRecord3)))
1706+
.setFormat(format)
1707+
.setCharacterOffset(positionRecord3)
1708+
.setRecordNumber(3)
1709+
.get()) {
1710+
CSVRecord record;
1711+
// nextRecord
1712+
assertNotNull(record = parser.nextRecord());
1713+
assertEquals(3, record.getRecordNumber());
1714+
assertEquals(code.indexOf("'A"), record.getCharacterPosition());
1715+
assertEquals("A" + lineSeparator + "A", record.get(0));
1716+
assertEquals("B" + lineSeparator + "B", record.get(1));
1717+
assertEquals("CC", record.get(2));
1718+
// nextRecord
1719+
assertNotNull(record = parser.nextRecord());
1720+
assertEquals(4, record.getRecordNumber());
1721+
assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition());
1722+
assertEquals("\u00c4", record.get(0));
1723+
} // again with ctor
16571724
try (CSVParser parser = new CSVParser(new StringReader(code.substring((int) positionRecord3)), format, positionRecord3, 3)) {
16581725
CSVRecord record;
16591726
// nextRecord

src/test/java/org/apache/commons/csv/PerformanceTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ private static void testExtendedBuffer(final boolean makeString) throws Exceptio
299299
}
300300

301301
private static void testParseCommonsCSV() throws Exception {
302-
testParser("CSV", () -> new CSVParser(createReader(), format));
302+
testParser("CSV", () -> CSVParser.builder().setReader(createReader()).setFormat(format).get());
303303
}
304304

305305
private static void testParsePath() throws Exception {

0 commit comments

Comments
 (0)