Skip to content

Commit 32437c0

Browse files
vboulaye authored and cowtowncoder committed
Fix #15: Implement CsvParser.Feature.SKIP_EMPTY_LINES
1 parent 4eff590 commit 32437c0

File tree

8 files changed

+235
-45
lines changed

8 files changed

+235
-45
lines changed

csv/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ abstractions.
4040
<dependency>
4141
<groupId>com.google.guava</groupId>
4242
<artifactId>guava</artifactId>
43-
<version>18.0</version>
43+
<version>25.0-jre</version>
4444
<scope>test</scope>
4545
</dependency>
4646
</dependencies>

csv/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvParser.java

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,14 @@ public enum Feature
7373
IGNORE_TRAILING_UNMAPPABLE(false),
7474

7575
/**
76-
* Feature that allows skipping input lines that are completely empty, instead
76+
* Feature that allows skipping input lines that are completely empty or blank (composed only of whitespace),
77+
* instead of being decoded as lines of just a single column with an empty/blank String value (or,
7879
* depending on binding, `null`).
7980
*<p>
8081
* Feature is disabled by default.
8182
*
82-
* @since 2.9
83+
* @since 2.10.1
8384
*/
8485
SKIP_EMPTY_LINES(false),
8586

@@ -787,19 +788,19 @@ protected void _readHeaderLine() throws IOException {
787788
*/
788789
protected JsonToken _handleStartDoc() throws IOException
789790
{
790-
// also, if comments enabled, may need to skip leading ones
791-
_reader.skipLeadingComments();
791+
// also, if comments are allowed or empty lines are to be skipped, may need to skip leading ones
792+
_reader.skipLinesWhenNeeded();
792793
// First things first: are we expecting header line? If so, read, process
793794
if (_schema.usesHeader()) {
794795
_readHeaderLine();
795-
_reader.skipLeadingComments();
796+
_reader.skipLinesWhenNeeded();
796797
}
797798
// and if we are to skip the first data line, skip it
798799
if (_schema.skipsFirstDataRow()) {
799800
_reader.skipLine();
800-
_reader.skipLeadingComments();
801+
_reader.skipLinesWhenNeeded();
801802
}
802-
803+
803804
// Only one real complication, actually; empty documents (zero bytes).
804805
// Those have no entries. Should be easy enough to detect like so:
805806
final boolean wrapAsArray = Feature.WRAP_AS_ARRAY.enabledIn(_formatFeatures);

csv/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java

Lines changed: 55 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,12 @@ public class CsvDecoder
6565
protected boolean _trimSpaces;
6666

6767
protected boolean _allowComments;
68-
68+
69+
/**
70+
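* Flag derived from {@code CsvParser.Feature.SKIP_EMPTY_LINES}: when set, lines that are
* empty or contain only spaces are skipped instead of being decoded.
*
* @since 2.10.1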
71+
*/
72+
protected boolean _skipBlankLines; // NOTE: can be final in 3.0, not before
73+
6974
/**
7075
* Maximum of quote character, linefeeds (\r and \n), escape character.
7176
*/
@@ -111,14 +116,14 @@ public class CsvDecoder
111116
* needs to be handled (indicates end-of-record).
112117
*/
113118
protected int _pendingLF = 0;
114-
119+
115120
/**
116121
* Flag that indicates whether parser is closed or not. Gets
117122
* set when parser is either closed by explicit call
118123
* ({@link #close}) or when end-of-input is reached.
119124
*/
120125
protected boolean _closed;
121-
126+
122127
/*
123128
/**********************************************************************
124129
/* Current input location information
@@ -152,7 +157,7 @@ public class CsvDecoder
152157
* For big (gigabyte-sized) sizes are possible, needs to be long,
153158
* unlike pointers and sizes related to in-memory buffers.
154159
*/
155-
protected long _tokenInputTotal = 0;
160+
protected long _tokenInputTotal = 0;
156161

157162
/**
158163
* Input row on which current token starts, 1-based
@@ -202,8 +207,7 @@ public class CsvDecoder
202207

203208
final static double MIN_INT_D = Integer.MIN_VALUE;
204209
final static double MAX_INT_D = Integer.MAX_VALUE;
205-
206-
210+
207211
// Digits, numeric
208212
final protected static int INT_0 = '0';
209213
final protected static int INT_1 = '1';
@@ -254,8 +258,8 @@ public class CsvDecoder
254258
/**********************************************************************
255259
*/
256260

257-
@SuppressWarnings("deprecation")
258-
public CsvDecoder(CsvParser owner, IOContext ctxt, Reader r, CsvSchema schema, TextBuffer textBuffer,
261+
public CsvDecoder(CsvParser owner, IOContext ctxt, Reader r, CsvSchema schema,
262+
TextBuffer textBuffer,
259263
int stdFeatures, int csvFeatures)
260264
{
261265
_owner = owner;
@@ -266,6 +270,7 @@ public CsvDecoder(CsvParser owner, IOContext ctxt, Reader r, CsvSchema schema, T
266270
final boolean legacy = JsonParser.Feature.ALLOW_YAML_COMMENTS.enabledIn(stdFeatures);
267271
_allowComments = legacy | CsvParser.Feature.ALLOW_COMMENTS.enabledIn(csvFeatures);
268272
_trimSpaces = CsvParser.Feature.TRIM_SPACES.enabledIn(csvFeatures);
273+
_skipBlankLines = CsvParser.Feature.SKIP_EMPTY_LINES.enabledIn(csvFeatures);
269274
_inputBuffer = ctxt.allocTokenBuffer();
270275
_bufferRecyclable = true; // since we allocated it
271276
_inputSource = r;
@@ -292,6 +297,7 @@ public void setSchema(CsvSchema schema)
292297
*/
293298
public void overrideFormatFeatures(int csvFeatures) {
// Re-derive the flags that depend on the CSV format feature bit mask.
// NOTE(review): _allowComments is not refreshed here, although the constructor also derives it
// (in part) from csvFeatures -- confirm that this is intentional.
294299
_trimSpaces = CsvParser.Feature.TRIM_SPACES.enabledIn(csvFeatures);
300+
// Keep blank-line skipping in sync with SKIP_EMPTY_LINES as well.
_skipBlankLines = CsvParser.Feature.SKIP_EMPTY_LINES.enabledIn(csvFeatures);
295301
}
296302

297303
/*
@@ -482,39 +488,53 @@ public boolean startNewLine() throws IOException
482488
}
483489
_handleLF();
484490
}
485-
/* For now, we will only require that there is SOME data
486-
* following linefeed -- even spaces will do.
487-
* In future we may want to use better heuristics to possibly
488-
* skip trailing empty line?
489-
*/
490-
if ((_inputPtr >= _inputEnd) && !loadMore()) {
491-
return false;
492-
}
493-
494-
if (_allowComments && _inputBuffer[_inputPtr] == '#') {
495-
int i = _skipCommentLines();
496-
// end-of-input?
497-
if (i < 0) {
498-
return false;
499-
}
500-
// otherwise push last read char back
501-
--_inputPtr;
502-
}
503-
return true;
491+
return skipLinesWhenNeeded();
504492
}
505493

506-
public void skipLeadingComments() throws IOException
507-
{
508-
if (_allowComments) {
509-
if ((_inputPtr < _inputEnd) || loadMore()) {
510-
if (_inputBuffer[_inputPtr] == '#') {
511-
_skipCommentLines();
512-
--_inputPtr;
494+
/**
495+
* Optionally skips lines that are empty or are comments, depending on the features activated in the parser.
* <p>
* If neither comment handling nor blank-line skipping is enabled, this only reports whether more
* input is available. Otherwise it consumes line breaks and spaces and, when comments are allowed,
* lines whose first non-space character is '#', until a character that has to be parsed is found;
* the input pointer is then moved back to the recorded start of that line.
496+
* @return true if there is a line left to parse, false if the end of input was reached
497+
* @throws IOException
498+
* @since 2.10.1
499+
*/
500+
public boolean skipLinesWhenNeeded() throws IOException {
501+
// Fast path: nothing may be skipped, so only check for remaining input.
if (!(_allowComments || _skipBlankLines)) {
502+
return hasMoreInput();
503+
}
504+
// Buffer index of the first character of the line currently being examined.
int firstCharacterPtr = _inputPtr;
505+
while (hasMoreInput()) {
506+
char ch = _inputBuffer[_inputPtr++];
507+
// NOTE(review): this branch is not guarded by _skipBlankLines, so empty/blank lines also appear
// to be skipped when only comments are allowed -- confirm that this is intended.
if (ch == '\r' || ch == '\n') {
508+
_pendingLF = ch;
509+
_handleLF();
510+
// track the start of the new line
511+
firstCharacterPtr = _inputPtr;
512+
continue;
513+
}
514+
if (ch == ' ') {
515+
// skip all blanks (in both comments/blanks skip mode)
516+
continue;
517+
}
518+
if (_allowComments) {
519+
// NOTE(review): firstCharacterPtr was captured before later hasMoreInput() calls; if
// hasMoreInput() can refill _inputBuffer, this index (and the rollback below) may no longer
// refer to the start of the current line -- confirm against hasMoreInput()/loadMore().
if (_inputBuffer[firstCharacterPtr] == '#') {
520+
// on a commented line, skip everything
521+
continue;
522+
}
523+
if (ch == '#') {
524+
// we reach this point when whitespace precedes the hash character
525+
// move the firstCharacterPtr to the '#' location in order to skip the line completely
526+
firstCharacterPtr = _inputPtr-1;
527+
continue;
513528
}
514529
}
530+
// we reached a non-skippable character, this line needs to be parsed
531+
// roll the input pointer back to the recorded beginning of the line
532+
_inputPtr = firstCharacterPtr;
533+
return true; // processing can go on
515534
}
535+
return false; // end of input
516536
}
517-
537+
518538
protected int _skipCommentLines() throws IOException
519539
{
520540
while ((_inputPtr < _inputEnd) || loadMore()) {

csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/CommentsTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
// Tests for [csv#56]
99
public class CommentsTest extends ModuleTestBase
1010
{
11-
final String CSV_WITH_COMMENTS = "x,y\n# comment!\na,b\n# another...\n";
11+
final String CSV_WITH_COMMENTS = "x,y\n# comment!\na,b\n # another...\n";
1212

1313
public void testWithoutComments() throws Exception
1414
{
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
package com.fasterxml.jackson.dataformat.csv.deser;
2+
3+
import com.fasterxml.jackson.databind.ObjectReader;
4+
import com.fasterxml.jackson.dataformat.csv.CsvParser;
5+
import com.fasterxml.jackson.dataformat.csv.ModuleTestBase;
6+
7+
import static org.junit.Assert.assertArrayEquals;
8+
9+
// for [dataformats-text#15]: Allow skipping of empty lines
10+
public class SkipBlankLines15Test extends ModuleTestBase {
11+
12+
private static final String CSV_WITH_EMPTY_LINE = "1,\"xyz\"\n\ntrue,\n";
13+
private static final String CSV_WITH_BLANK_LINE = "1,\"xyz\"\n \ntrue,\n";
14+
private static final String CSV_WITH_BLANK_LINE_AND_COMMENT = "1,\"xyz\"\n \n #comment\n\ntrue,\n";
15+
private static final String CSV_WITH_FIRST_BLANK_LINE = "\n1,\"xyz\"\ntrue,\n";
16+
private static final String CSV_WITH_TRAILING_BLANK_LINES = "1,\"xyz\"\ntrue,\n \n\n";
17+
18+
public void testCsvWithEmptyLineSkipBlankLinesFeatureDisabled() throws Exception {
19+
String[][] rows = mapperForCsvAsArray().readValue(CSV_WITH_EMPTY_LINE);
20+
// First, verify default behavior:
21+
assertArrayEquals(expected(
22+
row("1", "xyz"),
23+
row(""),
24+
row("true", "")
25+
), rows);
26+
}
27+
28+
public void testCsvWithEmptyLineSkipBlankLinesFeatureEnabled() throws Exception {
29+
String[][] rows = mapperForCsvAsArray()
30+
.with(CsvParser.Feature.SKIP_EMPTY_LINES)
31+
.readValue(CSV_WITH_EMPTY_LINE);
32+
// empty line is skipped
33+
assertArrayEquals(expected(
34+
row("1", "xyz"),
35+
row("true", "")
36+
), rows);
37+
}
38+
39+
40+
public void testCsvWithBlankLineSkipBlankLinesFeatureDisabled() throws Exception {
41+
String[][] rows = mapperForCsvAsArray()
42+
.readValue(CSV_WITH_BLANK_LINE);
43+
// First, verify default behavior:
44+
assertArrayEquals(expected(
45+
row("1", "xyz"),
46+
row(" "),
47+
row("true", "")
48+
), rows);
49+
}
50+
51+
public void testCsvWithBlankLineSkipBlankLinesFeatureEnabled() throws Exception {
52+
String[][] rows = mapperForCsvAsArray()
53+
.with(CsvParser.Feature.SKIP_EMPTY_LINES)
54+
.readValue(CSV_WITH_BLANK_LINE);
55+
// blank line is skipped
56+
assertArrayEquals(expected(
57+
row("1", "xyz"),
58+
row("true", "")
59+
), rows);
60+
}
61+
62+
public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureDisabled() throws Exception {
63+
String[][] rows = mapperForCsvAsArray()
64+
.readValue(CSV_WITH_BLANK_LINE_AND_COMMENT);
65+
// First, verify default behavior:
66+
assertArrayEquals(expected(
67+
row("1", "xyz"),
68+
row(" "),
69+
row(" #comment"),
70+
row(""),
71+
row("true", "")
72+
), rows);
73+
}
74+
75+
public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureEnabled() throws Exception {
76+
String[][] rows = mapperForCsvAsArray()
77+
.with(CsvParser.Feature.SKIP_EMPTY_LINES)
78+
.readValue(CSV_WITH_BLANK_LINE_AND_COMMENT);
79+
// blank/empty lines are skipped
80+
assertArrayEquals(expected(
81+
row("1", "xyz"),
82+
row(" #comment"),
83+
row("true", "")
84+
), rows);
85+
}
86+
87+
public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureEnabledAndAllowComments() throws Exception {
88+
String[][] rows = mapperForCsvAsArray()
89+
.with(CsvParser.Feature.SKIP_EMPTY_LINES)
90+
.with(CsvParser.Feature.ALLOW_COMMENTS)
91+
.readValue(CSV_WITH_BLANK_LINE_AND_COMMENT);
92+
// blank/empty/comment lines are skipped
93+
assertArrayEquals(expected(
94+
row("1", "xyz"),
95+
row("true", "")
96+
), rows);
97+
}
98+
99+
public void testCsvWithFirstBlankLineSkipBlankLinesFeatureDisabled() throws Exception {
100+
String[][] rows = mapperForCsvAsArray()
101+
.readValue(CSV_WITH_FIRST_BLANK_LINE);
102+
// First, verify default behavior:
103+
assertArrayEquals(expected(
104+
row(""),
105+
row("1", "xyz"),
106+
row("true", "")
107+
), rows);
108+
}
109+
110+
public void testCsvWithFirstBlankLineSkipBlankLinesFeatureEnabled() throws Exception {
111+
String[][] rows = mapperForCsvAsArray()
112+
.with(CsvParser.Feature.SKIP_EMPTY_LINES)
113+
.readValue(CSV_WITH_FIRST_BLANK_LINE);
114+
// blank line is skipped
115+
assertArrayEquals(expected(
116+
row("1", "xyz"),
117+
row("true", "")
118+
), rows);
119+
}
120+
121+
122+
public void testCsvWithTrailingBlankLineSkipBlankLinesFeatureDisabled() throws Exception {
123+
String[][] rows = mapperForCsvAsArray()
124+
.readValue(CSV_WITH_TRAILING_BLANK_LINES);
125+
// First, verify default behavior:
126+
assertArrayEquals(expected(
127+
row("1", "xyz"),
128+
row("true", ""),
129+
row(" "),
130+
row("")
131+
), rows);
132+
}
133+
134+
public void testCsvWithTrailingBlankLineSkipBlankLinesFeatureEnabled() throws Exception {
135+
String[][] rows = mapperForCsvAsArray()
136+
.with(CsvParser.Feature.SKIP_EMPTY_LINES)
137+
.readValue(CSV_WITH_FIRST_BLANK_LINE);
138+
// blank lines are skipped
139+
assertArrayEquals(expected(
140+
row("1", "xyz"),
141+
row("true", "")
142+
), rows);
143+
}
144+
145+
private ObjectReader mapperForCsvAsArray() {
146+
// when wrapped as an array, we'll get array of Lists:
147+
return mapperForCsv()
148+
.readerFor(String[][].class)
149+
.with(CsvParser.Feature.WRAP_AS_ARRAY);
150+
}
151+
152+
private String[][] expected(String[]... rowInputs) {
153+
return rowInputs;
154+
}
155+
156+
private String[] row(String... cellInputs) {
157+
return cellInputs;
158+
}
159+
}

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<parent>
44
<groupId>com.fasterxml.jackson</groupId>
55
<artifactId>jackson-base</artifactId>
6-
<version>2.10.0</version>
6+
<version>2.10.1-SNAPSHOT</version>
77
</parent>
88
<groupId>com.fasterxml.jackson.dataformat</groupId>
99
<artifactId>jackson-dataformats-text</artifactId>

release-notes/CREDITS-2.x

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,3 +79,8 @@ Matti Bickel (wundrian@github)
7979
Maarten Winkels (mwinkels@github)
8080
* Contributed fix for #83: Update index of sequence context
8181
(2.10.0)
82+
83+
Vincent Boulaye (vboulaye@github)
84+
* Implemented #15: Add a `CsvParser.Feature.SKIP_EMPTY_LINES` to allow
85+
skipping empty rows
86+
(2.10.1)

0 commit comments

Comments
 (0)