Skip to content

Commit 27a388f

Browse files
committed
Merge branch '2.10' into 2.11
2 parents 2348e1d + d39071a commit 27a388f

File tree

6 files changed

+246
-46
lines changed

6 files changed

+246
-46
lines changed

csv/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvParser.java

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,14 @@ public enum Feature
7373
IGNORE_TRAILING_UNMAPPABLE(false),
7474

7575
/**
76-
* Feature that allows skipping input lines that are completely empty, instead
76+
* Feature that allows skipping input lines that are completely empty or blank (composed only of whitespace),
77+
* instead
7778
* of being decoded as lines of just a single column with an empty/blank String value (or,
7879
* depending on binding, `null`).
7980
*<p>
8081
* Feature is disabled by default.
8182
*
82-
* @since 2.9
83+
* @since 2.10
8384
*/
8485
SKIP_EMPTY_LINES(false),
8586

@@ -787,19 +788,19 @@ protected void _readHeaderLine() throws IOException {
787788
*/
788789
protected JsonToken _handleStartDoc() throws IOException
789790
{
790-
// also, if comments enabled, may need to skip leading ones
791-
_reader.skipLeadingComments();
791+
// also, if comments are enabled or empty lines are to be skipped, may need to skip leading ones
792+
_reader.skipLinesWhenNeeded();
792793
// First things first: are we expecting header line? If so, read, process
793794
if (_schema.usesHeader()) {
794795
_readHeaderLine();
795-
_reader.skipLeadingComments();
796+
_reader.skipLinesWhenNeeded();
796797
}
797798
// and if we are to skip the first data line, skip it
798799
if (_schema.skipsFirstDataRow()) {
799800
_reader.skipLine();
800-
_reader.skipLeadingComments();
801+
_reader.skipLinesWhenNeeded();
801802
}
802-
803+
803804
// Only one real complication, actually; empty documents (zero bytes).
804805
// Those have no entries. Should be easy enough to detect like so:
805806
final boolean wrapAsArray = Feature.WRAP_AS_ARRAY.enabledIn(_formatFeatures);

csv/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java

Lines changed: 66 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,12 @@ public class CsvDecoder
6565
protected boolean _trimSpaces;
6666

6767
protected boolean _allowComments;
68-
68+
69+
/**
70+
* @since 2.10.1
71+
*/
72+
protected boolean _skipBlankLines; // NOTE: can be final in 3.0, not before
73+
6974
/**
7075
* Maximum of quote character, linefeeds (\r and \n), escape character.
7176
*/
@@ -111,14 +116,14 @@ public class CsvDecoder
111116
* needs to be handled (indicates end-of-record).
112117
*/
113118
protected int _pendingLF = 0;
114-
119+
115120
/**
116121
* Flag that indicates whether parser is closed or not. Gets
117122
* set when parser is either closed by explicit call
118123
* ({@link #close}) or when end-of-input is reached.
119124
*/
120125
protected boolean _closed;
121-
126+
122127
/*
123128
/**********************************************************************
124129
/* Current input location information
@@ -152,7 +157,7 @@ public class CsvDecoder
152157
* Since big (gigabyte-sized) inputs are possible, this needs to be a long,
153158
* unlike pointers and sizes related to in-memory buffers.
154159
*/
155-
protected long _tokenInputTotal = 0;
160+
protected long _tokenInputTotal = 0;
156161

157162
/**
158163
* Input row on which current token starts, 1-based
@@ -202,8 +207,7 @@ public class CsvDecoder
202207

203208
final static double MIN_INT_D = Integer.MIN_VALUE;
204209
final static double MAX_INT_D = Integer.MAX_VALUE;
205-
206-
210+
207211
// Digits, numeric
208212
final protected static int INT_0 = '0';
209213
final protected static int INT_1 = '1';
@@ -254,18 +258,19 @@ public class CsvDecoder
254258
/**********************************************************************
255259
*/
256260

257-
@SuppressWarnings("deprecation")
258-
public CsvDecoder(CsvParser owner, IOContext ctxt, Reader r, CsvSchema schema, TextBuffer textBuffer,
261+
public CsvDecoder(CsvParser owner, IOContext ctxt, Reader r, CsvSchema schema,
262+
TextBuffer textBuffer,
259263
int stdFeatures, int csvFeatures)
260264
{
261265
_owner = owner;
262266
_ioContext = ctxt;
263267
_inputSource = r;
264268
_textBuffer = textBuffer;
265269
_autoCloseInput = JsonParser.Feature.AUTO_CLOSE_SOURCE.enabledIn(stdFeatures);
266-
final boolean legacy = JsonParser.Feature.ALLOW_YAML_COMMENTS.enabledIn(stdFeatures);
267-
_allowComments = legacy | CsvParser.Feature.ALLOW_COMMENTS.enabledIn(csvFeatures);
270+
final boolean oldComments = JsonParser.Feature.ALLOW_YAML_COMMENTS.enabledIn(stdFeatures);
271+
_allowComments = oldComments | CsvParser.Feature.ALLOW_COMMENTS.enabledIn(csvFeatures);
268272
_trimSpaces = CsvParser.Feature.TRIM_SPACES.enabledIn(csvFeatures);
273+
_skipBlankLines = CsvParser.Feature.SKIP_EMPTY_LINES.enabledIn(csvFeatures);
269274
_inputBuffer = ctxt.allocTokenBuffer();
270275
_bufferRecyclable = true; // since we allocated it
271276
_inputSource = r;
@@ -279,7 +284,9 @@ public void setSchema(CsvSchema schema)
279284
_separatorChar = schema.getColumnSeparator();
280285
_quoteChar = schema.getQuoteChar();
281286
_escapeChar = schema.getEscapeChar();
282-
_allowComments = _allowComments | schema.allowsComments();
287+
if (!_allowComments) {
288+
_allowComments = schema.allowsComments();
289+
}
283290
int max = Math.max(_separatorChar, _quoteChar);
284291
max = Math.max(max, _escapeChar);
285292
max = Math.max(max, '\r');
@@ -292,6 +299,13 @@ public void setSchema(CsvSchema schema)
292299
*/
293300
public void overrideFormatFeatures(int csvFeatures) {
294301
_trimSpaces = CsvParser.Feature.TRIM_SPACES.enabledIn(csvFeatures);
302+
_skipBlankLines = CsvParser.Feature.SKIP_EMPTY_LINES.enabledIn(csvFeatures);
303+
304+
// 07-Oct-2019, tatu: not 100% accurate, as we have no access to legacy
305+
// setting. But close enough, fixed in 3.0
306+
if (CsvParser.Feature.ALLOW_COMMENTS.enabledIn(csvFeatures)) {
307+
_allowComments = true;
308+
}
295309
}
296310

297311
/*
@@ -482,39 +496,53 @@ public boolean startNewLine() throws IOException
482496
}
483497
_handleLF();
484498
}
485-
/* For now, we will only require that there is SOME data
486-
* following linefeed -- even spaces will do.
487-
* In future we may want to use better heuristics to possibly
488-
* skip trailing empty line?
489-
*/
490-
if ((_inputPtr >= _inputEnd) && !loadMore()) {
491-
return false;
492-
}
493-
494-
if (_allowComments && _inputBuffer[_inputPtr] == '#') {
495-
int i = _skipCommentLines();
496-
// end-of-input?
497-
if (i < 0) {
498-
return false;
499-
}
500-
// otherwise push last read char back
501-
--_inputPtr;
502-
}
503-
return true;
499+
return skipLinesWhenNeeded();
504500
}
505501

506-
public void skipLeadingComments() throws IOException
507-
{
508-
if (_allowComments) {
509-
if ((_inputPtr < _inputEnd) || loadMore()) {
510-
if (_inputBuffer[_inputPtr] == '#') {
511-
_skipCommentLines();
512-
--_inputPtr;
502+
/**
503+
* optionally skip lines that are empty or are comments, depending on the feature activated in the parser
504+
* @return false if the end of input was reached
505+
* @throws IOException
506+
* @since 2.10.1
507+
*/
508+
public boolean skipLinesWhenNeeded() throws IOException {
509+
if (!(_allowComments || _skipBlankLines)) {
510+
return hasMoreInput();
511+
}
512+
int firstCharacterPtr = _inputPtr;
513+
while (hasMoreInput()) {
514+
char ch = _inputBuffer[_inputPtr++];
515+
if (ch == '\r' || ch == '\n') {
516+
_pendingLF = ch;
517+
_handleLF();
518+
// track the start of the new line
519+
firstCharacterPtr = _inputPtr;
520+
continue;
521+
}
522+
if (ch == ' ') {
523+
// skip all blanks (in both comments/blanks skip mode)
524+
continue;
525+
}
526+
if (_allowComments) {
527+
if (_inputBuffer[firstCharacterPtr] == '#') {
528+
// on a commented line, skip everything
529+
continue;
530+
}
531+
if (ch == '#') {
532+
// we reach this point when whitespaces precedes the hash character
533+
// move the firstCharacterPtr to the '#' location in order to skip the line completely
534+
firstCharacterPtr = _inputPtr-1;
535+
continue;
513536
}
514537
}
538+
// we reached a non skippable character, this line needs to be parsed
539+
// rollback the input pointer to the beginning of the line
540+
_inputPtr = firstCharacterPtr;
541+
return true; // processing can go on
515542
}
543+
return false; // end of input
516544
}
517-
545+
518546
protected int _skipCommentLines() throws IOException
519547
{
520548
while ((_inputPtr < _inputEnd) || loadMore()) {

csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/CommentsTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
// Tests for [csv#56]
99
public class CommentsTest extends ModuleTestBase
1010
{
11-
final String CSV_WITH_COMMENTS = "x,y\n# comment!\na,b\n# another...\n";
11+
final String CSV_WITH_COMMENTS = "x,y\n# comment!\na,b\n # another...\n";
1212

1313
public void testWithoutComments() throws Exception
1414
{
csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/SkipBlankLines15Test.java

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
package com.fasterxml.jackson.dataformat.csv.deser;
2+
3+
import com.fasterxml.jackson.databind.ObjectReader;
4+
import com.fasterxml.jackson.dataformat.csv.CsvParser;
5+
import com.fasterxml.jackson.dataformat.csv.ModuleTestBase;
6+
7+
import static org.junit.Assert.assertArrayEquals;
8+
9+
// for [dataformats-text#15]: Allow skipping of empty lines
10+
public class SkipBlankLines15Test extends ModuleTestBase {
11+
12+
private static final String CSV_WITH_EMPTY_LINE = "1,\"xyz\"\n\ntrue,\n";
13+
private static final String CSV_WITH_BLANK_LINE = "1,\"xyz\"\n \ntrue,\n";
14+
private static final String CSV_WITH_BLANK_LINE_AND_COMMENT = "1,\"xyz\"\n \n #comment\n\ntrue,\n";
15+
private static final String CSV_WITH_FIRST_BLANK_LINE = "\n1,\"xyz\"\ntrue,\n";
16+
private static final String CSV_WITH_TRAILING_BLANK_LINES = "1,\"xyz\"\ntrue,\n \n\n";
17+
18+
public void testCsvWithEmptyLineSkipBlankLinesFeatureDisabled() throws Exception {
19+
String[][] rows = mapperForCsvAsArray().readValue(CSV_WITH_EMPTY_LINE);
20+
// First, verify default behavior:
21+
assertArrayEquals(expected(
22+
row("1", "xyz"),
23+
row(""),
24+
row("true", "")
25+
), rows);
26+
}
27+
28+
public void testCsvWithEmptyLineSkipBlankLinesFeatureEnabled() throws Exception {
29+
String[][] rows = mapperForCsvAsArray()
30+
.with(CsvParser.Feature.SKIP_EMPTY_LINES)
31+
.readValue(CSV_WITH_EMPTY_LINE);
32+
// empty line is skipped
33+
assertArrayEquals(expected(
34+
row("1", "xyz"),
35+
row("true", "")
36+
), rows);
37+
}
38+
39+
40+
public void testCsvWithBlankLineSkipBlankLinesFeatureDisabled() throws Exception {
41+
String[][] rows = mapperForCsvAsArray()
42+
.readValue(CSV_WITH_BLANK_LINE);
43+
// First, verify default behavior:
44+
assertArrayEquals(expected(
45+
row("1", "xyz"),
46+
row(" "),
47+
row("true", "")
48+
), rows);
49+
}
50+
51+
public void testCsvWithBlankLineSkipBlankLinesFeatureEnabled() throws Exception {
52+
String[][] rows = mapperForCsvAsArray()
53+
.with(CsvParser.Feature.SKIP_EMPTY_LINES)
54+
.readValue(CSV_WITH_BLANK_LINE);
55+
// blank line is skipped
56+
assertArrayEquals(expected(
57+
row("1", "xyz"),
58+
row("true", "")
59+
), rows);
60+
}
61+
62+
public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureDisabled() throws Exception {
63+
String[][] rows = mapperForCsvAsArray()
64+
.readValue(CSV_WITH_BLANK_LINE_AND_COMMENT);
65+
// First, verify default behavior:
66+
assertArrayEquals(expected(
67+
row("1", "xyz"),
68+
row(" "),
69+
row(" #comment"),
70+
row(""),
71+
row("true", "")
72+
), rows);
73+
}
74+
75+
public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureEnabled() throws Exception {
76+
String[][] rows = mapperForCsvAsArray()
77+
.with(CsvParser.Feature.SKIP_EMPTY_LINES)
78+
.readValue(CSV_WITH_BLANK_LINE_AND_COMMENT);
79+
// blank/empty lines are skipped
80+
assertArrayEquals(expected(
81+
row("1", "xyz"),
82+
row(" #comment"),
83+
row("true", "")
84+
), rows);
85+
}
86+
87+
public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureEnabledAndAllowComments() throws Exception {
88+
String[][] rows = mapperForCsvAsArray()
89+
.with(CsvParser.Feature.SKIP_EMPTY_LINES)
90+
.with(CsvParser.Feature.ALLOW_COMMENTS)
91+
.readValue(CSV_WITH_BLANK_LINE_AND_COMMENT);
92+
// blank/empty/comment lines are skipped
93+
94+
assertArrayEquals(expected(
95+
row("1", "xyz"),
96+
row("true", "")
97+
), rows);
98+
}
99+
100+
public void testCsvWithFirstBlankLineSkipBlankLinesFeatureDisabled() throws Exception {
101+
String[][] rows = mapperForCsvAsArray()
102+
.readValue(CSV_WITH_FIRST_BLANK_LINE);
103+
// First, verify default behavior:
104+
assertArrayEquals(expected(
105+
row(""),
106+
row("1", "xyz"),
107+
row("true", "")
108+
), rows);
109+
}
110+
111+
public void testCsvWithFirstBlankLineSkipBlankLinesFeatureEnabled() throws Exception {
112+
String[][] rows = mapperForCsvAsArray()
113+
.with(CsvParser.Feature.SKIP_EMPTY_LINES)
114+
.readValue(CSV_WITH_FIRST_BLANK_LINE);
115+
// blank line is skipped
116+
assertArrayEquals(expected(
117+
row("1", "xyz"),
118+
row("true", "")
119+
), rows);
120+
}
121+
122+
123+
public void testCsvWithTrailingBlankLineSkipBlankLinesFeatureDisabled() throws Exception {
124+
String[][] rows = mapperForCsvAsArray()
125+
.readValue(CSV_WITH_TRAILING_BLANK_LINES);
126+
// First, verify default behavior:
127+
assertArrayEquals(expected(
128+
row("1", "xyz"),
129+
row("true", ""),
130+
row(" "),
131+
row("")
132+
), rows);
133+
}
134+
135+
public void testCsvWithTrailingBlankLineSkipBlankLinesFeatureEnabled() throws Exception {
136+
String[][] rows = mapperForCsvAsArray()
137+
.with(CsvParser.Feature.SKIP_EMPTY_LINES)
138+
.readValue(CSV_WITH_FIRST_BLANK_LINE);
139+
// blank lines are skipped
140+
assertArrayEquals(expected(
141+
row("1", "xyz"),
142+
row("true", "")
143+
), rows);
144+
}
145+
146+
private ObjectReader mapperForCsvAsArray() {
147+
// when wrapped as an array, we'll get array of Lists:
148+
return mapperForCsv()
149+
.readerFor(String[][].class)
150+
.with(CsvParser.Feature.WRAP_AS_ARRAY);
151+
}
152+
153+
private String[][] expected(String[]... rowInputs) {
154+
return rowInputs;
155+
}
156+
157+
private String[] row(String... cellInputs) {
158+
return cellInputs;
159+
}
160+
}

0 commit comments

Comments
 (0)