@@ -65,7 +65,12 @@ public class CsvDecoder
65
65
protected boolean _trimSpaces ;
66
66
67
67
protected boolean _allowComments ;
68
-
68
+
69
+ /**
+ * Whether lines that are empty or contain only spaces are skipped when
+ * advancing to the next line to parse; set from
+ * {@code CsvParser.Feature.SKIP_EMPTY_LINES}.
+ *
70
+ * @since 2.10.1
71
+ */
72
+ protected boolean _skipBlankLines ; // NOTE: can be final in 3.0, not before
73
+
69
74
/**
70
75
* Maximum of quote character, linefeeds (\r and \n), escape character.
71
76
*/
@@ -111,14 +116,14 @@ public class CsvDecoder
111
116
* needs to be handled (indicates end-of-record).
112
117
*/
113
118
protected int _pendingLF = 0 ;
114
-
119
+
115
120
/**
116
121
* Flag that indicates whether parser is closed or not. Gets
117
122
* set when parser is either closed by explicit call
118
123
* ({@link #close}) or when end-of-input is reached.
119
124
*/
120
125
protected boolean _closed ;
121
-
126
+
122
127
/*
123
128
/**********************************************************************
124
129
/* Current input location information
@@ -152,7 +157,7 @@ public class CsvDecoder
152
157
* Since big (gigabyte-sized) sizes are possible, this needs to be a long,
153
158
* unlike pointers and sizes related to in-memory buffers.
154
159
*/
155
- protected long _tokenInputTotal = 0 ;
160
+ protected long _tokenInputTotal = 0 ;
156
161
157
162
/**
158
163
* Input row on which current token starts, 1-based
@@ -202,8 +207,7 @@ public class CsvDecoder
202
207
203
208
final static double MIN_INT_D = Integer .MIN_VALUE ;
204
209
final static double MAX_INT_D = Integer .MAX_VALUE ;
205
-
206
-
210
+
207
211
// Digits, numeric
208
212
final protected static int INT_0 = '0' ;
209
213
final protected static int INT_1 = '1' ;
@@ -254,18 +258,19 @@ public class CsvDecoder
254
258
/**********************************************************************
255
259
*/
256
260
257
- @ SuppressWarnings ( "deprecation" )
258
- public CsvDecoder ( CsvParser owner , IOContext ctxt , Reader r , CsvSchema schema , TextBuffer textBuffer ,
261
+ public CsvDecoder ( CsvParser owner , IOContext ctxt , Reader r , CsvSchema schema ,
262
+ TextBuffer textBuffer ,
259
263
int stdFeatures , int csvFeatures )
260
264
{
261
265
_owner = owner ;
262
266
_ioContext = ctxt ;
263
267
_inputSource = r ;
264
268
_textBuffer = textBuffer ;
265
269
_autoCloseInput = JsonParser .Feature .AUTO_CLOSE_SOURCE .enabledIn (stdFeatures );
266
- final boolean legacy = JsonParser .Feature .ALLOW_YAML_COMMENTS .enabledIn (stdFeatures );
267
- _allowComments = legacy | CsvParser .Feature .ALLOW_COMMENTS .enabledIn (csvFeatures );
270
+ final boolean oldComments = JsonParser .Feature .ALLOW_YAML_COMMENTS .enabledIn (stdFeatures );
271
+ _allowComments = oldComments | CsvParser .Feature .ALLOW_COMMENTS .enabledIn (csvFeatures );
268
272
_trimSpaces = CsvParser .Feature .TRIM_SPACES .enabledIn (csvFeatures );
273
+ _skipBlankLines = CsvParser .Feature .SKIP_EMPTY_LINES .enabledIn (csvFeatures );
269
274
_inputBuffer = ctxt .allocTokenBuffer ();
270
275
_bufferRecyclable = true ; // since we allocated it
271
276
_inputSource = r ;
@@ -279,7 +284,9 @@ public void setSchema(CsvSchema schema)
279
284
_separatorChar = schema .getColumnSeparator ();
280
285
_quoteChar = schema .getQuoteChar ();
281
286
_escapeChar = schema .getEscapeChar ();
282
- _allowComments = _allowComments | schema .allowsComments ();
287
+ if (!_allowComments ) {
288
+ _allowComments = schema .allowsComments ();
289
+ }
283
290
int max = Math .max (_separatorChar , _quoteChar );
284
291
max = Math .max (max , _escapeChar );
285
292
max = Math .max (max , '\r' );
@@ -292,6 +299,13 @@ public void setSchema(CsvSchema schema)
292
299
*/
293
300
public void overrideFormatFeatures (int csvFeatures ) {
294
301
_trimSpaces = CsvParser .Feature .TRIM_SPACES .enabledIn (csvFeatures );
302
+ _skipBlankLines = CsvParser .Feature .SKIP_EMPTY_LINES .enabledIn (csvFeatures );
303
+
304
+ // 07-Oct-2019, tatu: not 100% accurate, as we have no access to legacy
305
+ // setting. But close enough, fixed in 3.0
306
+ if (CsvParser .Feature .ALLOW_COMMENTS .enabledIn (csvFeatures )) {
307
+ _allowComments = true ;
308
+ }
295
309
}
296
310
297
311
/*
@@ -482,39 +496,53 @@ public boolean startNewLine() throws IOException
482
496
}
483
497
_handleLF ();
484
498
}
485
- /* For now, we will only require that there is SOME data
486
- * following linefeed -- even spaces will do.
487
- * In future we may want to use better heuristics to possibly
488
- * skip trailing empty line?
489
- */
490
- if ((_inputPtr >= _inputEnd ) && !loadMore ()) {
491
- return false ;
492
- }
493
-
494
- if (_allowComments && _inputBuffer [_inputPtr ] == '#' ) {
495
- int i = _skipCommentLines ();
496
- // end-of-input?
497
- if (i < 0 ) {
498
- return false ;
499
- }
500
- // otherwise push last read char back
501
- --_inputPtr ;
502
- }
503
- return true ;
499
+ return skipLinesWhenNeeded ();
504
500
}
505
501
506
- public void skipLeadingComments () throws IOException
507
- {
508
- if (_allowComments ) {
509
- if ((_inputPtr < _inputEnd ) || loadMore ()) {
510
- if (_inputBuffer [_inputPtr ] == '#' ) {
511
- _skipCommentLines ();
512
- --_inputPtr ;
502
+ /**
503
+ * optionally skip lines that are empty or are comments, depending on the feature activated in the parser
504
+ * @return false if the end of input was reached
505
+ * @throws IOException
506
+ * @since 2.10.1
507
+ */
508
+ public boolean skipLinesWhenNeeded () throws IOException {
509
+ if (!(_allowComments || _skipBlankLines )) {
510
+ return hasMoreInput ();
511
+ }
512
+ int firstCharacterPtr = _inputPtr ;
513
+ while (hasMoreInput ()) {
514
+ char ch = _inputBuffer [_inputPtr ++];
515
+ if (ch == '\r' || ch == '\n' ) {
516
+ _pendingLF = ch ;
517
+ _handleLF ();
518
+ // track the start of the new line
519
+ firstCharacterPtr = _inputPtr ;
520
+ continue ;
521
+ }
522
+ if (ch == ' ' ) {
523
+ // skip all blanks (in both comments/blanks skip mode)
524
+ continue ;
525
+ }
526
+ if (_allowComments ) {
527
+ if (_inputBuffer [firstCharacterPtr ] == '#' ) {
528
+ // on a commented line, skip everything
529
+ continue ;
530
+ }
531
+ if (ch == '#' ) {
532
+ // we reach this point when whitespaces precedes the hash character
533
+ // move the firstCharacterPtr to the '#' location in order to skip the line completely
534
+ firstCharacterPtr = _inputPtr -1 ;
535
+ continue ;
513
536
}
514
537
}
538
+ // we reached a non skippable character, this line needs to be parsed
539
+ // rollback the input pointer to the beginning of the line
540
+ _inputPtr = firstCharacterPtr ;
541
+ return true ; // processing can go on
515
542
}
543
+ return false ; // end of input
516
544
}
517
-
545
+
518
546
protected int _skipCommentLines () throws IOException
519
547
{
520
548
while ((_inputPtr < _inputEnd ) || loadMore ()) {
0 commit comments