@@ -65,8 +65,8 @@ class ParserWarning(Warning):
6565 a list of integers that specify row locations for a multi-index on the
6666 columns E.g. [0,1,3]. Intervening rows that are not specified will be
6767 skipped (e.g. 2 in this example are skipped). Note that this parameter
68- ignores commented lines, so header=0 denotes the first line of
69- data rather than the first line of the file.
68+ ignores commented lines, and also empty lines when ``skip_blank_lines=True``,
69+ so header=0 denotes the first line of data rather than the first line of the file.
7070skiprows : list-like or integer
7171 Line numbers to skip (0-indexed) or number of lines to skip (int)
7272 at the start of the file
@@ -110,10 +110,11 @@ class ParserWarning(Warning):
110110comment : str, default None
111111 Indicates remainder of line should not be parsed. If found at the
112112 beginning of a line, the line will be ignored altogether. This parameter
113- must be a single character. Also, fully commented lines
114- are ignored by the parameter `header` but not by `skiprows`. For example,
115- if comment='#', parsing '#empty\n 1,2,3\n a,b,c' with `header=0` will
116- result in '1,2,3' being treated as the header.
113+ must be a single character. Like empty lines (as long as ``skip_blank_lines=True``),
114+ fully commented lines are ignored by the parameter `header`
115+ but not by `skiprows`. For example, if comment='#', parsing
116+ '#empty\n 1,2,3\n a,b,c' with `header=0` will result in '1,2,3' being
117+ treated as the header.
117118decimal : str, default '.'
118119 Character to recognize as decimal point. E.g. use ',' for European data
119120nrows : int, default None
@@ -160,6 +161,8 @@ class ParserWarning(Warning):
160161infer_datetime_format : boolean, default False
161162 If True and parse_dates is enabled for a column, attempt to infer
162163 the datetime format to speed up the processing
164+ skip_blank_lines : boolean, default True
165+ If True, skip over blank lines rather than interpreting them as NaN values.
163166
164167Returns
165168-------
@@ -288,6 +291,7 @@ def _read(filepath_or_buffer, kwds):
288291 'mangle_dupe_cols' : True ,
289292 'tupleize_cols' : False ,
290293 'infer_datetime_format' : False ,
294+ 'skip_blank_lines' : True
291295}
292296
293297
@@ -380,7 +384,8 @@ def parser_f(filepath_or_buffer,
380384 squeeze = False ,
381385 mangle_dupe_cols = True ,
382386 tupleize_cols = False ,
383- infer_datetime_format = False ):
387+ infer_datetime_format = False ,
388+ skip_blank_lines = True ):
384389
385390 # Alias sep -> delimiter.
386391 if delimiter is None :
@@ -452,7 +457,8 @@ def parser_f(filepath_or_buffer,
452457 buffer_lines = buffer_lines ,
453458 mangle_dupe_cols = mangle_dupe_cols ,
454459 tupleize_cols = tupleize_cols ,
455- infer_datetime_format = infer_datetime_format )
460+ infer_datetime_format = infer_datetime_format ,
461+ skip_blank_lines = skip_blank_lines )
456462
457463 return _read (filepath_or_buffer , kwds )
458464
@@ -1346,6 +1352,7 @@ def __init__(self, f, **kwds):
13461352 self .quoting = kwds ['quoting' ]
13471353 self .mangle_dupe_cols = kwds .get ('mangle_dupe_cols' , True )
13481354 self .usecols = kwds ['usecols' ]
1355+ self .skip_blank_lines = kwds ['skip_blank_lines' ]
13491356
13501357 self .names_passed = kwds ['names' ] or None
13511358
@@ -1401,6 +1408,7 @@ def __init__(self, f, **kwds):
14011408
14021409 # needs to be cleaned/refactored
14031410 # multiple date column thing turning into a real spaghetti factory
1411+
14041412 if not self ._has_complex_date_col :
14051413 (index_names ,
14061414 self .orig_names , self .columns ) = self ._get_index_name (self .columns )
@@ -1598,6 +1606,7 @@ def _infer_columns(self):
15981606
15991607 while self .line_pos <= hr :
16001608 line = self ._next_line ()
1609+
16011610 unnamed_count = 0
16021611 this_columns = []
16031612 for i , c in enumerate (line ):
@@ -1735,25 +1744,35 @@ def _next_line(self):
17351744 line = self ._check_comments ([self .data [self .pos ]])[0 ]
17361745 self .pos += 1
17371746 # either uncommented or blank to begin with
1738- if self ._empty (self .data [self .pos - 1 ]) or line :
1747+ if not self .skip_blank_lines and (self ._empty (self .data [
1748+ self .pos - 1 ]) or line ):
17391749 break
1750+ elif self .skip_blank_lines :
1751+ ret = self ._check_empty ([line ])
1752+ if ret :
1753+ line = ret [0 ]
1754+ break
17401755 except IndexError :
17411756 raise StopIteration
17421757 else :
17431758 while self .pos in self .skiprows :
1744- next (self .data )
17451759 self .pos += 1
1760+ next (self .data )
17461761
17471762 while True :
17481763 orig_line = next (self .data )
17491764 line = self ._check_comments ([orig_line ])[0 ]
17501765 self .pos += 1
1751- if self ._empty (orig_line ) or line :
1766+ if not self .skip_blank_lines and ( self . _empty (orig_line ) or line ) :
17521767 break
1768+ elif self .skip_blank_lines :
1769+ ret = self ._check_empty ([line ])
1770+ if ret :
1771+ line = ret [0 ]
1772+ break
17531773
17541774 self .line_pos += 1
17551775 self .buf .append (line )
1756-
17571776 return line
17581777
17591778 def _check_comments (self , lines ):
@@ -1774,6 +1793,15 @@ def _check_comments(self, lines):
17741793 ret .append (rl )
17751794 return ret
17761795
1796+ def _check_empty (self , lines ):
1797+ ret = []
1798+ for l in lines :
1799+ # Remove empty lines and lines with only one whitespace value
1800+ if len (l ) > 1 or len (l ) == 1 and (not isinstance (l [0 ],
1801+ compat .string_types ) or l [0 ].strip ()):
1802+ ret .append (l )
1803+ return ret
1804+
17771805 def _check_thousands (self , lines ):
17781806 if self .thousands is None :
17791807 return lines
@@ -1909,7 +1937,6 @@ def _get_lines(self, rows=None):
19091937
19101938 # already fetched some number
19111939 if rows is not None :
1912-
19131940 # we already have the lines in the buffer
19141941 if len (self .buf ) >= rows :
19151942 new_rows , self .buf = self .buf [:rows ], self .buf [rows :]
@@ -1974,6 +2001,8 @@ def _get_lines(self, rows=None):
19742001 lines = lines [:- self .skip_footer ]
19752002
19762003 lines = self ._check_comments (lines )
2004+ if self .skip_blank_lines :
2005+ lines = self ._check_empty (lines )
19772006 return self ._check_thousands (lines )
19782007
19792008
0 commit comments