Skip to content

Commit a4404ae

Browse files
committed
Made read_html ignore empty tables, added test
1 parent 62663b4 commit a4404ae

File tree

2 files changed

+38
-3
lines changed

2 files changed

+38
-3
lines changed

pandas/io/html.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -711,9 +711,14 @@ def _parse(flavor, io, match, header, index_col, skiprows, infer_types,
711711
else:
712712
raise_with_traceback(retained)
713713

714-
return [_data_to_frame(table, header, index_col, skiprows, infer_types,
715-
parse_dates, tupleize_cols, thousands)
716-
for table in tables]
714+
ret = []
715+
for table in tables:
716+
try:
717+
ret.append(_data_to_frame(table, header, index_col, skiprows,
718+
infer_types, parse_dates, tupleize_cols, thousands))
719+
except StopIteration: # empty table
720+
continue
721+
return ret
717722

718723

719724
def read_html(io, match='.+', flavor=None, header=None, index_col=None,

pandas/io/tests/test_html.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,36 @@ def test_thousands_macau_index_col(self):
401401

402402
self.assertFalse(any(s.isnull().any() for _, s in df.iteritems()))
403403

404+
def test_empty_tables(self):
405+
"""
406+
Make sure that read_html ignores empty tables.
407+
"""
408+
data1 = '''<table>
409+
<thead>
410+
<tr>
411+
<th>A</th>
412+
<th>B</th>
413+
</tr>
414+
</thead>
415+
<tbody>
416+
<tr>
417+
<td>1</td>
418+
<td>2</td>
419+
</tr>
420+
</tbody>
421+
</table>'''
422+
data2 = data1 + '''<table>
423+
<tbody>
424+
<tr>
425+
<td></td>
426+
<td></td>
427+
</tr>
428+
</tbody>
429+
</table>'''
430+
res1 = self.read_html(StringIO(data1))
431+
res2 = self.read_html(StringIO(data2))
432+
assert_framelist_equal(res1, res2)
433+
404434
def test_countries_municipalities(self):
405435
# GH5048
406436
data1 = StringIO('''<table>

0 commit comments

Comments
 (0)