@@ -507,6 +507,9 @@ def _expand_colspan_rowspan(
507
507
508
508
# Append the text from this <td>, colspan times
509
509
text = _remove_whitespace (self ._text_getter (td ))
510
+ if len (text ) == 0 :
511
+ text = self ._text_getter (td )
512
+
510
513
if self .extract_links in ("all" , section ):
511
514
href = self ._href_getter (td )
512
515
text = (text , href )
@@ -1027,6 +1030,7 @@ def read_html(
1027
1030
extract_links : Literal [None , "header" , "footer" , "body" , "all" ] = None ,
1028
1031
dtype_backend : DtypeBackend | lib .NoDefault = lib .no_default ,
1029
1032
storage_options : StorageOptions = None ,
1033
+ skip_blank_lines : bool = True ,
1030
1034
) -> list [DataFrame ]:
1031
1035
r"""
1032
1036
Read HTML tables into a ``list`` of ``DataFrame`` objects.
@@ -1145,6 +1149,9 @@ def read_html(
1145
1149
{storage_options}
1146
1150
1147
1151
.. versionadded:: 2.1.0
1152
+
1153
+ skip_blank_lines : bool, default True
1154
+ Whether lines containing only spaces should be skipped or not.
1148
1155
1149
1156
Returns
1150
1157
-------
@@ -1201,9 +1208,9 @@ def read_html(
1201
1208
1202
1209
validate_header_arg (header )
1203
1210
check_dtype_backend (dtype_backend )
1204
-
1211
+ print ( "Value passed " , io . getvalue ())
1205
1212
io = stringify_path (io )
1206
-
1213
+ print ( "Inside html.py " , io . getvalue ())
1207
1214
return _parse (
1208
1215
flavor = flavor ,
1209
1216
io = io ,
@@ -1223,4 +1230,5 @@ def read_html(
1223
1230
extract_links = extract_links ,
1224
1231
dtype_backend = dtype_backend ,
1225
1232
storage_options = storage_options ,
1233
+ skip_blank_lines = skip_blank_lines
1226
1234
)
0 commit comments