Skip to content

Commit f7353ad

Browse files
rhshadrach and Gulper Gumus
authored and committed
DOC: Standardize noncompliant docstrings in pandas/io/html.py (flake8-docstrings) #61944
1 parent bc6ad14 commit f7353ad

File tree

3 files changed

+46
-13
lines changed

3 files changed

+46
-13
lines changed

pandas/core/reshape/encoding.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -390,7 +390,9 @@ def from_dummies(
390390
The default category is the implied category when a value has none of the
391391
listed categories specified with a one, i.e. if all dummies in a row are
392392
zero. Can be a single value for all variables or a dict directly mapping
393-
the default categories to a prefix of a variable.
393+
the default categories to a prefix of a variable. The default category
394+
will be coerced to the dtype of ``data.columns`` if such coercion is
395+
lossless, and will raise otherwise.
394396
395397
Returns
396398
-------

pandas/io/html.py

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,10 @@
11
"""
2-
:mod:`pandas.io.html` is a module containing functionality for dealing with
3-
HTML IO.
2+
:mod:`pandas.io.html` is a module containing functionality for dealing with HTML IO.
43
4+
Provides utilities for reading and parsing HTML tables into pandas DataFrames.
55
"""
66

7+
78
from __future__ import annotations
89

910
from collections import abc
@@ -387,7 +388,7 @@ def _parse_tables(self, document, match, attrs):
387388

388389
def _equals_tag(self, obj, tag) -> bool:
389390
"""
390-
Return whether an individual DOM node matches a tag
391+
Return whether an individual DOM node matches a tag.
391392
392393
Parameters
393394
----------
@@ -399,8 +400,8 @@ def _equals_tag(self, obj, tag) -> bool:
399400
400401
Returns
401402
-------
402-
boolean
403-
Whether `obj`'s tag name is `tag`
403+
bool
404+
Whether `obj`'s tag name is `tag`.
404405
"""
405406
raise AbstractMethodError(self)
406407

@@ -562,7 +563,7 @@ def _expand_colspan_rowspan(
562563

563564
def _handle_hidden_tables(self, tbl_list, attr_name: str):
564565
"""
565-
Return list of tables, potentially removing hidden elements
566+
Return list of tables, potentially removing hidden elements.
566567
567568
Parameters
568569
----------
@@ -679,8 +680,9 @@ def _build_doc(self):
679680

680681
def _build_xpath_expr(attrs) -> str:
681682
"""
682-
Build an xpath expression to simulate bs4's ability to pass in kwargs to
683-
search for attributes when using the lxml parser.
683+
Build an XPath expression to simulate bs4's ability to pass in kwargs.
684+
685+
Search for attributes when using the lxml parser.
684686
685687
Parameters
686688
----------
@@ -689,10 +691,11 @@ def _build_xpath_expr(attrs) -> str:
689691
690692
Returns
691693
-------
692-
expr : unicode
694+
str
693695
An XPath expression that checks for the given HTML attributes.
694696
"""
695697
# give class attribute as class_ because class is a python keyword
698+
696699
if "class_" in attrs:
697700
attrs["class"] = attrs.pop("class_")
698701

@@ -768,6 +771,8 @@ def _equals_tag(self, obj, tag) -> bool:
768771

769772
def _build_doc(self):
770773
"""
774+
Build and parse the HTML document into a DOM tree.
775+
771776
Raises
772777
------
773778
ValueError

pandas/tests/reshape/test_from_dummies.py

Lines changed: 29 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -333,9 +333,7 @@ def test_no_prefix_string_cats_default_category(
333333
):
334334
dummies = DataFrame({"a": [1, 0, 0], "b": [0, 1, 0]})
335335
result = from_dummies(dummies, default_category=default_category)
336-
expected = DataFrame(expected)
337-
if using_infer_string:
338-
expected[""] = expected[""].astype("str")
336+
expected = DataFrame(expected, dtype=dummies.columns.dtype)
339337
tm.assert_frame_equal(result, expected)
340338

341339

@@ -449,3 +447,31 @@ def test_maintain_original_index():
449447
result = from_dummies(df)
450448
expected = DataFrame({"": list("abca")}, index=list("abcd"))
451449
tm.assert_frame_equal(result, expected)
450+
451+
452+
def test_int_columns_with_float_default():
453+
# https://github.com/pandas-dev/pandas/pull/60694
454+
df = DataFrame(
455+
{
456+
3: [1, 0, 0],
457+
4: [0, 1, 0],
458+
},
459+
)
460+
with pytest.raises(ValueError, match="Trying to coerce float values to integers"):
461+
from_dummies(df, default_category=0.5)
462+
463+
464+
def test_object_dtype_preserved():
465+
# https://github.com/pandas-dev/pandas/pull/60694
466+
# When the input has object dtype, the result should as
467+
# well even when infer_string is True.
468+
df = DataFrame(
469+
{
470+
"x": [1, 0, 0],
471+
"y": [0, 1, 0],
472+
},
473+
)
474+
df.columns = df.columns.astype("object")
475+
result = from_dummies(df, default_category="z")
476+
expected = DataFrame({"": ["x", "y", "z"]}, dtype="object")
477+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)