Skip to content

Commit a8a709e

Browse files
Zeroto521ericmjlpre-commit-ci[bot]
authored
[ENH/DOC] Support multi-column inputing, change_type (#1163)
* Support multi-inputing * ENH/DOC: Support multi-column inputing, `change_type` * test this new feature * Update CHANGELOG.md * Simplify a bit * Add example to change multi-columns dtype * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update style of example Co-authored-by: Eric Ma <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 3279b06 commit a8a709e

File tree

3 files changed

+89
-6
lines changed

3 files changed

+89
-6
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
- [INF] Cancel old workflow runs via Github Action `concurrency`. PR #1161 @Zeroto521
2525
- [ENH] Faster computation for non-equi join, with a numba engine. Issue #1102 @samukweku
2626
- [BUG] Avoid `change_type` mutating original `DataFrame`. PR #1162 @Zeroto521
27+
- [ENH] The parameter `column_name` of `change_type` now fully supports multi-column input. PR #1163 @Zeroto521
2728
- [ENH] Fix error when `sort_by_appearance=True` is combined with `dropna=True`. Issue #1168 @samukweku
2829

2930
## [v0.23.1] - 2022-05-03

janitor/functions/change_type.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
from typing import Hashable
1+
from __future__ import annotations
2+
3+
from typing import Any, Hashable
24

35
import pandas as pd
46
import pandas_flavor as pf
@@ -10,7 +12,7 @@
1012
@deprecated_alias(column="column_name")
1113
def change_type(
1214
df: pd.DataFrame,
13-
column_name: Hashable,
15+
column_name: Hashable | list[Hashable] | pd.Index,
1416
dtype: type,
1517
ignore_exception: bool = False,
1618
) -> pd.DataFrame:
@@ -29,7 +31,7 @@ def change_type(
2931
df[col] = df[col].astype(dtype)
3032
```
3133
32-
Example:
34+
Example: Change the type of a column.
3335
3436
>>> import pandas as pd
3537
>>> import janitor
@@ -49,8 +51,21 @@ def change_type(
4951
1 1 5.0
5052
2 2 1.0
5153
54+
Example: Change the type of multiple columns.
55+
56+
To change the type of all columns, use `DataFrame.astype` instead.
57+
58+
>>> import pandas as pd
59+
>>> import janitor
60+
>>> df = pd.DataFrame({"col1": range(3), "col2": ["m", 5, True]})
61+
>>> df.change_type(['col1', 'col2'], str)
62+
col1 col2
63+
0 0 m
64+
1 1 5
65+
2 2 True
66+
5267
:param df: A pandas DataFrame.
53-
:param column_name: A column in the dataframe.
68+
:param column_name: The column(s) in the dataframe.
5469
:param dtype: The datatype to convert to. Should be one of the standard
5570
Python types, or a numpy datatype.
5671
:param ignore_exception: one of `{False, "fillna", "keep_values"}`.
@@ -65,14 +80,16 @@ def change_type(
6580
elif ignore_exception == "keep_values":
6681
df[column_name] = df[column_name].astype(dtype, errors="ignore")
6782
elif ignore_exception == "fillna":
68-
df[column_name] = df[column_name].apply(lambda x: _convert(x, dtype))
83+
if isinstance(column_name, Hashable):
84+
column_name = [column_name]
85+
df[column_name] = df[column_name].applymap(_convert, dtype=dtype)
6986
else:
7087
raise ValueError("Unknown option for ignore_exception")
7188

7289
return df
7390

7491

75-
def _convert(x, dtype: type):
92+
def _convert(x: Any, dtype: type) -> Any:
7693
"""Casts item `x` to `dtype` or None if not possible."""
7794

7895
try:

tests/functions/test_change_type.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,71 @@ def test_change_type_raise_exception():
4646
)
4747

4848

49+
@pytest.mark.functions
50+
@pytest.mark.parametrize(
51+
"df, column_name, dtype, ignore_exception, expected",
52+
[
53+
(
54+
pd.DataFrame({"a": [1, 2], "b": [3, 4]}),
55+
["a", "b"],
56+
str,
57+
False,
58+
pd.DataFrame({"a": ["1", "2"], "b": ["3", "4"]}),
59+
),
60+
(
61+
pd.DataFrame({"a": [1, 2], "b": [3, 4]}),
62+
["b", "a"],
63+
str,
64+
False,
65+
pd.DataFrame({"a": ["1", "2"], "b": ["3", "4"]}),
66+
),
67+
(
68+
pd.DataFrame({"a": [1, 2], "b": [3, 4]}),
69+
["a"],
70+
str,
71+
False,
72+
pd.DataFrame({"a": ["1", "2"], "b": [3, 4]}),
73+
),
74+
(
75+
pd.DataFrame({"a": [1, 2], "b": [3, 4]}),
76+
pd.Index(["a", "b"]),
77+
str,
78+
False,
79+
pd.DataFrame({"a": ["1", "2"], "b": ["3", "4"]}),
80+
),
81+
(
82+
pd.DataFrame({"a": [1, 2], "b": [3, 4]}),
83+
["a", "b"],
84+
str,
85+
"keep_values",
86+
pd.DataFrame({"a": ["1", "2"], "b": ["3", "4"]}),
87+
),
88+
(
89+
pd.DataFrame({"a": [1, 2], "b": [3, 4]}),
90+
["a", "b"],
91+
str,
92+
"fillna",
93+
pd.DataFrame({"a": ["1", "2"], "b": ["3", "4"]}),
94+
),
95+
(
96+
pd.DataFrame({"a": ["a", 1], "b": ["b", 2]}),
97+
["a", "b"],
98+
int,
99+
"fillna",
100+
pd.DataFrame({"a": [None, 1], "b": [None, 2]}),
101+
),
102+
],
103+
)
104+
def test_multiple_columns(df, column_name, dtype, ignore_exception, expected):
105+
result = df.change_type(
106+
column_name,
107+
dtype=dtype,
108+
ignore_exception=ignore_exception,
109+
)
110+
111+
assert_frame_equal(result, expected)
112+
113+
49114
@pytest.mark.functions
50115
def test_original_data_type(dataframe):
51116
df = pd.DataFrame(range(3), columns=["col1"])

0 commit comments

Comments
 (0)