[ENH/DOC] Support multi-column input in `change_type` (#1163)

Zeroto521 · ericmjl · pre-commit-ci[bot] · web-flow · commit a8a709ef92bb · 2022-09-24T16:03:22.000+08:00
* Support multi-inputing * ENH/DOC: Support multi-column inputing, `change_type` * test this new feature * Update CHANGELOG.md * Simplify a bit * Add example to change multi-columns dtype * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update style of example Co-authored-by: Eric Ma <ericmjl@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -24,6 +24,7 @@
 -   [INF] Cancel old workflow runs via Github Action `concurrency`. PR #1161 @Zeroto521
 -   [ENH] Faster computation for non-equi join, with a numba engine. Issue #1102 @samukweku
 -   [BUG] Avoid `change_type` mutating original `DataFrame`. PR #1162 @Zeroto521
+-   [ENH] The `column_name` parameter of `change_type` now fully supports multiple columns. PR #1163 @Zeroto521
 -   [ENH] Fix error when `sort_by_appearance=True` is combined with `dropna=True`. Issue #1168 @samukweku
 
 ## [v0.23.1] - 2022-05-03
diff --git a/janitor/functions/change_type.py b/janitor/functions/change_type.py
@@ -1,4 +1,6 @@
-from typing import Hashable
+from __future__ import annotations
+
+from typing import Any, Hashable
 
 import pandas as pd
 import pandas_flavor as pf
@@ -10,7 +12,7 @@
 @deprecated_alias(column="column_name")
 def change_type(
     df: pd.DataFrame,
-    column_name: Hashable,
+    column_name: Hashable | list[Hashable] | pd.Index,
     dtype: type,
     ignore_exception: bool = False,
 ) -> pd.DataFrame:
@@ -29,7 +31,7 @@ def change_type(
     df[col] = df[col].astype(dtype)
     ```
 
-    Example:
+    Example: Change the type of a column.
 
         >>> import pandas as pd
         >>> import janitor
@@ -49,8 +51,21 @@ def change_type(
         1    1   5.0
         2    2   1.0
 
+    Example: Change the type of multiple columns.
+
+    To change the type of all columns, please use `DataFrame.astype` instead.
+
+        >>> import pandas as pd
+        >>> import janitor
+        >>> df = pd.DataFrame({"col1": range(3), "col2": ["m", 5, True]})
+        >>> df.change_type(['col1', 'col2'], str)
+          col1  col2
+        0    0     m
+        1    1     5
+        2    2  True
+
     :param df: A pandas DataFrame.
-    :param column_name: A column in the dataframe.
+    :param column_name: The column(s) in the dataframe.
     :param dtype: The datatype to convert to. Should be one of the standard
         Python types, or a numpy datatype.
     :param ignore_exception: one of `{False, "fillna", "keep_values"}`.
@@ -65,14 +80,16 @@ def change_type(
     elif ignore_exception == "keep_values":
         df[column_name] = df[column_name].astype(dtype, errors="ignore")
     elif ignore_exception == "fillna":
-        df[column_name] = df[column_name].apply(lambda x: _convert(x, dtype))
+        if isinstance(column_name, Hashable):
+            column_name = [column_name]
+        df[column_name] = df[column_name].applymap(_convert, dtype=dtype)
     else:
         raise ValueError("Unknown option for ignore_exception")
 
     return df
 
 
-def _convert(x, dtype: type):
+def _convert(x: Any, dtype: type) -> Any:
     """Casts item `x` to `dtype` or None if not possible."""
 
     try:
diff --git a/tests/functions/test_change_type.py b/tests/functions/test_change_type.py
@@ -46,6 +46,71 @@ def test_change_type_raise_exception():
         )
 
 
+@pytest.mark.functions
+@pytest.mark.parametrize(
+    "df, column_name, dtype, ignore_exception, expected",
+    [
+        (
+            pd.DataFrame({"a": [1, 2], "b": [3, 4]}),
+            ["a", "b"],
+            str,
+            False,
+            pd.DataFrame({"a": ["1", "2"], "b": ["3", "4"]}),
+        ),
+        (
+            pd.DataFrame({"a": [1, 2], "b": [3, 4]}),
+            ["b", "a"],
+            str,
+            False,
+            pd.DataFrame({"a": ["1", "2"], "b": ["3", "4"]}),
+        ),
+        (
+            pd.DataFrame({"a": [1, 2], "b": [3, 4]}),
+            ["a"],
+            str,
+            False,
+            pd.DataFrame({"a": ["1", "2"], "b": [3, 4]}),
+        ),
+        (
+            pd.DataFrame({"a": [1, 2], "b": [3, 4]}),
+            pd.Index(["a", "b"]),
+            str,
+            False,
+            pd.DataFrame({"a": ["1", "2"], "b": ["3", "4"]}),
+        ),
+        (
+            pd.DataFrame({"a": [1, 2], "b": [3, 4]}),
+            ["a", "b"],
+            str,
+            "keep_values",
+            pd.DataFrame({"a": ["1", "2"], "b": ["3", "4"]}),
+        ),
+        (
+            pd.DataFrame({"a": [1, 2], "b": [3, 4]}),
+            ["a", "b"],
+            str,
+            "fillna",
+            pd.DataFrame({"a": ["1", "2"], "b": ["3", "4"]}),
+        ),
+        (
+            pd.DataFrame({"a": ["a", 1], "b": ["b", 2]}),
+            ["a", "b"],
+            int,
+            "fillna",
+            pd.DataFrame({"a": [None, 1], "b": [None, 2]}),
+        ),
+    ],
+)
+def test_multiple_columns(df, column_name, dtype, ignore_exception, expected):
+    result = df.change_type(
+        column_name,
+        dtype=dtype,
+        ignore_exception=ignore_exception,
+    )
+
+    assert_frame_equal(result, expected)
+
+
 @pytest.mark.functions
 def test_original_data_type(dataframe):
     df = pd.DataFrame(range(3), columns=["col1"])