Skip to content

Commit cfc8690

Browse files
Zeroto521, samukweku, ericmjl
authored
[BUG] Avoid change_type mutating original DataFrame (#1162)
* lint via isort * move function to outer * avoid mutating original data * add blank lines * Add annotations for `_convert` * Test it * Update CHANGELOG.md * convert each element to dtype x isn't Series. it's the element of Series * Docs update * fix for health check * Update test_case_when.py Co-authored-by: Samuel Oranyeli <[email protected]> Co-authored-by: Eric Ma <[email protected]>
1 parent bd648d9 commit cfc8690

File tree

4 files changed

+29
-11
lines changed

4 files changed

+29
-11
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
- [ENH] Enable `encode_categorical` to handle arrays with 2 (or more) dimensions. PR #1153 @Zeroto521
2222
- [INF] Cancel old workflow runs via Github Action `concurrency`. PR #1161 @Zeroto521
2323
- [ENH] Faster computation for non-equi join, with a numba engine. Issue #1102 @samukweku
24+
- [BUG] Avoid `change_type` mutating original `DataFrame`. PR #1162 @Zeroto521
2425

2526
## [v0.23.1] - 2022-05-03
2627

janitor/functions/change_type.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from typing import Hashable
2+
23
import pandas as pd
34
import pandas_flavor as pf
45

@@ -15,7 +16,7 @@ def change_type(
1516
) -> pd.DataFrame:
1617
"""Change the type of a column.
1718
18-
This method mutates the original DataFrame.
19+
This method does not mutate the original DataFrame.
1920
2021
Exceptions that are raised can be ignored. For example, if one has a mixed
2122
dtype column that has non-integer strings and integers, and you want to
@@ -57,20 +58,24 @@ def change_type(
5758
:raises ValueError: If unknown option provided for
5859
`ignore_exception`.
5960
"""
61+
62+
df = df.copy() # avoid mutating the original DataFrame
6063
if not ignore_exception:
6164
df[column_name] = df[column_name].astype(dtype)
6265
elif ignore_exception == "keep_values":
6366
df[column_name] = df[column_name].astype(dtype, errors="ignore")
6467
elif ignore_exception == "fillna":
65-
66-
def convert(x, dtype):
67-
"""Casts item `x` to `dtype` or None if not possible."""
68-
try:
69-
return dtype(x)
70-
except ValueError:
71-
return None
72-
73-
df[column_name] = df[column_name].apply(lambda x: convert(x, dtype))
68+
df[column_name] = df[column_name].apply(lambda x: _convert(x, dtype))
7469
else:
7570
raise ValueError("Unknown option for ignore_exception")
71+
7672
return df
73+
74+
75+
def _convert(x, dtype: type):
76+
"""Casts item `x` to `dtype` or None if not possible."""
77+
78+
try:
79+
return dtype(x)
80+
except ValueError:
81+
return None

tests/functions/test_case_when.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,8 @@ def test_default_ndim(df):
7676

7777

7878
@pytest.mark.turtle
79-
@settings(deadline=None)
8079
@given(df=df_strategy())
80+
@settings(deadline=None)
8181
def test_default_length(df):
8282
"""Raise ValueError if `default` length != len(df)."""
8383
assume(len(df) > 10)

tests/functions/test_change_type.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import numpy as np
22
import pandas as pd
33
import pytest
4+
from pandas.testing import assert_frame_equal
45

56

67
@pytest.mark.functions
@@ -43,3 +44,14 @@ def test_change_type_raise_exception():
4344
df = df.change_type(
4445
column_name="col1", dtype=float, ignore_exception=False
4546
)
47+
48+
49+
@pytest.mark.functions
50+
def test_original_data_type(dataframe):
51+
df = pd.DataFrame(range(3), columns=["col1"])
52+
df_original = df.copy()
53+
54+
df.change_type("col1", dtype=str)
55+
56+
# 'col1' is still int type, not str type
57+
assert_frame_equal(df, df_original)

0 commit comments

Comments
 (0)