Skip to content

Commit c78703b

Browse files
committed
fix outlier function and add utils tests
1 parent f3c36c6 commit c78703b

File tree

2 files changed

+74
-1
lines changed

2 files changed

+74
-1
lines changed

src/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def outlier_imputer(df: pd.DataFrame, column_list: list[str], iqr_factor: int) -
2424

2525
df.loc[df[col] > upper_threshold, col] = upper_threshold
2626

27-
return df
27+
return df
2828

2929

3030
def rush_hourizer(row: pd.Series) -> int:

tests/test_utils.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import pandas as pd
2+
3+
from utils import outlier_imputer, rush_hourizer
4+
5+
6+
def test_outlier_imputer_with_positive_outliers():
    """Check that values above the upper threshold are capped, and others kept.

    Both columns end in a large value (2000). With ``iqr_factor = 1`` the
    assertions expect that value to come back as 195 for ``column1`` and 75
    for ``column2``, while the 100 in ``column1`` is expected to be returned
    unchanged.
    """
    df = pd.DataFrame(
        {
            "column1": [1, 5, 7, 100, 2000],
            "column2": [5, 25, 35, 50, 2000],
        }
    )
    column_list = ["column1", "column2"]
    iqr_factor = 1

    df_imputed = outlier_imputer(df, column_list, iqr_factor)

    # 100 is below the expected upper threshold of 195, so it must be left as is.
    assert df_imputed.loc[3, "column1"] == 100
    # 2000 exceeds the threshold and should be replaced by it (195).
    assert df_imputed.loc[4, "column1"] == 195
    # 2000 exceeds the threshold and should be replaced by it (75).
    assert df_imputed.loc[4, "column2"] == 75
21+
22+
23+
def test_outlier_imputer_with_negative_values():
    """Check outlier_imputer's output for columns that contain negative values.

    The assertions expect the -10 at row 0 of ``column1`` and the -2 at row 1
    of ``column2`` to come back as 0.
    NOTE(review): presumably the function replaces every negative value with
    0 -- confirm against the implementation in src/utils.py.
    """
    frame = pd.DataFrame(
        {
            "column1": [-10, -5, 0, 5, 10],
            "column2": [0, -2, -5, -10, -15],
        }
    )
    columns = ["column1", "column2"]
    factor = 6

    result = outlier_imputer(frame, columns, factor)

    assert result.loc[0, "column1"] == 0
    assert result.loc[1, "column2"] == 0
37+
38+
39+
def test_outlier_imputer_with_empty_column_list():
    """Check that an empty column list leaves the data untouched.

    The input is snapshotted before the call and the result is compared
    against that snapshot. Comparing the result with the input itself would
    pass trivially whenever the function returns the same object it was
    given, even if that object had been modified.
    """
    df = pd.DataFrame(
        {
            "column1": [10, 15, 20, 100, 200],
            "column2": [5, 25, 35, 50, 200],
        }
    )
    # Independent copy taken before the call, so later changes to df cannot
    # affect the expected value.
    expected = df.copy()
    column_list = []
    iqr_factor = 6

    df_imputed = outlier_imputer(df, column_list, iqr_factor)

    # DataFrame.equals returns False for a non-DataFrame (e.g. None), so a
    # missing return value is also caught here.
    assert expected.equals(df_imputed)
52+
53+
54+
def test_rush_hourizer_for_rush_hour():
    """Check that rush_hourizer returns 1 for each rush-hour row."""
    hours = pd.DataFrame({"rush_hour": [7, 8, 17, 19]})

    # Every hour in the fixture (7, 8, 17, 19) is expected to count as rush hour.
    for position in range(len(hours)):
        assert rush_hourizer(hours.iloc[position]) == 1
63+
64+
65+
def test_rush_hourizer_for_non_rush_hour():
    """Check that rush_hourizer returns 0 for each non-rush-hour row."""
    hours = pd.DataFrame({"rush_hour": [5, 10, 15, 21]})

    # None of the hours in the fixture (5, 10, 15, 21) is expected to count
    # as rush hour.
    for position in range(len(hours)):
        assert rush_hourizer(hours.iloc[position]) == 0

0 commit comments

Comments
 (0)