|
| 1 | +import pandas as pd |
| 2 | + |
| 3 | +from utils import outlier_imputer, rush_hourizer |
| 4 | + |
| 5 | + |
def test_outlier_imputer_with_positive_outliers():
    """Check that only values above the upper threshold are capped.

    The expected caps (195 for ``column1``, 75 for ``column2``) are
    consistent with ``Q3 + iqr_factor * IQR`` under pandas' default
    quantile interpolation.
    NOTE(review): confirm the formula against ``utils.outlier_imputer``.
    """
    df = pd.DataFrame(
        {
            "column1": [1, 5, 7, 100, 2000],
            "column2": [5, 25, 35, 50, 2000],
        }
    )
    column_list = ["column1", "column2"]
    iqr_factor = 1

    df_imputed = outlier_imputer(df, column_list, iqr_factor)

    # 100 is below the upper threshold of 195, so it must be left untouched.
    assert df_imputed["column1"][3] == 100
    # 2000 exceeds the upper threshold in both columns and is capped to it.
    assert df_imputed["column1"][4] == 195
    assert df_imputed["column2"][4] == 75
| 21 | + |
| 22 | + |
def test_outlier_imputer_with_negative_values():
    """Negative inputs are expected to read 0 after imputation."""
    frame = pd.DataFrame(
        {
            "column1": [-10, -5, 0, 5, 10],
            "column2": [0, -2, -5, -10, -15],
        }
    )

    result = outlier_imputer(frame, ["column1", "column2"], 6)

    # -10 (column1, row 0) and -2 (column2, row 1) should both come back as 0.
    assert result.loc[0, "column1"] == 0
    assert result.loc[1, "column2"] == 0
| 37 | + |
| 38 | + |
def test_outlier_imputer_with_empty_column_list():
    """An empty column list must leave every value in the frame unchanged."""
    df = pd.DataFrame(
        {
            "column1": [10, 15, 20, 100, 200],
            "column2": [5, 25, 35, 50, 200],
        }
    )
    column_list = []
    iqr_factor = 6

    # Snapshot the input before the call: if outlier_imputer modifies the
    # frame and returns that same object, comparing `df` with the result
    # would compare an object with itself and could never fail.
    expected = df.copy()

    df_imputed = outlier_imputer(df, column_list, iqr_factor)

    assert expected.equals(df_imputed)
| 52 | + |
| 53 | + |
def test_rush_hourizer_for_rush_hour():
    """Every sampled hour (7, 8, 17, 19) should be flagged with 1."""
    hours = pd.DataFrame({"rush_hour": [7, 8, 17, 19]})

    # rush_hourizer receives one row at a time, as a Series keyed by column.
    for _, row in hours.iterrows():
        assert rush_hourizer(row) == 1
| 63 | + |
| 64 | + |
def test_rush_hourizer_for_non_rush_hour():
    """Every sampled hour (5, 10, 15, 21) should be flagged with 0."""
    hours = pd.DataFrame({"rush_hour": [5, 10, 15, 21]})

    # rush_hourizer receives one row at a time, as a Series keyed by column.
    for _, row in hours.iterrows():
        assert rush_hourizer(row) == 0
0 commit comments