
Commit 7dfe3a3

committed
add tests
1 parent 06af43b commit 7dfe3a3

3 files changed: +297 -58 lines changed
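The new test functions follow pytest naming conventions, so, assuming the repository's standard pytest setup, the commit can presumably be exercised locally with:

    python -m pytest tests/test_datasets.py tests/test_plots.py tests/test_space_partitioning.py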

tests/test_datasets.py

Lines changed: 15 additions & 0 deletions
import effector


def test_datasets():
    dim = 3

    X = effector.datasets.IndependentUniform(dim=dim, low=-1, high=1).generate_data(
        1000, seed=21
    )
    assert X.shape == (1000, dim)

    data = effector.datasets.BikeSharing()
    data.fetch_and_preprocess()
    data.postprocess()
    assert data.dataset is not None
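For intuition about what test_datasets exercises, here is a hypothetical re-implementation of the sampler. The name IndependentUniform implies i.i.d. uniform columns; the real effector.datasets class may differ in details such as the RNG it uses:

import numpy as np

# Illustrative sketch only -- not effector's actual implementation.
def independent_uniform(n, dim, low, high, seed):
    rng = np.random.default_rng(seed)
    # each of the `dim` columns is drawn independently from U[low, high)
    return rng.uniform(low, high, size=(n, dim))

X = independent_uniform(n=1000, dim=3, low=-1, high=1, seed=21)
assert X.shape == (1000, 3)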

tests/test_plots.py

Lines changed: 181 additions & 0 deletions
import numpy as np
import effector
import matplotlib.pyplot as plt


def test_plots():
    def generate_dataset(N, x1_min, x1_max, x2_sigma, x3_sigma):
        x1 = np.random.uniform(x1_min, x1_max, size=int(N))
        x2 = np.random.normal(loc=x1, scale=x2_sigma)
        x3 = np.random.uniform(x1_min, x1_max, size=int(N))  # note: x3_sigma is unused
        return np.stack((x1, x2, x3), axis=-1)

    # generate the dataset
    np.random.seed(21)

    N = 1000
    x1_min = 0
    x1_max = 1
    x2_sigma = 0.1
    x3_sigma = 1.0
    X = generate_dataset(N, x1_min, x1_max, x2_sigma, x3_sigma)

    def predict(x):
        y = 7 * x[:, 0] - 3 * x[:, 1] + 4 * x[:, 2]
        return y

    def predict_grad(x):
        df_dx1 = 7 * np.ones([x.shape[0]])
        df_dx2 = -3 * np.ones([x.shape[0]])
        df_dx3 = 4 * np.ones([x.shape[0]])
        return np.stack([df_dx1, df_dx2, df_dx3], axis=-1)

    assert all(
        [
            effector.PDP(data=X, model=predict).plot(
                feature=i, y_limits=[-5, 5], show_plot=False
            )
            is not None
            for i in [0, 1, 2]
        ]
    )

    assert all(
        [
            effector.DerPDP(data=X, model=predict, model_jac=predict_grad).plot(
                feature=i, heterogeneity=True, dy_limits=[-10, 10], show_plot=False
            )
            for i in range(3)
        ]
    )

    assert all(
        [
            effector.ALE(data=X, model=predict).plot(
                feature=i, y_limits=[-5, 5], dy_limits=[-10, 10], show_plot=False
            )
            for i in range(3)
        ]
    )

    assert all(
        [
            effector.RHALE(data=X, model=predict, model_jac=predict_grad).plot(
                feature=i, y_limits=[-5, 5], dy_limits=[-10, 10], show_plot=False
            )
            for i in range(3)
        ]
    )

    assert all(
        [
            effector.ShapDP(data=X, model=predict).plot(feature=i, show_plot=False)
            for i in range(3)
        ]
    )

    x_mean = np.mean(X, axis=0)
    x_std = np.std(X, axis=0)
    X = (X - x_mean) / x_std
    y_mean = np.mean(predict(X))
    y_std = np.std(predict(X))

    scale_x_list = [{"mean": x_mean[i], "std": x_std[i]} for i in range(X.shape[1])]
    scale_y = {"mean": y_mean, "std": y_std}

    assert all(
        [
            effector.PDP(data=X, model=predict).plot(
                feature=i,
                y_limits=[-5, 5],
                show_plot=False,
                scale_x=scale_x_list[i],
                scale_y=scale_y,
                use_vectorized=False,
                nof_ice=200,
                nof_points=25,
            )
            is not None
            for i in [0, 1, 2]
        ]
    )
    plt.close("all")

    assert all(
        [
            effector.PDP(data=X, model=predict).plot(
                feature=i,
                y_limits=[-5, 5],
                heterogeneity=False,
                show_plot=False,
                scale_x=scale_x_list[i],
                scale_y=scale_y,
            )
            is not None
            for i in [0, 1, 2]
        ]
    )
    plt.close("all")
    assert all(
        [
            effector.DerPDP(data=X, model=predict, model_jac=predict_grad).plot(
                feature=i,
                heterogeneity=True,
                dy_limits=[-10, 10],
                show_plot=False,
                scale_x=scale_x_list[i],
                scale_y=scale_y,
                use_vectorized=False,
                nof_ice=200,
                nof_points=25,
            )
            for i in range(3)
        ]
    )
    plt.close("all")
    assert all(
        [
            effector.ALE(data=X, model=predict).plot(
                feature=i,
                y_limits=[-5, 5],
                dy_limits=[-10, 10],
                show_plot=False,
                scale_x=scale_x_list[i],
                scale_y=scale_y,
                centering=False,
                show_avg_output=True,
                show_only_aggregated=True,
            )
            for i in range(3)
        ]
    )
    plt.close("all")
    assert all(
        [
            effector.RHALE(data=X, model=predict, model_jac=predict_grad).plot(
                feature=i,
                y_limits=[-5, 5],
                dy_limits=[-10, 10],
                show_plot=False,
                scale_x=scale_x_list[i],
                scale_y=scale_y,
            )
            for i in range(3)
        ]
    )
    plt.close("all")
    assert all(
        [
            effector.ShapDP(data=X, model=predict).plot(
                feature=i,
                show_plot=False,
                scale_x=scale_x_list[i],
                scale_y=scale_y,
                nof_shap_values=20,
                nof_points=25,
                only_shap_values=True,
            )
            for i in range(3)
        ]
    )
    plt.close("all")
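The scale_x / scale_y dictionaries passed above hold the mean and standard deviation removed during standardization. A minimal sketch of the inverse transform they presumably encode when the plots map standardized axis values back to original units (the exact handling inside effector's plotting code may differ):

import numpy as np

def unscale(z, scale):
    # invert standardization: original = z * std + mean
    return z * scale["std"] + scale["mean"]

z = np.array([-1.0, 0.0, 1.0])
print(unscale(z, {"mean": 0.5, "std": 0.3}))  # [0.2 0.5 0.8]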

tests/test_space_partitioning.py

Lines changed: 101 additions & 58 deletions
@@ -1,60 +1,103 @@
-from effector.space_partitioning import Best
+from effector.space_partitioning import Best, BestLevelWise
 import numpy as np
 
-np.random.seed(0)
-N = 1000
-D = 3
-# Generate features uniformly in [0, 10].
-X = np.random.uniform(0, 10, size=(N, D))
-
-# Create a target variable y with four groups.
-# Group 1: x2 < 3 and x3 < 5 -> label 0
-# Group 2: x2 < 3 and x3 >= 5 -> label 1
-# Group 3: x2 >= 3 and x2 < 5 -> label 2
-# Group 4: x2 >= 3 and x2 >= 5 -> label 3
-y = np.empty(N, dtype=int)
-for i in range(N):
-    if X[i, 1] < 3:
-        y[i] = 0 if X[i, 1] < 1.5 else 1
-    else:
-        y[i] = 2 if X[i, 1] < 5 else 3
-
-# Define a heterogeneity function (Gini impurity) that uses the target y.
-def heterogeneity(mask):
-    indices = np.where(mask)[0]
-    if len(indices) < 50:
-        return 10000000000
-    labels = y[indices]
-    classes, counts = np.unique(labels, return_counts=True)
-    p = counts / counts.sum()
-    return 1 - np.sum(p ** 2)
-
-# Set axis limits (min and max for each feature).
-axis_limits = np.array([[0, 10], [0, 10], [0, 10]]).T
-
-# We want to allow splits on x1 and x2. To do so, we choose the primary feature as x3 (index 2)
-# and explicitly pass candidate conditioning features [0, 1].
-best = Best(
-    min_heterogeneity_decrease_pcg=0.1,
-    heter_small_enough=0.0,
-    max_depth=2,
-    min_samples_leaf=10,
-    numerical_features_grid_size=20,
-    search_partitions_when_categorical=False,
-)
-
-best.compile(
-    feature=0,  # primary feature (x3) -- not used for splitting in this test.
-    data=X,
-    heter_func=heterogeneity,
-    axis_limits=axis_limits,
-    candidate_conditioning_features=[0, 1, 2],
-    feature_names=["x1", "x2", "x3"],
-    target_name="y"
-)
-tree = best.fit()
-
-print("Constructed Tree:")
-print(tree)
-
-tree.show_full_tree()
+
+
+def test_space_partitioning():
+    np.random.seed(0)
+    N = 1000
+    D = 3
+    # Generate features uniformly in [0, 10].
+    X = np.random.uniform(0, 10, size=(N, D))
+
+    # Create a target variable y with four groups, determined by x2 alone:
+    # Group 1: x2 < 1.5       -> label 0
+    # Group 2: 1.5 <= x2 < 3  -> label 1
+    # Group 3: 3 <= x2 < 5    -> label 2
+    # Group 4: x2 >= 5        -> label 3
+    y = np.empty(N, dtype=int)
+    for i in range(N):
+        if X[i, 1] < 3:
+            y[i] = 0 if X[i, 1] < 1.5 else 1
+        else:
+            y[i] = 2 if X[i, 1] < 5 else 3
+
+    # Define a heterogeneity function (Gini impurity) that uses the target y.
+    def heterogeneity(mask):
+        indices = np.where(mask)[0]
+        if len(indices) < 50:
+            return 10000000000  # effectively infinite: penalize tiny subgroups
+        labels = y[indices]
+        classes, counts = np.unique(labels, return_counts=True)
+        p = counts / counts.sum()
+        return 1 - np.sum(p**2)
+
+    # Recursively check that heterogeneity never increases from parent to child.
+    def parent_heter_lower(node, is_lower):
+        if not is_lower:
+            return False
+        if node.parent_node is None:
+            return is_lower
+
+        return parent_heter_lower(
+            node.parent_node,
+            node.info["weighted_heterogeneity"]
+            <= node.parent_node.info["weighted_heterogeneity"],
+        )
+
+    # Set axis limits (min and max for each feature).
+    axis_limits = np.array([[0, 10], [0, 10], [0, 10]]).T
+
+    # The primary feature is x1 (index 0); all three features [0, 1, 2] are
+    # passed as candidate conditioning features.
+    best = Best(
+        min_heterogeneity_decrease_pcg=0.1,
+        heter_small_enough=0.0,
+        max_depth=2,
+        min_samples_leaf=10,
+        numerical_features_grid_size=20,
+        search_partitions_when_categorical=False,
+    )
+
+    best.compile(
+        feature=0,  # primary feature (x1) -- not used for splitting in this test.
+        data=X,
+        heter_func=heterogeneity,
+        axis_limits=axis_limits,
+        candidate_conditioning_features=[0, 1, 2],
+        feature_names=["x1", "x2", "x3"],
+        target_name="y",
+    )
+    tree = best.fit()
+
+    # tree.show_full_tree()
+
+    assert tree is not None
+
+    heter_decreasing_per_level = all([parent_heter_lower(n, True) for n in tree.nodes])
+    assert heter_decreasing_per_level
+
+    ############################
+
+    best_level_wise = BestLevelWise(
+        min_heterogeneity_decrease_pcg=0.1,
+        heter_small_enough=0.0,
+        max_depth=2,
+        min_samples_leaf=10,
+        numerical_features_grid_size=20,
+        search_partitions_when_categorical=False,
+    )
+
+    best_level_wise.compile(
+        feature=0,  # primary feature (x1) -- not used for splitting in this test.
+        data=X,
+        heter_func=heterogeneity,
+        axis_limits=axis_limits,
+        candidate_conditioning_features=[0, 1, 2],
+        feature_names=["x1", "x2", "x3"],
+        target_name="y",
+    )
+    tree = best_level_wise.fit()
+
+    assert tree is not None
+
+    heter_decreasing_per_level = all([parent_heter_lower(n, True) for n in tree.nodes])
+    assert heter_decreasing_per_level
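As a sanity check on the heterogeneity function above: for a perfectly balanced two-class subgroup, the Gini impurity evaluates as follows.

import numpy as np

# labels [0, 0, 1, 1] give p = [0.5, 0.5],
# so Gini = 1 - (0.5**2 + 0.5**2) = 0.5
labels = np.array([0, 0, 1, 1])
_, counts = np.unique(labels, return_counts=True)
p = counts / counts.sum()
print(1 - np.sum(p**2))  # 0.5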
