add tests

dimitrskpl · dimitrskpl · commit 7dfe3a39f651 · 2025-07-09T16:42:02.000+03:00
diff --git a/tests/test_datasets.py b/tests/test_datasets.py
@@ -0,0 +1,15 @@
+import effector
+
+
+def test_datasets():
+    """Check IndependentUniform's output shape and that BikeSharing loads data."""
+    dim = 3
+    X = effector.datasets.IndependentUniform(dim=dim, low=-1, high=1).generate_data(
+        1000, seed=21
+    )
+    assert X.shape == (1000, dim)
+
+    data = effector.datasets.BikeSharing()
+    data.fetch_and_preprocess()
+    data.postprocess()  # was a bare attribute reference, so the step never ran
+    assert data.dataset is not None
diff --git a/tests/test_plots.py b/tests/test_plots.py
@@ -0,0 +1,114 @@
+import numpy as np
+import effector
+import matplotlib.pyplot as plt
+
+
+def test_plots():
+    """Smoke-test ``plot`` for every effect method on a linear toy model.
+
+    Each method is plotted for all three features, first on the raw data and
+    then on standardized data with ``scale_x``/``scale_y`` and method-specific
+    options. Only the return value of ``plot`` is checked (it must not be
+    ``None``); the figures themselves are not inspected.
+    """
+    np.random.seed(21)
+
+    # x1 and x3 are uniform in [0, 1]; x2 is x1 plus Gaussian noise (std 0.1).
+    n_samples, x_min, x_max, x2_sigma = 1000, 0, 1, 0.1
+    x1 = np.random.uniform(x_min, x_max, size=n_samples)
+    x2 = np.random.normal(loc=x1, scale=x2_sigma)
+    x3 = np.random.uniform(x_min, x_max, size=n_samples)
+    X = np.stack((x1, x2, x3), axis=-1)
+
+    def predict(x):
+        """Linear model: 7*x1 - 3*x2 + 4*x3."""
+        return 7 * x[:, 0] - 3 * x[:, 1] + 4 * x[:, 2]
+
+    def predict_grad(x):
+        """Jacobian of ``predict``: the constant row (7, -3, 4) per instance."""
+        ones = np.ones([x.shape[0]])
+        return np.stack([7 * ones, -3 * ones, 4 * ones], axis=-1)
+
+    # A fresh explainer is built for every plotted feature.
+    explainers = {
+        "PDP": lambda data: effector.PDP(data=data, model=predict),
+        "DerPDP": lambda data: effector.DerPDP(
+            data=data, model=predict, model_jac=predict_grad
+        ),
+        "ALE": lambda data: effector.ALE(data=data, model=predict),
+        "RHALE": lambda data: effector.RHALE(
+            data=data, model=predict, model_jac=predict_grad
+        ),
+        "ShapDP": lambda data: effector.ShapDP(data=data, model=predict),
+    }
+
+    def assert_plots(method, data, per_feature_kwargs=None, **plot_kwargs):
+        """Plot each feature with ``method`` and require a non-``None`` result."""
+        try:
+            for i in range(data.shape[1]):
+                extra = per_feature_kwargs(i) if per_feature_kwargs else {}
+                result = explainers[method](data).plot(
+                    feature=i, show_plot=False, **plot_kwargs, **extra
+                )
+                assert result is not None, f"{method}: no plot for feature {i}"
+        finally:
+            # Release the figures even on failure so they do not accumulate.
+            plt.close("all")
+
+    # Raw data, default plotting options.
+    assert_plots("PDP", X, y_limits=[-5, 5])
+    assert_plots("DerPDP", X, heterogeneity=True, dy_limits=[-10, 10])
+    assert_plots("ALE", X, y_limits=[-5, 5], dy_limits=[-10, 10])
+    assert_plots("RHALE", X, y_limits=[-5, 5], dy_limits=[-10, 10])
+    assert_plots("ShapDP", X)
+
+    # Standardized data, with the statistics passed as scale_x / scale_y.
+    x_mean = np.mean(X, axis=0)
+    x_std = np.std(X, axis=0)
+    X_scaled = (X - x_mean) / x_std
+    y_scaled = predict(X_scaled)
+    scale_y = {"mean": np.mean(y_scaled), "std": np.std(y_scaled)}
+
+    def scaling(i):
+        """Scaling kwargs for feature ``i``."""
+        return {"scale_x": {"mean": x_mean[i], "std": x_std[i]}, "scale_y": scale_y}
+
+    assert_plots(
+        "PDP",
+        X_scaled,
+        scaling,
+        y_limits=[-5, 5],
+        use_vectorized=False,
+        nof_ice=200,
+        nof_points=25,
+    )
+    assert_plots("PDP", X_scaled, scaling, y_limits=[-5, 5], heterogeneity=False)
+    assert_plots(
+        "DerPDP",
+        X_scaled,
+        scaling,
+        heterogeneity=True,
+        dy_limits=[-10, 10],
+        use_vectorized=False,
+        nof_ice=200,
+        nof_points=25,
+    )
+    assert_plots(
+        "ALE",
+        X_scaled,
+        scaling,
+        y_limits=[-5, 5],
+        dy_limits=[-10, 10],
+        centering=False,
+        show_avg_output=True,
+        show_only_aggregated=True,
+    )
+    assert_plots("RHALE", X_scaled, scaling, y_limits=[-5, 5], dy_limits=[-10, 10])
+    assert_plots(
+        "ShapDP",
+        X_scaled,
+        scaling,
+        nof_shap_values=20,
+        nof_points=25,
+        only_shap_values=True,
+    )
diff --git a/tests/test_space_partitioning.py b/tests/test_space_partitioning.py
@@ -1,60 +1,71 @@
-from effector.space_partitioning import Best
+from effector.space_partitioning import *
 import numpy as np
 
-np.random.seed(0)
-N = 1000
-D = 3
-# Generate features uniformly in [0, 10].
-X = np.random.uniform(0, 10, size=(N, D))
-
-# Create a target variable y with four groups.
-# Group 1: x2 < 3 and x3 < 5  -> label 0
-# Group 2: x2 < 3 and x3 >= 5 -> label 1
-# Group 3: x2 >= 3 and x2 < 5 -> label 2
-# Group 4: x2 >= 3 and x2 >= 5 -> label 3
-y = np.empty(N, dtype=int)
-for i in range(N):
-    if X[i, 1] < 3:
-        y[i] = 0 if X[i, 1] < 1.5 else 1
-    else:
-        y[i] = 2 if X[i, 1] < 5 else 3
-
-# Define a heterogeneity function (Gini impurity) that uses the target y.
-def heterogeneity(mask):
-    indices = np.where(mask)[0]
-    if len(indices) < 50:
-        return 10000000000
-    labels = y[indices]
-    classes, counts = np.unique(labels, return_counts=True)
-    p = counts / counts.sum()
-    return 1 - np.sum(p ** 2)
-
-# Set axis limits (min and max for each feature).
-axis_limits = np.array([[0, 10], [0, 10], [0, 10]]).T
-
-# We want to allow splits on x1 and x2. To do so, we choose the primary feature as x3 (index 2)
-# and explicitly pass candidate conditioning features [0, 1].
-best = Best(
-    min_heterogeneity_decrease_pcg=0.1,
-    heter_small_enough=0.0,
-    max_depth=2,
-    min_samples_leaf=10,
-    numerical_features_grid_size=20,
-    search_partitions_when_categorical=False,
-)
-
-best.compile(
-    feature=0,  # primary feature (x3) -- not used for splitting in this test.
-    data=X,
-    heter_func=heterogeneity,
-    axis_limits=axis_limits,
-    candidate_conditioning_features=[0, 1, 2],
-    feature_names=["x1", "x2", "x3"],
-    target_name="y"
-)
-tree = best.fit()
-
-print("Constructed Tree:")
-print(tree)
-
-tree.show_full_tree()
+
+def test_space_partitioning():
+    """Fit ``Best`` and ``BestLevelWise`` and check the trees they return.
+
+    For each partitioner the fitted tree must exist and, for every node, the
+    weighted heterogeneity must not increase on the way from the root down
+    to that node.
+    """
+    np.random.seed(0)
+    n_samples, n_features = 1000, 3
+    # Features are drawn uniformly from [0, 10].
+    X = np.random.uniform(0, 10, size=(n_samples, n_features))
+
+    # The label depends on x2 (column 1) only, with cut points 1.5, 3 and 5:
+    #   x2 < 1.5 -> 0,  1.5 <= x2 < 3 -> 1,  3 <= x2 < 5 -> 2,  x2 >= 5 -> 3
+    y = np.digitize(X[:, 1], [1.5, 3, 5])
+
+    def heterogeneity(mask):
+        """Gini impurity of the labels selected by ``mask``."""
+        indices = np.where(mask)[0]
+        if len(indices) < 50:
+            # Regions with fewer than 50 points get a huge penalty value.
+            return 10000000000
+        _, counts = np.unique(y[indices], return_counts=True)
+        p = counts / counts.sum()
+        return 1 - np.sum(p**2)
+
+    def heterogeneity_never_increases(node):
+        """Return True if no node on the path to the root exceeds its parent."""
+        while node.parent_node is not None:
+            parent = node.parent_node
+            child_h = node.info["weighted_heterogeneity"]
+            parent_h = parent.info["weighted_heterogeneity"]
+            if not child_h <= parent_h:  # written this way so NaN also fails
+                return False
+            node = parent
+        return True
+
+    # Row 0 holds the per-feature minimum, row 1 the maximum.
+    axis_limits = np.array([[0, 10], [0, 10], [0, 10]]).T
+
+    # Both partitioners get identical settings and the same problem.
+    for partitioner_cls in (Best, BestLevelWise):
+        partitioner = partitioner_cls(
+            min_heterogeneity_decrease_pcg=0.1,
+            heter_small_enough=0.0,
+            max_depth=2,
+            min_samples_leaf=10,
+            numerical_features_grid_size=20,
+            search_partitions_when_categorical=False,
+        )
+        partitioner.compile(
+            feature=0,  # x1; it is also listed among the split candidates
+            data=X,
+            heter_func=heterogeneity,
+            axis_limits=axis_limits,
+            candidate_conditioning_features=[0, 1, 2],
+            feature_names=["x1", "x2", "x3"],
+            target_name="y",
+        )
+        tree = partitioner.fit()
+
+        name = partitioner_cls.__name__
+        assert tree is not None, f"{name}.fit() returned None"
+        for node in tree.nodes:
+            assert heterogeneity_never_increases(node), (
+                f"{name}: heterogeneity increases on the path to a node"
+            )