givasile
diff --git a/‎calm-experiments/concept_image.ipynb‎
Lines changed: 221 additions & 0 deletions b/‎calm-experiments/concept_image.ipynb‎
Lines changed: 221 additions & 0 deletions
diff --git a/‎calm-experiments/configs/experiment_config_classification.yaml‎
Lines changed: 62 additions & 0 deletions b/‎calm-experiments/configs/experiment_config_classification.yaml‎
Lines changed: 62 additions & 0 deletions
diff --git a/‎calm-experiments/configs/experiment_config_classification_main.yaml‎
Lines changed: 51 additions & 0 deletions b/‎calm-experiments/configs/experiment_config_classification_main.yaml‎
Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,221 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "from scipy.special import expit  # sigmoid for smooth saturation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Concept Image - Figure 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# f1(x1 | x3)\n",
+    "def f11(x1):\n",
+    "    \"\"\"Cubic-plus-linear curve; plotted in this notebook as the x1 effect for x3 > 0.\"\"\"\n",
+    "    cubic_part = 1.2 * x1**3\n",
+    "    linear_part = 0.1 * x1\n",
+    "    return cubic_part + linear_part\n",
+    "\n",
+    "def f12(x1):\n",
+    "    \"\"\"Scaled tanh curve (bounded by +/-0.9); plotted as the x1 effect for x3 <= 0.\"\"\"\n",
+    "    squashed = np.tanh(2 * x1)\n",
+    "    return 0.9 * squashed\n",
+    "\n",
+    "def f1(x1, x3):\n",
+    "    \"\"\"Regional effect of x1, switched by the sign of x3.\n",
+    "\n",
+    "    Uses f11 where x3 > 0 and f12 where x3 <= 0, i.e. the two curves that\n",
+    "    the figures label with these conditions. Inputs may be scalars or\n",
+    "    arrays that broadcast against each other; the result is a float array.\n",
+    "    \"\"\"\n",
+    "    x1 = np.asarray(x1, dtype=float)  # avoid integer truncation of the output\n",
+    "    # np.where evaluates both curves everywhere and selects per element,\n",
+    "    # so no boolean-mask assignment into a preallocated buffer is needed.\n",
+    "    return np.where(np.asarray(x3) > 0, f11(x1), f12(x1))\n",
+    "\n",
+    "def f21(x2):\n",
+    "    \"\"\"Sine-shaped x2 effect plotted for x1 < -0.4: amplitude 0.4, shifted down by 0.4.\"\"\"\n",
+    "    phase = np.pi * x2 / 2 - 0.2\n",
+    "    return 0.4 * np.sin(phase) - 0.4\n",
+    "\n",
+    "def f22(x2):\n",
+    "    \"\"\"Sine-shaped x2 effect plotted for x1 in [-0.4, 0.4]: amplitude 0.6, shifted down by 0.2.\"\"\"\n",
+    "    phase = np.pi * x2 / 2 - 0.1\n",
+    "    return 0.6 * np.sin(phase) - 0.2\n",
+    "\n",
+    "def f23(x2):\n",
+    "    \"\"\"Sine-shaped x2 effect plotted for x1 >= 0.4: amplitude 0.8, no phase shift or offset.\"\"\"\n",
+    "    phase = np.pi * x2 / 2\n",
+    "    return 0.8 * np.sin(phase)\n",
+    "\n",
+    "def f2(x2, x1):\n",
+    "    \"\"\"Regional effect of x2: apply f21, f22 or f23 per element, chosen by x1.\n",
+    "\n",
+    "    Regions: x1 < -0.4 -> f21, -0.4 <= x1 < 0.4 -> f22, x1 >= 0.4 -> f23.\n",
+    "    Entries that match no region (e.g. NaN in x1) keep the initial value 0.\n",
+    "    \"\"\"\n",
+    "    out = np.zeros_like(x2)\n",
+    "    regions = (\n",
+    "        (x1 < -0.4, f21),\n",
+    "        (np.logical_and(x1 >= -0.4, x1 < 0.4), f22),\n",
+    "        (x1 >= 0.4, f23),\n",
+    "    )\n",
+    "    for mask, curve in regions:\n",
+    "        out[mask] = curve(x2[mask])\n",
+    "    return out\n",
+    "\n",
+    "\n",
+    "def f3(x3):\n",
+    "    \"\"\"Centred, scaled sigmoid of x3: bounded by +/-0.75, about +/-0.57 on [-1, 1].\"\"\"\n",
+    "    centred = expit(2 * x3) - 0.5\n",
+    "    return 1.5 * centred\n",
+    "\n",
+    "import os\n",
+    "import matplotlib.ticker as ticker\n",
+    "\n",
+    "# Shared grid on [-1, 1] used as the horizontal axis of every curve below.\n",
+    "x = np.linspace(start=-1, stop=1, num=200)\n",
+    "fontsize = 17  # font size for the legends and annotations of this cell\n",
+    "\n",
+    "# Make sure the output directory exists (no error if it is already there).\n",
+    "folder = \"test_synth_user_study/\"\n",
+    "os.makedirs(folder, exist_ok=True)\n",
+    "# --- Figure 1: f1(x1 | x3) ---\n",
+    "plt.figure()\n",
+    "for curve, legend_label in (\n",
+    "    (f11, r\"$f(x_1 \\mid x_3 > 0)$\"),\n",
+    "    (f12, r\"$f(x_1 \\mid x_3 \\leq 0)$\"),\n",
+    "):\n",
+    "    plt.plot(x, curve(x), label=legend_label)\n",
+    "\n",
+    "# Dotted markers at x1 = -0.4 and x1 = 0.4, each with an annotation underneath.\n",
+    "for boundary in (-0.4, 0.4):\n",
+    "    plt.axvline(x=boundary, color='gray', linestyle=':')\n",
+    "for boundary in (-0.4, 0.4):\n",
+    "    plt.text(boundary, -0.9, r'$x_2\\ (\\uparrow):[0,0.37]$', ha='center', va='top', fontsize=fontsize)\n",
+    "\n",
+    "# Concept sketch: no ticks and no frame around the axes.\n",
+    "plt.xticks([])\n",
+    "plt.yticks([])\n",
+    "plt.legend(fontsize=fontsize)\n",
+    "plt.tight_layout()\n",
+    "ax = plt.gca()\n",
+    "for border in ax.spines.values():\n",
+    "    border.set_visible(False)\n",
+    "\n",
+    "plt.show()\n",
+    "\n",
+    "# --- Figure 2: f2(x2 | x1) ---\n",
+    "plt.figure()\n",
+    "region_curves = (\n",
+    "    (f21, r\"$f(x_2 \\mid x_1 < -0.4)$\"),\n",
+    "    (f22, r\"$f(x_2 \\mid x_1 \\in [-0.4, 0.4])$\"),\n",
+    "    (f23, r\"$f(x_2 \\mid x_1 \\geq 0.4)$\"),\n",
+    ")\n",
+    "for curve, legend_label in region_curves:\n",
+    "    plt.plot(x, curve(x), label=legend_label)\n",
+    "\n",
+    "# Concept sketch: no ticks and no frame around the axes.\n",
+    "plt.xticks([])\n",
+    "plt.yticks([])\n",
+    "plt.legend(fontsize=fontsize)\n",
+    "plt.tight_layout()\n",
+    "ax = plt.gca()\n",
+    "for border in ax.spines.values():\n",
+    "    border.set_visible(False)\n",
+    "\n",
+    "plt.show()\n",
+    "\n",
+    "# --- Figure 3: f3(x3) ---\n",
+    "# x3 has a single curve; the dotted line at 0 marks where x3 switches the\n",
+    "# regime of x1 (f1 branches on x3 > 0).\n",
+    "plt.figure()\n",
+    "plt.plot(x, f3(x), label=r\"$f_d(x_d)$\")\n",
+    "plt.axvline(x=0, color='gray', linestyle=':')\n",
+    "# Annotation fixed: decimal point in 0.42 (was \"0,42\") and the same\n",
+    "# \"(arrow):[range]\" format as the annotations of the x1 figure.\n",
+    "plt.text(0, -0.4, r'$x_1\\ (\\updownarrow):[-0.42,0.42]$', ha='center', va='top', fontsize=fontsize)\n",
+    "\n",
+    "# Concept sketch: no ticks and no frame around the axes.\n",
+    "plt.xticks([])\n",
+    "plt.yticks([])\n",
+    "plt.legend(fontsize=fontsize)\n",
+    "plt.tight_layout()\n",
+    "ax = plt.gca()\n",
+    "for spine in ax.spines.values():\n",
+    "    spine.set_visible(False)\n",
+    "\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The regions are defined conditioning on the interacting features:\n",
+    "* The effect of x1 is conditioned on x3 (Cx3)\n",
+    "* The effect of x2 is conditioned on x1 (Cx1)\n",
+    "* xd does not interact with any other feature and thus has a single plot"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Figure 2: CALM plot for x1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fontsize = 15\n",
+    "# --- Figure 1: f1(x1 | x3) ---\n",
+    "# The two x1 curves again, this time with labelled axes and tick marks.\n",
+    "plt.figure()\n",
+    "for curve, legend_label in (\n",
+    "    (f11, r\"$f(x_1 \\mid x_3 > 0)$\"),\n",
+    "    (f12, r\"$f(x_1 \\mid x_3 \\leq 0)$\"),\n",
+    "):\n",
+    "    plt.plot(x, curve(x), label=legend_label)\n",
+    "\n",
+    "# Dotted markers at x1 = -0.4 and x1 = 0.4, each with an annotation underneath.\n",
+    "for boundary in (-0.4, 0.4):\n",
+    "    plt.axvline(x=boundary, color='gray', linestyle=':')\n",
+    "for boundary in (-0.4, 0.4):\n",
+    "    plt.text(boundary, -0.9, r'$x_2\\ (\\uparrow):[0,0.37]$', ha='center', va='top', fontsize=fontsize)\n",
+    "\n",
+    "plt.xlabel(r\"$x_1$\", fontsize=fontsize)\n",
+    "plt.ylabel(r\"$y$\", fontsize=fontsize)\n",
+    "plt.xticks([-1, -.5, 0, 0.5, 1])\n",
+    "plt.yticks([-1.5, -.75, 0, .75, 1.5])\n",
+    "plt.legend(fontsize=fontsize)\n",
+    "plt.tight_layout()\n",
+    "\n",
+    "# Tick labels one point smaller than the rest of the text.\n",
+    "ax = plt.gca()\n",
+    "ax.tick_params(axis='both', labelsize=fontsize-1)\n",
+    "\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Each curve gives the contribution of $ x_1 $ to $ y $ in a specific region.\n",
+    "* The blue curve when $ x_3 > 0 $ \n",
+    "* The orange curve when $ x_3 \\leq 0 $\n",
+    "\n",
+    "For example, at $ x_1 = -0.5 $, the contribution is approximately $-0.2$ (blue) or $-0.7$ (orange), depending on $ x_3 $.\n",
+    "The plots also illustrate how altering $ x_1 $ to $x_1 \\rightarrow x_1 + \\delta$ impacts the prediction.\n",
+    "Vertical dotted lines mark points of a hidden discontinuity, which arises because $x_1$ participates as an interaction term for feature $x_2$. As shown in the previous image, the effect of $ x_2 $ is conditioned on $x_1 < -0.4 $, $-0.4 \\leq x_1 < 0.4 $ and $x_1 \\geq 0.4$; therefore, in this figure we observe vertical lines at $x_1 = \\pm 0.4$.\n",
+    "If a change in $x_1$ does not cross a vertical line, the change in the output $ (\\Delta y) $ equals the curve difference $ (\\Delta f_i )$.\n",
+    "Crossing a line signifies a hidden jump, in the range $[\\alpha, \\beta]$, so \n",
+    "$ \\Delta f_i + \\alpha \\leq \\Delta y \\leq \\Delta f_i + \\beta$.\n",
+    "\n",
+    "Arrows provide a fast understanding of the jump:\n",
+    "* $\\uparrow$ means $\\Delta y > \\Delta f_i$,\n",
+    "* $\\downarrow$ means $\\Delta y < \\Delta f_i$,\n",
+    "* $\\updownarrow$ means it depends."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "CALM-ENV",
+   "language": "python",
+   "name": "calm-env"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
@@ -0,0 +1,62 @@
+task: classification
+stop_on_error: false
+random_seed: 42
+test_size: 0.2
+output_file: results_classification.csv
+append_to_file: true
+kfold_n_splits: 5
+
+datasets:
+  - name: Adult
+    module: effector.calm.datasets.adult
+  - name: COMPAS
+    module: effector.calm.datasets.compas
+  - name: HELOC
+    module: effector.calm.datasets.heloc
+  - name: MIMIC2
+    module: effector.calm.datasets.mimic2
+  - name: PMLB_APPENDICITIS
+    module: effector.calm.datasets.pmlb
+  - name: PMLB_PHONEME
+    module: effector.calm.datasets.pmlb
+  - name: PMLB_SPECTF
+    module: effector.calm.datasets.pmlb
+  - name: Magic
+    module: effector.calm.datasets.magic
+  - name: Bank
+    module: effector.calm.datasets.bank
+  - name: PMLB_CHURN
+    module: effector.calm.datasets.pmlb
+
+methods:
+  - name: DNNClassifier
+    type: blackbox
+  - name: RFClassifier
+    type: blackbox
+  - name: XGBClassifier
+    type: blackbox
+  - name: MaskedNAMClassifier
+    type: maskedgam
+    parameters: {}
+  - name: PyGAMClassifier
+    type: maskedgam
+    parameters: {}
+  - name: NoInteractionsEBMClassifier
+    type: maskedgam
+    parameters: {}
+  - name: CALMClassifier
+    type: calm
+    parameters:
+      region_detector: ["RegionalPDP", "RegionalRHALE"]
+      masked_gam_name: ["NoInteractionsEBMClassifier", "PyGAMClassifier", "MaskedNAMClassifier"]
+      blackbox_model: ["DNNClassifier", "RFClassifier", "XGBClassifier"]
+  - name: EBM2Classifier
+    type: competitor
+  - name: NodeGAM2Classifier
+    type: competitor
+    parameters:
+      max_time: [300]
+  - name: GAMINetClassifier
+    type: competitor
+    
+metrics: [accuracy, balanced_accuracy, f1]
@@ -0,0 +1,51 @@
+task: classification
+stop_on_error: false
+random_seed: 42
+test_size: 0.2
+output_file: results_classification_main.csv
+append_to_file: true
+kfold_n_splits: 1
+
+datasets:
+  - name: Adult
+    module: effector.calm.datasets.adult
+  - name: COMPAS
+    module: effector.calm.datasets.compas
+  - name: HELOC
+    module: effector.calm.datasets.heloc
+  - name: MIMIC2
+    module: effector.calm.datasets.mimic2
+  - name: PMLB_APPENDICITIS
+    module: effector.calm.datasets.pmlb
+  - name: PMLB_PHONEME
+    module: effector.calm.datasets.pmlb
+  - name: PMLB_SPECTF
+    module: effector.calm.datasets.pmlb
+  - name: Magic
+    module: effector.calm.datasets.magic
+  - name: Bank
+    module: effector.calm.datasets.bank
+  - name: PMLB_CHURN
+    module: effector.calm.datasets.pmlb
+
+methods:
+  - name: XGBClassifier
+    type: blackbox
+  - name: MaskedNAMClassifier
+    type: maskedgam
+    parameters: {}
+  - name: NoInteractionsEBMClassifier
+    type: maskedgam
+    parameters: {}
+  - name: CALMClassifier
+    type: calm
+  - name: EBM2Classifier
+    type: competitor
+  - name: NodeGAM2Classifier
+    type: competitor
+    parameters:
+      max_time: [300]
+  - name: GAMINetClassifier
+    type: competitor
+    
+metrics: [accuracy, balanced_accuracy, f1]