policy aipw v3

AIgoGracia · AIgoGracia · commit 8f7fb9ac63fc · 2025-10-04T23:43:27.000+09:00
diff --git a/book/cate_and_policy/policy_learning.ipynb b/book/cate_and_policy/policy_learning.ipynb
@@ -282,6 +282,224 @@
     "print(f\"Mean outcome (untreated): {np.mean(Y[W == 0]):.6f}\")\n",
     "print(f\"Overall treatment effect: {np.mean(Y[W == 1]) - np.mean(Y[W == 0]):.6f}\")"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Simulate an observational study in which treatment assignment depends on X1 and X2\n",
+    "np.random.seed(123)\n",
+    "n, p = 1000, 4\n",
+    "X = np.random.uniform(0, 1, (n, p))\n",
+    "x1_centered = X[:, 0] - 0.5\n",
+    "x2_centered = X[:, 1] - 0.5\n",
+    "\n",
+    "# True propensity score (logistic in X1 and X2); not observed by analyst\n",
+    "e = 1 / (1 + np.exp(-2*x1_centered - 2*x2_centered))\n",
+    "W = np.random.binomial(1, e, n)\n",
+    "\n",
+    "# Outcome: baseline driven by X1, treatment effect equal to X2 - 0.5, plus Gaussian noise\n",
+    "noise = 0.1 * np.random.randn(n)\n",
+    "Y = 0.5 * x1_centered + x2_centered * W + noise"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Rescale outcomes to [0, 1] so they can be mapped onto the grayscale colormap\n",
+    "y_norm = (Y - Y.min()) / (Y.max() - Y.min())\n",
+    "\n",
+    "# One panel per treatment arm; the shade of each point encodes its normalized outcome\n",
+    "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))\n",
+    "\n",
+    "untreated_idx = W == 0\n",
+    "treated_idx = W == 1\n",
+    "panels = [\n",
+    "    (ax1, untreated_idx, 'D', 80, 'Untreated'),\n",
+    "    (ax2, treated_idx, 'o', 100, 'Treated'),\n",
+    "]\n",
+    "for ax, mask, marker, size, title in panels:\n",
+    "    # A single vectorized scatter call per panel instead of one call per observation\n",
+    "    ax.scatter(X[mask, 0], X[mask, 1], marker=marker, s=size,\n",
+    "               c=y_norm[mask], cmap='gray', vmin=0, vmax=1,\n",
+    "               edgecolors='black', linewidths=1)\n",
+    "    ax.set_xlabel('X1')\n",
+    "    ax.set_ylabel('X2')\n",
+    "    ax.set_title(title)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
+    "from sklearn.model_selection import KFold"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class CausalForest:\n",
+    "    \"\"\"\n",
+    "    Simplified stand-in for grf's causal_forest, built from scikit-learn forests.\n",
+    "\n",
+    "    Treatment effects come from a T-learner (one outcome forest per treatment\n",
+    "    arm), not from grf's causal splitting rule. The nuisance estimates stored on\n",
+    "    the fitted object (W_hat, Y_hat, mu_1, mu_0) are out-of-bag wherever a row\n",
+    "    was part of the training data: a row's prediction only averages trees whose\n",
+    "    bootstrap sample did not contain that row. In-sample forest predictions\n",
+    "    overfit, which would shrink the residuals used by downstream AIPW scores.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    n_estimators : int\n",
+    "        Tree budget; every internal forest is grown with n_estimators // 2 trees.\n",
+    "        Keep it large enough that each row is out-of-bag for at least one tree.\n",
+    "    max_features, min_samples_leaf\n",
+    "        Passed through to every scikit-learn forest.\n",
+    "    honest : bool\n",
+    "        Stored for API compatibility only; it does not affect the fit.\n",
+    "    W_hat : float or None\n",
+    "        Known treatment probability (randomized setting). When None, the\n",
+    "        propensity score is estimated with a classification forest.\n",
+    "    \"\"\"\n",
+    "    def __init__(self, n_estimators=2000, max_features='sqrt', min_samples_leaf=5, \n",
+    "                 honest=True, W_hat=None):\n",
+    "        self.n_estimators = n_estimators\n",
+    "        self.max_features = max_features\n",
+    "        self.min_samples_leaf = min_samples_leaf\n",
+    "        self.honest = honest\n",
+    "        self.W_hat_fixed = W_hat\n",
+    "\n",
+    "    def _new_forest(self, forest_cls):\n",
+    "        \"\"\"Build an unfitted forest with the shared hyperparameters and OOB tracking on.\"\"\"\n",
+    "        return forest_cls(\n",
+    "            n_estimators=self.n_estimators//2,\n",
+    "            max_features=self.max_features,\n",
+    "            min_samples_leaf=self.min_samples_leaf,\n",
+    "            oob_score=True,\n",
+    "            random_state=42\n",
+    "        )\n",
+    "\n",
+    "    def _arm_mean(self, X, Y, in_arm):\n",
+    "        \"\"\"Predict one arm's mean outcome for every row (zeros if the arm is empty).\"\"\"\n",
+    "        if not np.any(in_arm):\n",
+    "            return np.zeros(len(Y))\n",
+    "        model = self._new_forest(RandomForestRegressor)\n",
+    "        model.fit(X[in_arm], Y[in_arm])\n",
+    "        mu = model.predict(X)\n",
+    "        # Rows the forest was trained on get their out-of-bag prediction instead\n",
+    "        mu[in_arm] = model.oob_prediction_\n",
+    "        return mu\n",
+    "\n",
+    "    def fit(self, X, Y, W):\n",
+    "        \"\"\"Fit the nuisance forests; stores W_hat, Y_hat, mu_1, mu_0, tau_hat and returns self.\"\"\"\n",
+    "        self.X = X\n",
+    "        self.Y = Y\n",
+    "        self.W = W\n",
+    "        n = len(Y)\n",
+    "\n",
+    "        # If W.hat is provided (randomized setting), use it\n",
+    "        if self.W_hat_fixed is not None:\n",
+    "            self.W_hat = np.full(n, self.W_hat_fixed)\n",
+    "        else:\n",
+    "            # Estimate propensity score out-of-bag\n",
+    "            ps_model = self._new_forest(RandomForestClassifier)\n",
+    "            ps_model.fit(X, W)\n",
+    "            # Column 1 is P(W = 1 | X); clip to avoid division issues\n",
+    "            self.W_hat = np.clip(ps_model.oob_decision_function_[:, 1], 0.01, 0.99)\n",
+    "\n",
+    "        # Marginal outcome model E[Y | X], out-of-bag\n",
+    "        outcome_model = self._new_forest(RandomForestRegressor)\n",
+    "        outcome_model.fit(X, Y)\n",
+    "        self.Y_hat = outcome_model.oob_prediction_\n",
+    "\n",
+    "        # T-learner: separate outcome forests for treated and control units\n",
+    "        self.mu_1 = self._arm_mean(X, Y, W == 1)\n",
+    "        self.mu_0 = self._arm_mean(X, Y, W == 0)\n",
+    "\n",
+    "        # Treatment effect\n",
+    "        self.tau_hat = self.mu_1 - self.mu_0\n",
+    "\n",
+    "        return self\n",
+    "\n",
+    "    def predict(self):\n",
+    "        \"\"\"Return the fitted per-row treatment effects under the key 'predictions'.\"\"\"\n",
+    "        return {'predictions': self.tau_hat}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Train the forest and pull out its per-row treatment effect estimates\n",
+    "print(\"\\nFitting causal forest...\")\n",
+    "forest = CausalForest()\n",
+    "forest.fit(X, Y, W)\n",
+    "tau_hat = forest.predict()['predictions']\n",
+    "\n",
+    "# Nuisance estimates: propensity score and marginal outcome mean\n",
+    "e_hat = forest.W_hat\n",
+    "m_hat = forest.Y_hat\n",
+    "\n",
+    "# Arm-specific means implied by m(x) = mu_0(x) + e(x) * tau(x)\n",
+    "mu_hat_1 = m_hat + (1 - e_hat) * tau_hat\n",
+    "mu_hat_0 = m_hat - e_hat * tau_hat\n",
+    "\n",
+    "# AIPW scores: model prediction plus an inverse-propensity-weighted residual correction\n",
+    "ipw_1 = W / e_hat\n",
+    "ipw_0 = (1 - W) / (1 - e_hat)\n",
+    "gamma_hat_1 = mu_hat_1 + ipw_1 * (Y - mu_hat_1)\n",
+    "gamma_hat_0 = mu_hat_0 + ipw_0 * (Y - mu_hat_0)\n",
+    "\n",
+    "print(\"Causal forest fitted successfully.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# POLICY EVALUATION WITH AIPW\n",
+    "print(\"\\n--- Policy A (X1 > 0.5 & X2 > 0.5) with AIPW ---\")\n",
+    "pi = (X[:, 0] > 0.5) & (X[:, 1] > 0.5)\n",
+    "gamma_hat_pi = pi * gamma_hat_1 + (1 - pi) * gamma_hat_0\n",
+    "value_estimate = np.mean(gamma_hat_pi)\n",
+    "value_stderr = np.std(gamma_hat_pi) / np.sqrt(len(gamma_hat_pi))\n",
+    "print(f\"Value estimate: {value_estimate:.10f} Std. Error: {value_stderr:.10f}\")\n",
+    "\n",
+    "print(\"\\n--- Random Policy (p=0.75) with AIPW ---\")\n",
+    "pi_random = 0.75\n",
+    "gamma_hat_pi = pi_random * gamma_hat_1 + (1 - pi_random) * gamma_hat_0\n",
+    "value_estimate = np.mean(gamma_hat_pi)\n",
+    "value_stderr = np.std(gamma_hat_pi) / np.sqrt(len(gamma_hat_pi))\n",
+    "print(f\"Value estimate: {value_estimate:.10f} Std. Error: {value_stderr:.10f}\")\n",
+    "print(\"\\n--- Difference: Policy A vs Never Treat ---\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# AIPW scores for Policy A\n",
+    "pi = (X[:, 0] > 0.5) & (X[:, 1] > 0.5)\n",
+    "gamma_hat_pi = pi * gamma_hat_1 + (1 - pi) * gamma_hat_0\n",
+    "\n",
+    "# AIPW scores for Never Treat\n",
+    "pi_never = 0\n",
+    "gamma_hat_pi_never = pi_never * gamma_hat_1 + (1 - pi_never) * gamma_hat_0\n",
+    "\n",
+    "# Difference\n",
+    "diff_scores = gamma_hat_pi - gamma_hat_pi_never\n",
+    "diff_estimate = np.mean(diff_scores)\n",
+    "diff_stderr = np.std(diff_scores) / np.sqrt(len(diff_scores))\n",
+    "print(f\"diff estimate: {diff_estimate:.10f} Std. Error: {diff_stderr:.10f}\")\n",
+    "\n",
+    "print(\"\\n\" + \"=\" * 70) \n",
+    "print(\"ANALYSIS COMPLETE\")\n",
+    "print(\"=\" * 70)"
+   ]
   }
  ],
  "metadata": {