Implement IV 2SLS basic code

sanakang0615 · sanakang0615 · commit 8ff274b8b965 · 2025-09-20T21:48:32.000+09:00
diff --git a/book/ate/iv.ipynb b/book/ate/iv.ipynb
@@ -16,6 +16,217 @@
     "- Weak IV, DML 응용 등 (causal-ml book에 다양한 상황이 제시되어 있음. 실용적인 내용은 최대한 다루기)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install linearmodels (provides IV2SLS) into the running kernel's environment.\n",
+    "# Note: the magic must be written as `%pip` with no space after `%`.\n",
+    "%pip install linearmodels"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from linearmodels.iv import IV2SLS"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "push delivered(푸시 메시지 전달)와 In-App 구매력 사이의 연관관계는 그대로 인과관계로 해석할 수 없습니다. 소득이 confounder로 작용하기 때문입니다. (부유한 고객은 최신 스마트폰을 가지고 있어 푸시 메시지를 잘 받고, 동시에 In-App 구매력도 높습니다.)\n",
+    "\n",
+    "IV를 사용할 때, exclusion restriction이 반드시 필요합니다. 이는 정량적으로 검증할 수 없지만, 이 경우에는 push assigned(푸시 할당)가 랜덤 할당이고 구매에 영향을 줄 수 있는 다른 채널이 없기 때문에 exclusion restriction을 쉽게 주장할 수 있습니다. 다시 말해, Push Assigned는 오직 Push Delivered를 통해서만 구매에 영향을 미칩니다."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>in_app_purchase</th>\n",
+       "      <th>push_assigned</th>\n",
+       "      <th>push_delivered</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>47</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>43</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>51</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>49</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>79</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   in_app_purchase  push_assigned  push_delivered\n",
+       "0               47              1               1\n",
+       "1               43              1               0\n",
+       "2               51              1               1\n",
+       "3               49              0               0\n",
+       "4               79              0               0"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Read the push-notification dataset (relative path, resolved against the kernel's working directory).\n",
+    "push_csv_path = \"../data/matheus_data/app_engagement_push.csv\"\n",
+    "data = pd.read_csv(push_csv_path)\n",
+    "# Show the first rows as the cell's rich output.\n",
+    "data.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1st Stage (Relevance)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1st Stage: Push Assignment -> Push Delivered\n",
+      "                               Parameter Estimates                               \n",
+      "=================================================================================\n",
+      "               Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI\n",
+      "---------------------------------------------------------------------------------\n",
+      "Intercept       2.22e-16                                                         \n",
+      "push_assigned     0.7176     0.0064     112.07     0.0000      0.7050      0.7301\n",
+      "=================================================================================\n",
+      "Compliance rate: 71.76%\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/sanakang/anaconda3/lib/python3.11/site-packages/linearmodels/iv/results.py:198: RuntimeWarning: invalid value encountered in sqrt\n",
+      "  std_errors = sqrt(diag(self.cov))\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"1st Stage: Push Assignment -> Push Delivered\")\n",
+    "# Relevance check: regress the treatment (delivery) on the instrument (assignment).\n",
+    "first_stage_formula = \"push_delivered ~ 1 + push_assigned\"\n",
+    "first_stage = IV2SLS.from_formula(first_stage_formula, data).fit()\n",
+    "first_stage_table = first_stage.summary.tables[1]\n",
+    "print(first_stage_table)\n",
+    "# The slope on push_assigned is what this notebook reports as the compliance rate.\n",
+    "compliance_rate = first_stage.params['push_assigned']\n",
+    "print(f\"Compliance rate: {compliance_rate:.2%}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2SLS Estimation: LATE"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "                               Parameter Estimates                                \n",
+      "==================================================================================\n",
+      "                Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI\n",
+      "----------------------------------------------------------------------------------\n",
+      "Intercept          69.292     0.3624     191.22     0.0000      68.581      70.002\n",
+      "push_delivered     3.2938     0.7165     4.5974     0.0000      1.8896      4.6981\n",
+      "==================================================================================\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 2SLS: the bracketed term marks push_delivered as endogenous, instrumented by push_assigned.\n",
+    "late_formula = \"in_app_purchase ~ 1 + [push_delivered ~ push_assigned]\"\n",
+    "iv_model = IV2SLS.from_formula(late_formula, data).fit()\n",
+    "iv_param_table = iv_model.summary.tables[1]\n",
+    "print(iv_param_table)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "LATE 추정치: 3.294\n",
+      "95% 신뢰구간: [1.890, 4.698]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Point estimate of the LATE: the 2SLS coefficient on the instrumented treatment.\n",
+    "late_estimate = iv_model.params['push_delivered']\n",
+    "# Take the 95% interval from the fitted results instead of hard-coding the 1.96 normal\n",
+    "# critical value, so it stays consistent with the Lower/Upper CI columns of the summary table.\n",
+    "ci_lower, ci_upper = iv_model.conf_int(level=0.95).loc['push_delivered', ['lower', 'upper']]\n",
+    "\n",
+    "print(f\"LATE 추정치: {late_estimate:.3f}\")\n",
+    "print(f\"95% 신뢰구간: [{ci_lower:.3f}, {ci_upper:.3f}]\")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -47,7 +258,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.7"
+   "version": "3.11.4"
   }
  },
  "nbformat": 4,