|
23 | 23 | }, |
24 | 24 | { |
25 | 25 | "cell_type": "code", |
26 | | - "execution_count": 289, |
| 26 | + "execution_count": 2, |
27 | 27 | "id": "82d3b359", |
28 | 28 | "metadata": {}, |
29 | | - "outputs": [], |
| 29 | + "outputs": [ |
| 30 | + { |
| 31 | + "name": "stderr", |
| 32 | + "output_type": "stream", |
| 33 | + "text": [ |
| 34 | + "/Users/hc.cho/Projects/awesome-causal-inference-python/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", |
| 35 | + " from .autonotebook import tqdm as notebook_tqdm\n" |
| 36 | + ] |
| 37 | + } |
| 38 | + ], |
30 | 39 | "source": [ |
31 | 40 | "import numpy as np\n", |
32 | 41 | "import pandas as pd\n", |
|
64 | 73 | }, |
65 | 74 | { |
66 | 75 | "cell_type": "code", |
67 | | - "execution_count": 290, |
| 76 | + "execution_count": 3, |
68 | 77 | "id": "978da1a6", |
69 | 78 | "metadata": {}, |
70 | 79 | "outputs": [ |
| 80 | + { |
| 81 | + "name": "stderr", |
| 82 | + "output_type": "stream", |
| 83 | + "text": [ |
| 84 | + "/var/folders/fw/d6t_wpzj2zb9lyd3st5l4dfm0000gn/T/ipykernel_39769/3414311417.py:1: FutureWarning: The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\\s+'`` instead\n", |
| 85 | + " data_mpg = pd.read_csv(\n" |
| 86 | + ] |
| 87 | + }, |
71 | 88 | { |
72 | 89 | "name": "stdout", |
73 | 90 | "output_type": "stream", |
|
188 | 205 | "4 70.0 1.0 ford torino " |
189 | 206 | ] |
190 | 207 | }, |
191 | | - "execution_count": 290, |
| 208 | + "execution_count": 3, |
192 | 209 | "metadata": {}, |
193 | 210 | "output_type": "execute_result" |
194 | 211 | } |
|
210 | 227 | }, |
211 | 228 | { |
212 | 229 | "cell_type": "code", |
213 | | - "execution_count": 291, |
| 230 | + "execution_count": 4, |
214 | 231 | "id": "849d5a67", |
215 | 232 | "metadata": {}, |
216 | 233 | "outputs": [ |
|
321 | 338 | "4 17.0 8.0 302.0 140.0 3449.0 10.5" |
322 | 339 | ] |
323 | 340 | }, |
324 | | - "execution_count": 291, |
| 341 | + "execution_count": 4, |
325 | 342 | "metadata": {}, |
326 | 343 | "output_type": "execute_result" |
327 | 344 | } |
|
367 | 384 | }, |
368 | 385 | { |
369 | 386 | "cell_type": "code", |
370 | | - "execution_count": 292, |
| 387 | + "execution_count": 5, |
371 | 388 | "id": "d9267ebd", |
372 | 389 | "metadata": {}, |
373 | 390 | "outputs": [ |
|
405 | 422 | }, |
406 | 423 | { |
407 | 424 | "cell_type": "code", |
408 | | - "execution_count": 293, |
| 425 | + "execution_count": 6, |
409 | 426 | "id": "e9cff8ed", |
410 | 427 | "metadata": {}, |
411 | 428 | "outputs": [], |
|
451 | 468 | }, |
452 | 469 | { |
453 | 470 | "cell_type": "code", |
454 | | - "execution_count": 294, |
| 471 | + "execution_count": 7, |
455 | 472 | "id": "f7f27d2d", |
456 | 473 | "metadata": {}, |
457 | 474 | "outputs": [ |
|
462 | 479 | "LiNGAM adjacency matrix shape: (6, 6)\n" |
463 | 480 | ] |
464 | 481 | }, |
| 482 | + { |
| 483 | + "name": "stderr", |
| 484 | + "output_type": "stream", |
| 485 | + "text": [ |
| 486 | + "/Users/hc.cho/Projects/awesome-causal-inference-python/.venv/lib/python3.10/site-packages/sklearn/linear_model/_base.py:280: RuntimeWarning: divide by zero encountered in matmul\n", |
| 487 | + " return X @ coef_ + self.intercept_\n", |
| 488 | + "/Users/hc.cho/Projects/awesome-causal-inference-python/.venv/lib/python3.10/site-packages/sklearn/linear_model/_base.py:280: RuntimeWarning: overflow encountered in matmul\n", |
| 489 | + " return X @ coef_ + self.intercept_\n", |
| 490 | + "/Users/hc.cho/Projects/awesome-causal-inference-python/.venv/lib/python3.10/site-packages/sklearn/linear_model/_base.py:280: RuntimeWarning: invalid value encountered in matmul\n", |
| 491 | + " return X @ coef_ + self.intercept_\n", |
| 492 | + "/Users/hc.cho/Projects/awesome-causal-inference-python/.venv/lib/python3.10/site-packages/sklearn/linear_model/_base.py:280: RuntimeWarning: divide by zero encountered in matmul\n", |
| 493 | + " return X @ coef_ + self.intercept_\n", |
| 494 | + "/Users/hc.cho/Projects/awesome-causal-inference-python/.venv/lib/python3.10/site-packages/sklearn/linear_model/_base.py:280: RuntimeWarning: overflow encountered in matmul\n", |
| 495 | + " return X @ coef_ + self.intercept_\n", |
| 496 | + "/Users/hc.cho/Projects/awesome-causal-inference-python/.venv/lib/python3.10/site-packages/sklearn/linear_model/_base.py:280: RuntimeWarning: invalid value encountered in matmul\n", |
| 497 | + " return X @ coef_ + self.intercept_\n", |
| 498 | + "/Users/hc.cho/Projects/awesome-causal-inference-python/.venv/lib/python3.10/site-packages/sklearn/linear_model/_base.py:280: RuntimeWarning: divide by zero encountered in matmul\n", |
| 499 | + " return X @ coef_ + self.intercept_\n", |
| 500 | + "/Users/hc.cho/Projects/awesome-causal-inference-python/.venv/lib/python3.10/site-packages/sklearn/linear_model/_base.py:280: RuntimeWarning: overflow encountered in matmul\n", |
| 501 | + " return X @ coef_ + self.intercept_\n", |
| 502 | + "/Users/hc.cho/Projects/awesome-causal-inference-python/.venv/lib/python3.10/site-packages/sklearn/linear_model/_base.py:280: RuntimeWarning: invalid value encountered in matmul\n", |
| 503 | + " return X @ coef_ + self.intercept_\n" |
| 504 | + ] |
| 505 | + }, |
465 | 506 | { |
466 | 507 | "data": { |
467 | 508 | "image/svg+xml": [ |
|
592 | 633 | "</svg>\n" |
593 | 634 | ], |
594 | 635 | "text/plain": [ |
595 | | - "<graphviz.graphs.Digraph at 0x345934dc0>" |
| 636 | + "<graphviz.graphs.Digraph at 0x10f8b08e0>" |
596 | 637 | ] |
597 | 638 | }, |
598 | | - "execution_count": 294, |
| 639 | + "execution_count": 7, |
599 | 640 | "metadata": {}, |
600 | 641 | "output_type": "execute_result" |
601 | 642 | } |
|
613 | 654 | }, |
614 | 655 | { |
615 | 656 | "cell_type": "code", |
616 | | - "execution_count": 295, |
| 657 | + "execution_count": 8, |
617 | 658 | "id": "82938c1e", |
618 | 659 | "metadata": {}, |
619 | 660 | "outputs": [ |
|
786 | 827 | }, |
787 | 828 | { |
788 | 829 | "cell_type": "code", |
789 | | - "execution_count": null, |
| 830 | + "execution_count": 9, |
790 | 831 | "id": "6d80c7fa", |
791 | 832 | "metadata": {}, |
792 | 833 | "outputs": [ |
793 | 834 | { |
794 | 835 | "name": "stderr", |
795 | 836 | "output_type": "stream", |
796 | 837 | "text": [ |
797 | | - "Depth=3, working on node 5: 100%|██████████| 6/6 [00:00<00:00, 2326.51it/s]" |
| 838 | + "Depth=3, working on node 5: 100%|██████████| 6/6 [00:00<00:00, 2269.44it/s]" |
798 | 839 | ] |
799 | 840 | }, |
800 | 841 | { |
|
976 | 1017 | "</svg>\n" |
977 | 1018 | ], |
978 | 1019 | "text/plain": [ |
979 | | - "<graphviz.graphs.Digraph at 0x330fbf0a0>" |
| 1020 | + "<graphviz.graphs.Digraph at 0x10ae15360>" |
980 | 1021 | ] |
981 | 1022 | }, |
982 | | - "execution_count": 297, |
| 1023 | + "execution_count": 9, |
983 | 1024 | "metadata": {}, |
984 | 1025 | "output_type": "execute_result" |
985 | 1026 | } |
|
996 | 1037 | }, |
997 | 1038 | { |
998 | 1039 | "cell_type": "code", |
999 | | - "execution_count": 298, |
| 1040 | + "execution_count": 10, |
1000 | 1041 | "id": "8821657c", |
1001 | 1042 | "metadata": {}, |
1002 | 1043 | "outputs": [ |
1003 | 1044 | { |
1004 | 1045 | "name": "stdout", |
1005 | 1046 | "output_type": "stream", |
1006 | 1047 | "text": [ |
1007 | | - "PC edges (±1은 방향/존재 여부 코드):\n" |
| 1048 | + "PC edges:\n" |
1008 | 1049 | ] |
1009 | 1050 | }, |
1010 | 1051 | { |
|
1164 | 1205 | " \"weight\", key=lambda s: s.abs(), ascending=False\n", |
1165 | 1206 | ")\n", |
1166 | 1207 | "\n", |
1167 | | - "print(\"PC edges (±1은 방향/존재 여부 코드):\")\n", |
| 1208 | + "print(\"PC edges:\")\n", |
1168 | 1209 | "display(pc_edges_sorted)" |
1169 | 1210 | ] |
1170 | 1211 | }, |
|
1190 | 1231 | }, |
1191 | 1232 | { |
1192 | 1233 | "cell_type": "code", |
1193 | | - "execution_count": 269, |
| 1234 | + "execution_count": 11, |
1194 | 1235 | "id": "a78c2b78", |
1195 | 1236 | "metadata": {}, |
1196 | 1237 | "outputs": [ |
|
1366 | 1407 | "</svg>\n" |
1367 | 1408 | ], |
1368 | 1409 | "text/plain": [ |
1369 | | - "<graphviz.graphs.Digraph at 0x33547ba90>" |
| 1410 | + "<graphviz.graphs.Digraph at 0x10b179630>" |
1370 | 1411 | ] |
1371 | 1412 | }, |
1372 | | - "execution_count": 269, |
| 1413 | + "execution_count": 11, |
1373 | 1414 | "metadata": {}, |
1374 | 1415 | "output_type": "execute_result" |
1375 | 1416 | } |
|
1387 | 1428 | }, |
1388 | 1429 | { |
1389 | 1430 | "cell_type": "code", |
1390 | | - "execution_count": 270, |
| 1431 | + "execution_count": 12, |
1391 | 1432 | "id": "b627dca4", |
1392 | 1433 | "metadata": {}, |
1393 | 1434 | "outputs": [ |
1394 | 1435 | { |
1395 | 1436 | "name": "stdout", |
1396 | 1437 | "output_type": "stream", |
1397 | 1438 | "text": [ |
1398 | | - "GES edges (±1은 방향/존재 여부 코드):\n" |
| 1439 | + "GES edges:\n" |
1399 | 1440 | ] |
1400 | 1441 | }, |
1401 | 1442 | { |
|
1555 | 1596 | " \"weight\", key=lambda s: s.abs(), ascending=False\n", |
1556 | 1597 | ")\n", |
1557 | 1598 | "\n", |
1558 | | - "print(\"GES edges (±1은 방향/존재 여부 코드):\")\n", |
| 1599 | + "print(\"GES edges:\")\n", |
1559 | 1600 | "display(ges_edges_sorted)\n" |
1560 | 1601 | ] |
1561 | 1602 | }, |
|
1592 | 1633 | "source": [ |
1593 | 1634 | "## Identify and Estimate\n", |
1594 | 1635 | "\n", |
1595 | | - "**weight → mpg** 인과 효과를 추정하는 두 가지 경로를 비교합니다.\n", |
| 1636 | + "weight → mpg 인과 효과를 추정하는 두 가지 경로를 비교합니다.\n", |
1596 | 1637 | "\n", |
1597 | 1638 | "1. LiNGAM이 제안한 DAG를 그대로 쓴 경우 \n", |
1598 | 1639 | "2. 도메인 지식으로 구성한 DAG를 쓴 경우" |
|
1611 | 1652 | }, |
1612 | 1653 | { |
1613 | 1654 | "cell_type": "code", |
1614 | | - "execution_count": 299, |
| 1655 | + "execution_count": 13, |
1615 | 1656 | "id": "4e568856", |
1616 | 1657 | "metadata": {}, |
1617 | 1658 | "outputs": [ |
|
1662 | 1703 | }, |
1663 | 1704 | { |
1664 | 1705 | "cell_type": "code", |
1665 | | - "execution_count": 301, |
| 1706 | + "execution_count": 20, |
1666 | 1707 | "id": "7a4ab4fb", |
1667 | 1708 | "metadata": {}, |
1668 | 1709 | "outputs": [ |
1669 | 1710 | { |
1670 | 1711 | "name": "stdout", |
1671 | 1712 | "output_type": "stream", |
1672 | 1713 | "text": [ |
1673 | | - "=== Identified estimand (LiNGAM DAG 기반) ===\n", |
| 1714 | + "=== Identified estimand LiNGAM DAG ===\n", |
1674 | 1715 | "Estimand type: EstimandType.NONPARAMETRIC_ATE\n", |
1675 | 1716 | "\n", |
1676 | 1717 | "### Estimand : 1\n", |
|
1715 | 1756 | " proceed_when_unidentifiable=True\n", |
1716 | 1757 | ")\n", |
1717 | 1758 | "\n", |
1718 | | - "print(\"=== Identified estimand (LiNGAM DAG 기반) ===\")\n", |
| 1759 | + "print(\"=== Identified estimand LiNGAM DAG ===\")\n", |
1719 | 1760 | "print(identified_estimand_lingam)" |
1720 | 1761 | ] |
1721 | 1762 | }, |
1722 | 1763 | { |
1723 | 1764 | "cell_type": "code", |
1724 | | - "execution_count": 309, |
| 1765 | + "execution_count": 21, |
1725 | 1766 | "id": "6c0f9f33", |
1726 | 1767 | "metadata": {}, |
1727 | 1768 | "outputs": [ |
|
1759 | 1800 | "estimate_lingam = cm_lingam.estimate_effect(\n", |
1760 | 1801 | " identified_estimand_lingam,\n", |
1761 | 1802 | " method_name=\"backdoor.linear_regression\",\n", |
1762 | | - " # \"weight\"가 1 단위 증가할 때 mpg가 얼마나 변하는지\n", |
1763 | 1803 | " control_value=0,\n", |
1764 | 1804 | " treatment_value=1,\n", |
1765 | 1805 | " confidence_intervals=True,\n", |
|
1801 | 1841 | " - `horsepower → mpg` (고출력 엔진은 연비가 낮을 가능성)\n", |
1802 | 1842 | "\n", |
1803 | 1843 | "- 출력 → 가속 성능\n", |
1804 | | - " - `horsepower → acceleration` (마력이 높을수록 0–60 시간이 짧아짐)" |
| 1844 | + " - `horsepower → acceleration` (마력이 높을수록 가속 시간이 짧아짐)" |
1805 | 1845 | ] |
1806 | 1846 | }, |
1807 | 1847 | { |
1808 | 1848 | "cell_type": "code", |
1809 | | - "execution_count": 303, |
| 1849 | + "execution_count": 22, |
1810 | 1850 | "id": "7e38aff1", |
1811 | 1851 | "metadata": {}, |
1812 | 1852 | "outputs": [], |
|
1842 | 1882 | }, |
1843 | 1883 | { |
1844 | 1884 | "cell_type": "code", |
1845 | | - "execution_count": null, |
| 1885 | + "execution_count": 23, |
1846 | 1886 | "id": "f3ef518f", |
1847 | 1887 | "metadata": {}, |
1848 | 1888 | "outputs": [ |
|
1859 | 1899 | { |
1860 | 1900 | "data": { |
1861 | 1901 | "text/plain": [ |
1862 | | - "<dowhy.causal_model.CausalModel at 0x330e3ef50>" |
| 1902 | + "<dowhy.causal_model.CausalModel at 0x169ab6110>" |
1863 | 1903 | ] |
1864 | 1904 | }, |
1865 | 1905 | "metadata": {}, |
|
1880 | 1920 | }, |
1881 | 1921 | { |
1882 | 1922 | "cell_type": "code", |
1883 | | - "execution_count": 279, |
| 1923 | + "execution_count": 24, |
1884 | 1924 | "id": "a98ee829", |
1885 | 1925 | "metadata": {}, |
1886 | 1926 | "outputs": [ |
1887 | 1927 | { |
1888 | 1928 | "name": "stdout", |
1889 | 1929 | "output_type": "stream", |
1890 | 1930 | "text": [ |
1891 | | - "=== Identified Estimand (Domain DAG) ===\n", |
| 1931 | + "=== Identified Estimand Domain DAG ===\n", |
1892 | 1932 | "Estimand type: EstimandType.NONPARAMETRIC_ATE\n", |
1893 | 1933 | "\n", |
1894 | 1934 | "### Estimand : 1\n", |
|
1923 | 1963 | " proceed_when_unidentifiable=True\n", |
1924 | 1964 | ")\n", |
1925 | 1965 | "\n", |
1926 | | - "print(\"=== Identified Estimand (Domain DAG) ===\")\n", |
| 1966 | + "print(\"=== Identified Estimand Domain DAG ===\")\n", |
1927 | 1967 | "print(identified_estimand_domain)\n" |
1928 | 1968 | ] |
1929 | 1969 | }, |
1930 | 1970 | { |
1931 | 1971 | "cell_type": "code", |
1932 | | - "execution_count": 308, |
| 1972 | + "execution_count": 25, |
1933 | 1973 | "id": "509fcdca", |
1934 | 1974 | "metadata": {}, |
1935 | 1975 | "outputs": [ |
|
1966 | 2006 | "estimate_domain = cm_domain.estimate_effect(\n", |
1967 | 2007 | " identified_estimand_domain,\n", |
1968 | 2008 | " method_name=\"backdoor.linear_regression\",\n", |
1969 | | - " # 동일하게 weight가 1 단위 증가할 때의 효과\n", |
1970 | 2009 | " control_value=0,\n", |
1971 | 2010 | " treatment_value=1,\n", |
1972 | 2011 | " confidence_intervals=True,\n", |
|
1979 | 2018 | { |
1980 | 2019 | "cell_type": "code", |
1981 | 2020 | "execution_count": null, |
1982 | | - "id": "d4c9560c", |
| 2021 | + "id": "b8ec4f40", |
1983 | 2022 | "metadata": {}, |
1984 | 2023 | "outputs": [], |
1985 | 2024 | "source": [] |
|
0 commit comments