Skip to content

Commit 40b30a2

Browse files
조해창조해창
authored andcommitted
fix: 리뷰 반영(BDC code 수정, overview 수정)
1 parent 0458ef7 commit 40b30a2

File tree

5 files changed

+113
-111
lines changed

5 files changed

+113
-111
lines changed

.DS_Store

-6 KB
Binary file not shown.

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,3 +136,5 @@ _build/
136136

137137
# poetry files
138138
pyproject.toml
139+
140+
.DS_Store

book/scm/backdoor_criterion.ipynb

Lines changed: 58 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
},
3030
{
3131
"cell_type": "code",
32-
"execution_count": 2,
32+
"execution_count": 1,
3333
"id": "dd8821be",
3434
"metadata": {},
3535
"outputs": [],
@@ -62,7 +62,7 @@
6262
},
6363
{
6464
"cell_type": "code",
65-
"execution_count": 3,
65+
"execution_count": 2,
6666
"id": "3f63bbe8",
6767
"metadata": {},
6868
"outputs": [
@@ -282,7 +282,7 @@
282282
"[5 rows x 67 columns]"
283283
]
284284
},
285-
"execution_count": 3,
285+
"execution_count": 2,
286286
"metadata": {},
287287
"output_type": "execute_result"
288288
}
@@ -320,7 +320,7 @@
320320
},
321321
{
322322
"cell_type": "code",
323-
"execution_count": 4,
323+
"execution_count": 3,
324324
"id": "ff2f4b4b",
325325
"metadata": {},
326326
"outputs": [
@@ -358,23 +358,6 @@
358358
"print(\"처치군/대조군:\\n\", df_clean[treatment].value_counts())"
359359
]
360360
},
361-
{
362-
"cell_type": "code",
363-
"execution_count": 5,
364-
"id": "d985c724",
365-
"metadata": {},
366-
"outputs": [],
367-
"source": [
368-
"# 범주형 변수 원핫 인코딩\n",
369-
"categorical_vars = [\"sex\", \"race\", \"education\", \"active\", \"exercise\"]\n",
370-
"df_encoded = pd.get_dummies(df_clean, columns=categorical_vars, drop_first=True)\n",
371-
"\n",
372-
"# 연속형 변수 스케일링\n",
373-
"numeric_confounders = [\"age\", \"smokeintensity\", \"smokeyrs\", \"wt71\"]\n",
374-
"scaler = StandardScaler()\n",
375-
"df_encoded[numeric_confounders] = scaler.fit_transform(df_encoded[numeric_confounders])"
376-
]
377-
},
378361
{
379362
"cell_type": "markdown",
380363
"id": "25ebfea3",
@@ -385,7 +368,7 @@
385368
},
386369
{
387370
"cell_type": "code",
388-
"execution_count": 21,
371+
"execution_count": 5,
389372
"id": "65df40db",
390373
"metadata": {},
391374
"outputs": [
@@ -401,8 +384,7 @@
401384
}
402385
],
403386
"source": [
404-
"# 시각화용 DAG (원핫 인코딩 전 변수 기준)\n",
405-
"gml_graph_viz = \"\"\"\n",
387+
"gml_graph = \"\"\"\n",
406388
"graph [\n",
407389
" directed 1\n",
408390
"\n",
@@ -442,41 +424,12 @@
442424
"]\n",
443425
"\"\"\"\n",
444426
"\n",
445-
"cm_for_viz = CausalModel(data=df_clean, treatment=treatment, outcome=outcome, graph=gml_graph_viz)\n",
446-
"cm_for_viz.view_model(\n",
427+
"cm = CausalModel(data=df_clean, treatment=treatment, outcome=outcome, graph=gml_graph)\n",
428+
"cm.view_model(\n",
447429
" layout=\"dot\"\n",
448430
")"
449431
]
450432
},
451-
{
452-
"cell_type": "code",
453-
"execution_count": 7,
454-
"id": "2539d612",
455-
"metadata": {},
456-
"outputs": [],
457-
"source": [
458-
"# 추정용 DAG\n",
459-
"# - 모든 공변량(원핫 포함) -> {qsmk, wt82_71}\n",
460-
"# - qsmk -> wt82_71\n",
461-
"\n",
462-
"all_cols = df_encoded.columns.tolist()\n",
463-
"confounder_cols = [c for c in all_cols if c not in [treatment, outcome]]\n",
464-
"\n",
465-
"gml_nodes = []\n",
466-
"gml_nodes.append(f' node [ id \"{treatment}\" label \"{treatment}\" ]')\n",
467-
"gml_nodes.append(f' node [ id \"{outcome}\" label \"{outcome}\" ]')\n",
468-
"for c in confounder_cols:\n",
469-
" gml_nodes.append(f' node [ id \"{c}\" label \"{c}\" ]')\n",
470-
"\n",
471-
"gml_edges = []\n",
472-
"for c in confounder_cols:\n",
473-
" gml_edges.append(f' edge [ source \"{c}\" target \"{treatment}\" ]')\n",
474-
" gml_edges.append(f' edge [ source \"{c}\" target \"{outcome}\" ]')\n",
475-
"gml_edges.append(f' edge [ source \"{treatment}\" target \"{outcome}\" ]')\n",
476-
"\n",
477-
"gml_graph = 'graph [\\n directed 1\\n\\n' + \"\\n\".join(gml_nodes) + \"\\n\\n\" + \"\\n\".join(gml_edges) + \"\\n]\""
478-
]
479-
},
480433
{
481434
"cell_type": "markdown",
482435
"id": "5aa85b75",
@@ -487,7 +440,7 @@
487440
},
488441
{
489442
"cell_type": "code",
490-
"execution_count": 8,
443+
"execution_count": null,
491444
"id": "5d714d62",
492445
"metadata": {},
493446
"outputs": [
@@ -501,13 +454,13 @@
501454
"Estimand name: backdoor\n",
502455
"Estimand expression:\n",
503456
" d ↪\n",
504-
"───────(E[wt_82_71|sex_1,active_1,education_4,race_1,education_2,wt71,educatio\n",
457+
"───────(E[wt_82_71|sex,age,smokeyrs,race,active,wt71,education,exercise,smokei\n",
505458
"d[qsmk] ↪\n",
506459
"\n",
507-
" \n",
508-
"n_5,active_2,smokeintensity,exercise_2,exercise_1,education_3,smokeyrs,age])\n",
509-
" \n",
510-
"Estimand assumption 1, Unconfoundedness: If U→{qsmk} and U→wt82_71 then P(wt82_71|qsmk,sex_1,active_1,education_4,race_1,education_2,wt71,education_5,active_2,smokeintensity,exercise_2,exercise_1,education_3,smokeyrs,age,U) = P(wt82_71|qsmk,sex_1,active_1,education_4,race_1,education_2,wt71,education_5,active_2,smokeintensity,exercise_2,exercise_1,education_3,smokeyrs,age)\n",
460+
"\n",
461+
"ntensity])\n",
462+
"\n",
463+
"Estimand assumption 1, Unconfoundedness: If U→{qsmk} and U→wt82_71 then P(wt82_71|qsmk,sex,age,smokeyrs,race,active,wt71,education,exercise,smokeintensity,U) = P(wt82_71|qsmk,sex,age,smokeyrs,race,active,wt71,education,exercise,smokeintensity)\n",
511464
"\n",
512465
"### Estimand : 2\n",
513466
"Estimand name: iv\n",
@@ -521,20 +474,20 @@
521474
"Estimand name: general_adjustment\n",
522475
"Estimand expression:\n",
523476
" d ↪\n",
524-
"───────(E[wt_82_71|sex_1,active_1,education_4,race_1,education_2,wt71,educatio\n",
477+
"───────(E[wt_82_71|sex,age,smokeyrs,race,active,wt71,education,exercise,smokei\n",
525478
"d[qsmk] ↪\n",
526479
"\n",
527-
" \n",
528-
"n_5,active_2,smokeintensity,exercise_2,exercise_1,education_3,smokeyrs,age])\n",
529-
" \n",
530-
"Estimand assumption 1, Unconfoundedness: If U→{qsmk} and U→wt82_71 then P(wt82_71|qsmk,sex_1,active_1,education_4,race_1,education_2,wt71,education_5,active_2,smokeintensity,exercise_2,exercise_1,education_3,smokeyrs,age,U) = P(wt82_71|qsmk,sex_1,active_1,education_4,race_1,education_2,wt71,education_5,active_2,smokeintensity,exercise_2,exercise_1,education_3,smokeyrs,age)\n",
480+
"\n",
481+
"ntensity])\n",
482+
"\n",
483+
"Estimand assumption 1, Unconfoundedness: If U→{qsmk} and U→wt82_71 then P(wt82_71|qsmk,sex,age,smokeyrs,race,active,wt71,education,exercise,smokeintensity,U) = P(wt82_71|qsmk,sex,age,smokeyrs,race,active,wt71,education,exercise,smokeintensity)\n",
531484
"\n"
532485
]
533486
}
534487
],
535488
"source": [
536489
"est_model = CausalModel(\n",
537-
" data=df_encoded,\n",
490+
" data=df_clean,\n",
538491
" treatment=treatment,\n",
539492
" outcome=outcome,\n",
540493
" graph=gml_graph\n",
@@ -579,7 +532,7 @@
579532
},
580533
{
581534
"cell_type": "code",
582-
"execution_count": 9,
535+
"execution_count": 7,
583536
"id": "830fe396",
584537
"metadata": {},
585538
"outputs": [
@@ -588,7 +541,7 @@
588541
"output_type": "stream",
589542
"text": [
590543
"\n",
591-
"[ATE] Linear Regression: 3.3811710339880983\n"
544+
"[ATE] Linear Regression: 3.3811710339880823\n"
592545
]
593546
}
594547
],
@@ -610,18 +563,18 @@
610563
},
611564
{
612565
"cell_type": "code",
613-
"execution_count": 10,
566+
"execution_count": 8,
614567
"id": "a7aa0ee5",
615568
"metadata": {},
616569
"outputs": [
617570
{
618571
"name": "stdout",
619572
"output_type": "stream",
620573
"text": [
621-
"[ATE] DR Learner: 3.893642417299098\n",
622-
"[ATE] DR Learner 95% CI: [[[3.35046186]]\n",
574+
"[ATE] DR Learner: 3.8479455513705227\n",
575+
"[ATE] DR Learner 95% CI: [[[3.12167472]]\n",
623576
"\n",
624-
" [[5.20724188]]]\n"
577+
" [[5.29732315]]]\n"
625578
]
626579
}
627580
],
@@ -657,18 +610,18 @@
657610
},
658611
{
659612
"cell_type": "code",
660-
"execution_count": 11,
613+
"execution_count": 9,
661614
"id": "a91cdd48",
662615
"metadata": {},
663616
"outputs": [
664617
{
665618
"name": "stdout",
666619
"output_type": "stream",
667620
"text": [
668-
"[ATE] DML: 3.464128951245904\n",
669-
"[ATE] DML 95% CI: [[[2.4019656]]\n",
621+
"[ATE] DML: 3.8611289087159135\n",
622+
"[ATE] DML 95% CI: [[[3.06774555]]\n",
670623
"\n",
671-
" [[4.4395689]]]\n"
624+
" [[5.17092571]]]\n"
672625
]
673626
}
674627
],
@@ -696,17 +649,25 @@
696649
"\n"
697650
]
698651
},
652+
{
653+
"cell_type": "markdown",
654+
"id": "60080461",
655+
"metadata": {},
656+
"source": [
657+
"## Refute"
658+
]
659+
},
699660
{
700661
"cell_type": "code",
701-
"execution_count": 20,
702-
"id": "869355b3",
662+
"execution_count": 15,
663+
"id": "18481fc5",
703664
"metadata": {},
704665
"outputs": [
705666
{
706667
"name": "stdout",
707668
"output_type": "stream",
708669
"text": [
709-
"[ATE] DML: 3.464128951245904\n"
670+
"[ATE] DML: 3.8611289087159135\n"
710671
]
711672
}
712673
],
@@ -725,14 +686,6 @@
725686
"print(\"[ATE] DML:\", estimate_dml.value)"
726687
]
727688
},
728-
{
729-
"cell_type": "markdown",
730-
"id": "60080461",
731-
"metadata": {},
732-
"source": [
733-
"## Refute"
734-
]
735-
},
736689
{
737690
"cell_type": "markdown",
738691
"id": "bdcda503",
@@ -749,7 +702,7 @@
749702
},
750703
{
751704
"cell_type": "code",
752-
"execution_count": 21,
705+
"execution_count": 11,
753706
"id": "9ad86368",
754707
"metadata": {},
755708
"outputs": [
@@ -758,9 +711,9 @@
758711
"output_type": "stream",
759712
"text": [
760713
"Refute: Add a random common cause\n",
761-
"Estimated effect:3.6087034824487034\n",
762-
"New effect:3.4568539723790566\n",
763-
"p value:0.42\n",
714+
"Estimated effect:3.5111934415796173\n",
715+
"New effect:3.4686090208687586\n",
716+
"p value:0.72\n",
764717
"\n"
765718
]
766719
}
@@ -785,12 +738,13 @@
785738
"- **기대**: New effect ≈ Estimated effect\n",
786739
"- **해석**:\n",
787740
" - 크게 변하지 않으면 → 잠재적 누락변수(confounder) 에도 견고(robust)\n",
788-
" - 크게 변하면 → 모델이 숨은 교란에 민감, 추가 변수 고려 필요"
741+
" - 크게 변하면 → 모델이 숨은 교란에 민감, 추가 변수 고려 필요\n",
742+
"- **참고**: 도메인 지식을 기반으로, 교란이 처리변수와 결과에 미치는 영향의 크기는 사용자가 직접 설정해야 합니다."
789743
]
790744
},
791745
{
792746
"cell_type": "code",
793-
"execution_count": 22,
747+
"execution_count": 12,
794748
"id": "dcce63b4",
795749
"metadata": {},
796750
"outputs": [
@@ -799,8 +753,8 @@
799753
"output_type": "stream",
800754
"text": [
801755
"Refute: Add an Unobserved Common Cause\n",
802-
"Estimated effect:3.6087034824487034\n",
803-
"New effect:3.4630068223974586\n",
756+
"Estimated effect:3.5111934415796173\n",
757+
"New effect:2.993841050044186\n",
804758
"\n"
805759
]
806760
}
@@ -834,7 +788,7 @@
834788
},
835789
{
836790
"cell_type": "code",
837-
"execution_count": 23,
791+
"execution_count": 13,
838792
"id": "545e6293",
839793
"metadata": {},
840794
"outputs": [
@@ -843,9 +797,9 @@
843797
"output_type": "stream",
844798
"text": [
845799
"Refute: Use a Placebo Treatment\n",
846-
"Estimated effect:3.6087034824487034\n",
847-
"New effect:-0.01017831576756214\n",
848-
"p value:0.4903498144600218\n",
800+
"Estimated effect:3.5111934415796173\n",
801+
"New effect:0.14283539321647556\n",
802+
"p value:0.3866345448655441\n",
849803
"\n"
850804
]
851805
}
@@ -879,7 +833,7 @@
879833
},
880834
{
881835
"cell_type": "code",
882-
"execution_count": 24,
836+
"execution_count": 14,
883837
"id": "69481385",
884838
"metadata": {},
885839
"outputs": [
@@ -888,9 +842,9 @@
888842
"output_type": "stream",
889843
"text": [
890844
"Refute: Use a subset of data\n",
891-
"Estimated effect:3.6087034824487034\n",
892-
"New effect:3.5177935748179356\n",
893-
"p value:0.36231391604195595\n",
845+
"Estimated effect:3.5111934415796173\n",
846+
"New effect:3.4052136015402126\n",
847+
"p value:0.4004970164482444\n",
894848
"\n"
895849
]
896850
}

book/scm/causal_model.png

81.9 KB
Loading

0 commit comments

Comments
 (0)