7
7
CausalForestEstimator ,
8
8
LogisticRegressionEstimator ,
9
9
InstrumentalVariableEstimator ,
10
- CubicSplineRegressionEstimator
10
+ CubicSplineRegressionEstimator ,
11
11
)
12
12
from causal_testing .specification .variable import Input
13
13
from causal_testing .utils .validation import CausalValidator
@@ -78,21 +78,7 @@ class TestLogisticRegressionEstimator(unittest.TestCase):
78
78
79
79
@classmethod
80
80
def setUpClass (cls ) -> None :
81
- cls .scarf_df = pd .DataFrame (
82
- [
83
- {"length_in" : 55 , "large_gauge" : 1 , "color" : "orange" , "completed" : 1 },
84
- {"length_in" : 55 , "large_gauge" : 0 , "color" : "orange" , "completed" : 1 },
85
- {"length_in" : 55 , "large_gauge" : 0 , "color" : "brown" , "completed" : 1 },
86
- {"length_in" : 60 , "large_gauge" : 0 , "color" : "brown" , "completed" : 1 },
87
- {"length_in" : 60 , "large_gauge" : 0 , "color" : "grey" , "completed" : 0 },
88
- {"length_in" : 70 , "large_gauge" : 0 , "color" : "grey" , "completed" : 1 },
89
- {"length_in" : 70 , "large_gauge" : 0 , "color" : "orange" , "completed" : 0 },
90
- {"length_in" : 82 , "large_gauge" : 1 , "color" : "grey" , "completed" : 1 },
91
- {"length_in" : 82 , "large_gauge" : 0 , "color" : "brown" , "completed" : 0 },
92
- {"length_in" : 82 , "large_gauge" : 0 , "color" : "orange" , "completed" : 0 },
93
- {"length_in" : 82 , "large_gauge" : 1 , "color" : "brown" , "completed" : 0 },
94
- ]
95
- )
81
+ cls .scarf_df = pd .read_csv ("tests/data/scarf_data.csv" )
96
82
97
83
# Yes, this probably shouldn't be in here, but it uses the scarf data so it makes more sense to put it
98
84
# here than duplicating the scarf data for a single test
@@ -231,7 +217,7 @@ def test_program_11_2(self):
231
217
self .assertEqual (round (model .params ["Intercept" ] + 90 * model .params ["treatments" ], 1 ), 216.9 )
232
218
233
219
# Increasing treatments from 90 to 100 should be the same as 10 times the unit ATE
234
- self .assertEqual ( round (model .params ["treatments" ], 1 ), round (ate [ 0 ] , 1 ))
220
+ self .assertTrue ( all ( round (model .params ["treatments" ], 1 ) == round (ate_single , 1 ) for ate_single in ate ))
235
221
236
222
def test_program_11_3 (self ):
237
223
"""Test whether our linear regression implementation produces the same results as program 11.3 (p. 144)."""
@@ -251,7 +237,7 @@ def test_program_11_3(self):
251
237
197.1 ,
252
238
)
253
239
# Increasing treatments from 90 to 100 should be the same as 10 times the unit ATE
254
- self .assertEqual ( round (model .params ["treatments" ], 3 ), round (ate [ 0 ] , 3 ))
240
+ self .assertTrue ( all ( round (model .params ["treatments" ], 3 ) == round (ate_single , 3 ) for ate_single in ate ))
255
241
256
242
def test_program_15_1A (self ):
257
243
"""Test whether our linear regression implementation produces the same results as program 15.1 (p. 163, 184)."""
@@ -329,6 +315,7 @@ def test_program_15_no_interaction(self):
329
315
# terms_to_square = ["age", "wt71", "smokeintensity", "smokeyrs"]
330
316
# for term_to_square in terms_to_square:
331
317
ate , [ci_low , ci_high ] = linear_regression_estimator .estimate_coefficient ()
318
+
332
319
self .assertEqual (round (ate [0 ], 1 ), 3.5 )
333
320
self .assertEqual ([round (ci_low [0 ], 1 ), round (ci_high [0 ], 1 )], [2.6 , 4.3 ])
334
321
@@ -416,12 +403,11 @@ def test_program_11_2_with_robustness_validation(self):
416
403
417
404
418
405
class TestCubicSplineRegressionEstimator (TestLinearRegressionEstimator ):
419
-
420
406
@classmethod
421
-
422
407
def setUpClass (cls ):
423
408
424
409
super ().setUpClass ()
410
+
425
411
def test_program_11_3_cublic_spline (self ):
426
412
427
413
"""Test whether the cubic spline regression implementation produces the same results as program 11.3 (p. 162).
@@ -431,8 +417,7 @@ def test_program_11_3_cublic_spline(self):
431
417
432
418
df = self .chapter_11_df .copy ()
433
419
434
- cublic_spline_estimator = CubicSplineRegressionEstimator (
435
- "treatments" , 1 , 0 , set (), "outcomes" , 3 , df )
420
+ cublic_spline_estimator = CubicSplineRegressionEstimator ("treatments" , 1 , 0 , set (), "outcomes" , 3 , df )
436
421
437
422
model = cublic_spline_estimator ._run_linear_regression ()
438
423
@@ -453,8 +438,6 @@ def test_program_11_3_cublic_spline(self):
453
438
self .assertAlmostEqual (ate_1 [0 ] * 2 , ate_2 [0 ])
454
439
455
440
456
-
457
-
458
441
class TestCausalForestEstimator (unittest .TestCase ):
459
442
"""Test the causal forest estimator against the programming exercises in Section 2 of Hernán and Robins [1].
460
443
@@ -527,15 +510,29 @@ def setUpClass(cls) -> None:
527
510
df = pd .DataFrame ({"X1" : np .random .uniform (- 1000 , 1000 , 1000 ), "X2" : np .random .uniform (- 1000 , 1000 , 1000 )})
528
511
df ["Y" ] = 2 * df ["X1" ] - 3 * df ["X2" ] + 2 * df ["X1" ] * df ["X2" ] + 10
529
512
cls .df = df
513
+ cls .scarf_df = pd .read_csv ("tests/data/scarf_data.csv" )
530
514
531
515
def test_X1_effect (self ):
532
516
"""When we fix the value of X2 to 0, the effect of X1 on Y should become ~2 (because X2 terms are cancelled)."""
533
- x2 = Input ("X2" , float )
534
517
lr_model = LinearRegressionEstimator (
535
- "X1" , 1 , 0 , {"X2" }, "Y" , effect_modifiers = {x2 . name : 0 }, formula = "Y ~ X1 + X2 + (X1 * X2)" , df = self .df
518
+ "X1" , 1 , 0 , {"X2" }, "Y" , effect_modifiers = {"x2" : 0 }, formula = "Y ~ X1 + X2 + (X1 * X2)" , df = self .df
536
519
)
537
520
test_results = lr_model .estimate_ate ()
538
521
ate = test_results [0 ][0 ]
539
522
self .assertAlmostEqual (ate , 2.0 )
540
523
524
+ def test_categorical_confidence_intervals (self ):
525
+ lr_model = LinearRegressionEstimator (
526
+ treatment = "color" ,
527
+ control_value = None ,
528
+ treatment_value = None ,
529
+ adjustment_set = {},
530
+ outcome = "length_in" ,
531
+ df = self .scarf_df ,
532
+ )
533
+ coefficients , [ci_low , ci_high ] = lr_model .estimate_coefficient ()
541
534
535
+ # The precise values don't really matter. This test is primarily intended to make sure the return type is correct.
536
+ self .assertTrue (coefficients .round (2 ).equals (pd .Series ({"color[T.grey]" : 0.92 , "color[T.orange]" : - 4.25 })))
537
+ self .assertTrue (ci_low .round (2 ).equals (pd .Series ({"color[T.grey]" : - 22.12 , "color[T.orange]" : - 25.58 })))
538
+ self .assertTrue (ci_high .round (2 ).equals (pd .Series ({"color[T.grey]" : 23.95 , "color[T.orange]" : 17.08 })))
0 commit comments