7
7
CausalForestEstimator ,
8
8
LogisticRegressionEstimator ,
9
9
InstrumentalVariableEstimator ,
10
- CubicSplineRegressionEstimator
10
+ CubicSplineRegressionEstimator ,
11
11
)
12
12
from causal_testing .specification .variable import Input
13
13
from causal_testing .utils .validation import CausalValidator
@@ -78,21 +78,7 @@ class TestLogisticRegressionEstimator(unittest.TestCase):
78
78
79
79
@classmethod
80
80
def setUpClass (cls ) -> None :
81
- cls .scarf_df = pd .DataFrame (
82
- [
83
- {"length_in" : 55 , "large_gauge" : 1 , "color" : "orange" , "completed" : 1 },
84
- {"length_in" : 55 , "large_gauge" : 0 , "color" : "orange" , "completed" : 1 },
85
- {"length_in" : 55 , "large_gauge" : 0 , "color" : "brown" , "completed" : 1 },
86
- {"length_in" : 60 , "large_gauge" : 0 , "color" : "brown" , "completed" : 1 },
87
- {"length_in" : 60 , "large_gauge" : 0 , "color" : "grey" , "completed" : 0 },
88
- {"length_in" : 70 , "large_gauge" : 0 , "color" : "grey" , "completed" : 1 },
89
- {"length_in" : 70 , "large_gauge" : 0 , "color" : "orange" , "completed" : 0 },
90
- {"length_in" : 82 , "large_gauge" : 1 , "color" : "grey" , "completed" : 1 },
91
- {"length_in" : 82 , "large_gauge" : 0 , "color" : "brown" , "completed" : 0 },
92
- {"length_in" : 82 , "large_gauge" : 0 , "color" : "orange" , "completed" : 0 },
93
- {"length_in" : 82 , "large_gauge" : 1 , "color" : "brown" , "completed" : 0 },
94
- ]
95
- )
81
+ cls .scarf_df = pd .read_csv ("tests/data/scarf_data.csv" )
96
82
97
83
# Yes, this probably shouldn't be in here, but it uses the scarf data so it makes more sense to put it
98
84
# here than duplicating the scarf data for a single test
@@ -416,12 +402,11 @@ def test_program_11_2_with_robustness_validation(self):
416
402
417
403
418
404
class TestCubicSplineRegressionEstimator (TestLinearRegressionEstimator ):
419
-
420
405
@classmethod
421
-
422
406
def setUpClass (cls ):
423
407
424
408
super ().setUpClass ()
409
+
425
410
def test_program_11_3_cublic_spline (self ):
426
411
427
412
"""Test whether the cubic spline regression implementation produces the same results as program 11.3 (p. 162).
@@ -431,8 +416,7 @@ def test_program_11_3_cublic_spline(self):
431
416
432
417
df = self .chapter_11_df .copy ()
433
418
434
- cublic_spline_estimator = CubicSplineRegressionEstimator (
435
- "treatments" , 1 , 0 , set (), "outcomes" , 3 , df )
419
+ cublic_spline_estimator = CubicSplineRegressionEstimator ("treatments" , 1 , 0 , set (), "outcomes" , 3 , df )
436
420
437
421
model = cublic_spline_estimator ._run_linear_regression ()
438
422
@@ -453,8 +437,6 @@ def test_program_11_3_cublic_spline(self):
453
437
self .assertAlmostEqual (ate_1 * 2 , ate_2 )
454
438
455
439
456
-
457
-
458
440
class TestCausalForestEstimator (unittest .TestCase ):
459
441
"""Test the causal forest estimator against the programming exercises in Section 2 of Hernán and Robins [1].
460
442
@@ -527,15 +509,29 @@ def setUpClass(cls) -> None:
527
509
df = pd .DataFrame ({"X1" : np .random .uniform (- 1000 , 1000 , 1000 ), "X2" : np .random .uniform (- 1000 , 1000 , 1000 )})
528
510
df ["Y" ] = 2 * df ["X1" ] - 3 * df ["X2" ] + 2 * df ["X1" ] * df ["X2" ] + 10
529
511
cls .df = df
512
+ cls .scarf_df = pd .read_csv ("tests/data/scarf_data.csv" )
530
513
531
514
def test_X1_effect (self ):
532
515
"""When we fix the value of X2 to 0, the effect of X1 on Y should become ~2 (because X2 terms are cancelled)."""
533
- x2 = Input ("X2" , float )
534
516
lr_model = LinearRegressionEstimator (
535
- "X1" , 1 , 0 , {"X2" }, "Y" , effect_modifiers = {x2 . name : 0 }, formula = "Y ~ X1 + X2 + (X1 * X2)" , df = self .df
517
+ "X1" , 1 , 0 , {"X2" }, "Y" , effect_modifiers = {"x2" : 0 }, formula = "Y ~ X1 + X2 + (X1 * X2)" , df = self .df
536
518
)
537
519
test_results = lr_model .estimate_ate ()
538
520
ate = test_results [0 ]
539
521
self .assertAlmostEqual (ate , 2.0 )
540
522
523
+ def test_categorical_confidence_intervals (self ):
524
+ lr_model = LinearRegressionEstimator (
525
+ treatment = "color" ,
526
+ control_value = None ,
527
+ treatment_value = None ,
528
+ adjustment_set = {},
529
+ outcome = "length_in" ,
530
+ df = self .scarf_df ,
531
+ )
532
+ coefficients , [ci_low , ci_high ] = lr_model .estimate_coefficient ()
541
533
534
+ # The precise values don't really matter. This test is primarily intended to make sure the return type is correct.
535
+ self .assertTrue (coefficients .round (2 ).equals (pd .Series ({"color[T.grey]" : 0.92 , "color[T.orange]" : - 4.25 })))
536
+ self .assertTrue (ci_low .round (2 ).equals (pd .Series ({"color[T.grey]" : - 22.12 , "color[T.orange]" : - 25.58 })))
537
+ self .assertTrue (ci_high .round (2 ).equals (pd .Series ({"color[T.grey]" : 23.95 , "color[T.orange]" : 17.08 })))
0 commit comments