12
12
from sklearn .metrics import mean_squared_error as mse
13
13
import numpy as np
14
14
from sklearn .model_selection import cross_val_score
15
+ import pandas as pd
15
16
16
17
17
18
class DAGAdequacy :
@@ -38,21 +39,30 @@ def __init__(self, test_case: CausalTestCase, test_engine: CausalTestEngine, est
38
39
self .test_engine = test_engine
39
40
self .estimator = estimator
40
41
41
def measure_adequacy(self, bootstrap_size: int = 100):
    """Measure how stable the causal test result is under bootstrap resampling.

    The estimator's data frame is resampled with replacement ``bootstrap_size``
    times (seeds ``0 .. bootstrap_size - 1``, so runs are reproducible). The
    test case is executed on a deep copy of the estimator for every resample,
    leaving ``self.estimator`` itself untouched.

    :param bootstrap_size: Number of bootstrap resamples to execute (>= 1).
    :return: A 4-tuple ``(effect_estimate_kurtosis, ci_low_kurtosis,
        ci_high_kurtosis, outcomes)``. The first three are the result of
        ``DataFrame.kurtosis()`` over the per-resample values of the
        corresponding field; ``outcomes`` is the list obtained by applying
        ``self.test_case.expected_causal_effect`` to every test result.
    :raises ValueError: If ``bootstrap_size`` is smaller than 1.
    """
    if bootstrap_size < 1:
        raise ValueError(f"bootstrap_size must be at least 1, got {bootstrap_size}")

    fields = ["effect_estimate", "ci_low", "ci_high"]
    treatment_name = self.test_case.base_test_case.treatment_variable.name

    test_results = []
    for seed in range(bootstrap_size):
        # Work on a copy so the resampled data never leaks into self.estimator.
        estimator = deepcopy(self.estimator)
        estimator.df = estimator.df.sample(len(estimator.df), replace=True, random_state=seed)
        test_results.append(self.test_engine.execute_test(estimator, self.test_case))

    outcomes = [self.test_case.expected_causal_effect.apply(result) for result in test_results]
    # Each result is expected to expose to_dict() with (at least) the three fields.
    summary = pd.DataFrame([result.to_dict() for result in test_results])[fields]

    def stack(field):
        """Return a frame with one row per resample for ``field``."""
        frames = [
            # Scalar estimates (ints and numpy scalars included, not just float)
            # are wrapped in a one-row frame labelled with the treatment name so
            # they can be concatenated like the non-scalar estimates.
            pd.DataFrame({treatment_name: [value]}).transpose()
            if isinstance(value, (int, float, np.number))
            else value
            for value in summary[field]
        ]
        return pd.concat(frames, axis=1).transpose().reset_index(drop=True)

    effect_estimate, ci_low, ci_high = (stack(field) for field in fields)
    return effect_estimate.kurtosis(), ci_low.kurtosis(), ci_high.kurtosis(), outcomes
0 commit comments