Skip to content

Commit a5f057f

Browse files
committed
update binary outcome handling
1 parent cbad09a commit a5f057f

File tree

3 files changed

+13
-8
lines changed

3 files changed

+13
-8
lines changed

vimpy/cv_vim.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,13 @@ class cv_vim:
1818
@param measure_type the predictiveness measure to use (for now, one of "r_squared", "auc", "accuracy", "deviance")
1919
@param V the number of cross-fitting folds (defaults to 5)
2020
@param pred_func the function that predicts outcome given features
21+
@param ensemble is pred_func an ensemble (True) or a single function (False, default)
2122
@param f fitted values from regression of outcome on all features (only used if pred_func is not specified)
2223
@param r fitted values from regression of outcome on reduced set of features (only used if pred_func is not specified)
2324
@param folds a list of length 3: outer folds, for hypothesis testing; inner folds based on the outer folds == 1 (for cross-fitting); inner folds based on outer folds == 0 (for cross-fitting)
2425
@param na_rm remove NAs prior to computing predictiveness? (defaults to False)
2526
"""
26-
def __init__(self, y, x, s, measure_type, V = 5, pred_func = None, f = None, r = None, folds = None, na_rm = False):
27+
def __init__(self, y, x, s, measure_type, V = 5, pred_func = None, ensemble = False, f = None, r = None, folds = None, na_rm = False):
2728
self.y_ = y
2829
self.x_ = x
2930
self.s_ = s
@@ -67,6 +68,7 @@ def __init__(self, y, x, s, measure_type, V = 5, pred_func = None, f = None, r =
6768
## if only two unique values in y, assume binary
6869
self.binary_ = (np.unique(y).shape[0] == 2)
6970
self.na_rm_ = na_rm
71+
self.ensemble_ = ensemble
7072

7173
## calculate the plug-in estimator
7274
def get_point_est(self):
@@ -82,8 +84,8 @@ def get_point_est(self):
8284
this_redu_func = self.r_
8385
folds_1 = self.folds_inner_1
8486
folds_0 = self.folds_inner_0
85-
self.v_full_, self.preds_full_, ic_full, self.folds_inner_1, self.cc_1 = predictiveness_func(self.x_[self.folds_outer_ == 1, :], self.y_[self.folds_outer_ == 1], np.arange(self.p_), self.measure_, this_full_func, V = self.V_, stratified = self.binary_, na_rm = self.na_rm_, folds = folds_1)
86-
self.v_redu_, self.preds_redu_, ic_redu, self.folds_inner_0, self.cc_0 = predictiveness_func(self.x_[self.folds_outer_ == 0, :], self.y_[self.folds_outer_ == 0], np.delete(np.arange(self.p_), self.s_), self.measure_, this_redu_func, V = self.V_, stratified = self.binary_, na_rm = self.na_rm_, folds = folds_0)
87+
self.v_full_, self.preds_full_, ic_full, self.folds_inner_1, self.cc_1 = predictiveness_func(self.x_[self.folds_outer_ == 1, :], self.y_[self.folds_outer_ == 1], np.arange(self.p_), self.measure_, this_full_func, V = self.V_, stratified = self.binary_, na_rm = self.na_rm_, folds = folds_1, ensemble = self.ensemble_)
88+
self.v_redu_, self.preds_redu_, ic_redu, self.folds_inner_0, self.cc_0 = predictiveness_func(self.x_[self.folds_outer_ == 0, :], self.y_[self.folds_outer_ == 0], np.delete(np.arange(self.p_), self.s_), self.measure_, this_redu_func, V = self.V_, stratified = self.binary_, na_rm = self.na_rm_, folds = folds_0, ensemble = self.ensemble_)
8789
self.vimp_ = self.v_full_ - self.v_redu_
8890
self.ic_full_[:ic_full.shape[0]] = ic_full
8991
self.ic_redu_[:ic_redu.shape[0]] = ic_redu

vimpy/predictiveness_measures.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def cv_predictiveness(x, y, S, measure, pred_func, V = 5, stratified = True, na_
4949
if measure.__name__ in ["r_squared"]:
5050
preds_v = pred_func.predict(x_train[:, S])
5151
else:
52-
preds_v = pred_func.predict_proba(x_train[:, S])
52+
preds_v = pred_func.predict_proba(x_train[:, S])[:, 1]
5353
preds[cc_cond] = preds_v
5454
vs[0] = measure(y_train, preds_v)
5555
ics[cc_cond] = compute_ic(y_train, preds_v, measure.__name__)
@@ -73,7 +73,7 @@ def cv_predictiveness(x, y, S, measure, pred_func, V = 5, stratified = True, na_
7373

7474

7575
# general predictiveness based on precomputed fits
76-
def cv_predictiveness_precomputed(x, y, S, measure, f, V = 5, stratified = True, folds = None, na_rm = False):
76+
def cv_predictiveness_precomputed(x, y, S, measure, f, V = 5, stratified = True, folds = None, na_rm = False, ensemble = False):
7777
"""
7878
Compute a cross-validated measure of predictiveness based on the data, the chosen measure, and the sets of fitted values f and r
7979
@@ -86,6 +86,7 @@ def cv_predictiveness_precomputed(x, y, S, measure, f, V = 5, stratified = True,
8686
@param stratified: should the folds be stratified?
8787
@param folds: the CV folds
8888
@param na_rm: should we do a complete-case analysis (True) or not (False)
89+
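@param ensemble: is the prediction function an ensemble (True) or not (False); placeholder argument, presumably unused here and accepted only so this function can be called with the same arguments as cv_predictiveness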
8990
9091
@return cross-validated measure of predictiveness, along with preds and ics
9192
"""

vimpy/vim.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,15 @@ class vim:
1717
@param s the feature group of interest
1818
@param measure_type the predictiveness measure to use (for now, one of "r_squared", "auc", "accuracy", "deviance")
1919
@param pred_func the function that predicts outcome given features
20+
@param ensemble is pred_func an ensemble (True) or a single function (False, default)
2021
@param f fitted values from regression of outcome on all features (only used if pred_func is not specified)
2122
@param r fitted values from regression of outcome on reduced set of features (only used if pred_func is not specified)
2223
@param folds outer folds, for hypothesis testing (only used if pred_func is not specified)
2324
@param na_rm remove NAs prior to computing predictiveness? (defaults to False)
2425
2526
@return an object of class vim
2627
"""
27-
def __init__(self, y, x, s, measure_type, pred_func = None, f = None, r = None, folds = None, na_rm = False):
28+
def __init__(self, y, x, s, measure_type, pred_func = None, ensemble = False, f = None, r = None, folds = None, na_rm = False):
2829
self.y_ = y
2930
self.x_ = x
3031
self.s_ = s
@@ -66,6 +67,7 @@ def __init__(self, y, x, s, measure_type, pred_func = None, f = None, r = None,
6667
## if only two unique values in y, assume binary
6768
self.binary_ = (np.unique(y).shape[0] == 2)
6869
self.na_rm_ = na_rm
70+
self.ensemble_ = ensemble
6971

7072
## calculate the variable importance estimate
7173
def get_point_est(self):
@@ -79,8 +81,8 @@ def get_point_est(self):
7981
this_full_func = self.f_
8082
this_redu_func = self.r_
8183
folds = None
82-
self.v_full_, self.preds_full_, ic_full, self.folds_inner_1, self.cc_1 = predictiveness_func(self.x_[self.folds_outer_ == 1, :], self.y_[self.folds_outer_ == 1], np.arange(self.p_), self.measure_, this_full_func, V = 1, stratified = self.binary_, na_rm = self.na_rm_, folds = folds)
83-
self.v_redu_, self.preds_redu_, ic_redu, self.folds_inner_0, self.cc_0 = predictiveness_func(self.x_[self.folds_outer_ == 0, :], self.y_[self.folds_outer_ == 0], np.delete(np.arange(self.p_), self.s_), self.measure_, this_redu_func, V = 1, stratified = self.binary_, na_rm = self.na_rm_, folds = folds)
84+
self.v_full_, self.preds_full_, ic_full, self.folds_inner_1, self.cc_1 = predictiveness_func(self.x_[self.folds_outer_ == 1, :], self.y_[self.folds_outer_ == 1], np.arange(self.p_), self.measure_, this_full_func, V = 1, stratified = self.binary_, na_rm = self.na_rm_, folds = folds, ensemble = self.ensemble_)
85+
self.v_redu_, self.preds_redu_, ic_redu, self.folds_inner_0, self.cc_0 = predictiveness_func(self.x_[self.folds_outer_ == 0, :], self.y_[self.folds_outer_ == 0], np.delete(np.arange(self.p_), self.s_), self.measure_, this_redu_func, V = 1, stratified = self.binary_, na_rm = self.na_rm_, folds = folds, ensemble = self.ensemble_)
8486
self.vimp_ = self.v_full_ - self.v_redu_
8587
self.ic_full_[:ic_full.shape[0]] = ic_full
8688
self.ic_redu_[:ic_redu.shape[0]] = ic_redu

0 commit comments

Comments
 (0)