Skip to content

Commit bfb349d

Browse files
authored
Merge pull request #32 from strongio/develop
Develop
2 parents 3c74c85 + 29ef7cf commit bfb349d

File tree

3 files changed

+13
-11
lines changed

3 files changed

+13
-11
lines changed

foundry/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '0.2.4'
1+
__version__ = '0.2.5'

foundry/evaluation/marginal_effects.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ def __call__(self,
127127
:param groupby_features: Strings indicating the feature(s) to group/segment on, so as to observe different
128128
effects per segment. By default will be binned by passing to :function:`foundry.evaluation.binned`. You can
129129
pass to this function yourself to manually control/remove binning.
130-
:param vary_features_aggfun: The varying feature(s) will be binned, then within each bin we need to convert
130+
:param vary_features_aggfun: Numeric varying feature(s) will be binned, then within each bin we need to convert
131131
back to numeric before plugging into the model. This string indicates how to do so (default: mean). Either an
132132
aggregation that will be applied, or 'mid' to use the midpoint of the bin. The latter will be used regardless
133133
when no actual data exists in that bin. This argument can also be a dictionary with keys being feature-names.
@@ -139,15 +139,15 @@ def __call__(self,
139139
effect if ``marginalize_aggfun`` is False/None.
140140
:param predict_kwargs: Keyword-arguments to pass to the pipeline's ``predict`` method.
141141
"""
142+
X = X.copy(deep=False)
142143
if isinstance(marginalize_aggfun, str) and marginalize_aggfun.startswith('downsample'):
143144
downsample_int = int(marginalize_aggfun.replace('downsample', '').rstrip('_'))
144-
idx = np.random.choice(X.shape[0], size=downsample_int, replace=False)
145-
X = _safe_indexing(X, idx)
146-
if y is not None:
147-
y = _safe_indexing(y, idx)
145+
if X.shape[0] > downsample_int:
146+
idx = np.random.choice(X.shape[0], size=downsample_int, replace=False)
147+
X = _safe_indexing(X, idx)
148+
if y is not None:
149+
y = _safe_indexing(y, idx)
148150
marginalize_aggfun = False
149-
else:
150-
X = X.copy(deep=False)
151151

152152
# validate/standardize args:
153153
vary_features = self._standardize_maybe_binned(X, vary_features)
@@ -175,7 +175,6 @@ def __call__(self,
175175
)
176176

177177
# vary features ----
178-
# TODO: this gets ignored for categorical features
179178
default = vary_features_aggfun.pop('_default', 'mean') if isinstance(vary_features_aggfun, dict) else 'mean'
180179
vary_features_aggfuns = self._standardize_maybe_dict(
181180
maybe_dict=vary_features_aggfun,

foundry/glm/glm.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,15 +139,15 @@ class Glm(BaseEstimator):
139139
columns, these can be functions that take the data and return the relevant columns: e.g.
140140
``col_mapping={'loc':sklearn.compose.make_column_selector('^col+.'), 'scale':[col1]}``.
141141
:param sparse_mm_threshold: Density threshold for creating a sparse model-matrix. If X has density less than this,
142-
the model-matrix will be sparse; otherwise it will be dense. Default .05.
142+
the model-matrix will be sparse; otherwise it will be dense. Default 0, meaning never use sparse tensors.
143143
"""
144144
family_names = family_names
145145

146146
def __init__(self,
147147
family: Union[str, Family],
148148
penalty: Union[float, Sequence[float], Dict[str, float]] = 0.,
149149
col_mapping: Union[list, dict, None] = None,
150-
sparse_mm_threshold: float = .01,
150+
sparse_mm_threshold: float = 0.0,
151151
_warm_start: Optional[dict] = None):
152152

153153
self.family = family
@@ -275,6 +275,7 @@ def fit(self,
275275
# search:
276276
if kwargs.get('verbose', True):
277277
print("GridSearchCV...")
278+
cv_kwargs = cv_kwargs or {}
278279
gcv = GridSearchCV(
279280
estimator=self,
280281
param_grid={'penalty': penalties},
@@ -290,6 +291,8 @@ def fit(self,
290291
self.set_params(penalty=best_penalty, _warm_start=None)
291292
return self._fit(X=X, y=y, **kwargs)
292293
else:
294+
if cv_kwargs:
295+
warn("Ignoring `cv_kwargs`, penalty is scalar.")
293296
return self._fit(X=X, y=y, sample_weight=sample_weight, **kwargs)
294297

295298
@retry(retry=retry_if_exception_type(FitFailedException), reraise=True, stop=stop_after_attempt(N_FIT_RETRIES + 1))

0 commit comments

Comments
 (0)