Skip to content

Commit bfb349d

Browse files
authored
Merge pull request #32 from strongio/develop
Develop
2 parents 3c74c85 + 29ef7cf commit bfb349d

File tree

3 files changed

+13
-11
lines changed

3 files changed

+13
-11
lines changed

foundry/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '0.2.4'
1+
__version__ = '0.2.5'

foundry/evaluation/marginal_effects.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ def __call__(self,
127127
:param groupby_features: Strings indicating the feature(s) to group/segment on, so as to observe different
128128
effects per segment. By default will be binned by passing to :function:`foundry.evaluation.binned`. You can
129129
pass to this function yourself to manually control/remove binning.
130-
:param vary_features_aggfun: The varying feature(s) will be binned, then within each bin we need to convert
130+
:param vary_features_aggfun: Numeric varying feature(s) will be binned, then within each bin we need to convert
131131
back to numeric before plugging into the model. This string indicates how to do so (default: mean). Either an
132132
aggregation that will be applied, or 'mid' to use the midpoint of the bin. The latter will be used regardless
133133
when no actual data exists in that bin. This argument can also be a dictionary with keys being feature-names.
@@ -139,15 +139,15 @@ def __call__(self,
139139
effect if ``marginalize_aggfun`` is False/None.
140140
:param predict_kwargs: Keyword-arguments to pass to the pipeline's ``predict`` method.
141141
"""
142+
X = X.copy(deep=False)
142143
if isinstance(marginalize_aggfun, str) and marginalize_aggfun.startswith('downsample'):
143144
downsample_int = int(marginalize_aggfun.replace('downsample', '').rstrip('_'))
144-
idx = np.random.choice(X.shape[0], size=downsample_int, replace=False)
145-
X = _safe_indexing(X, idx)
146-
if y is not None:
147-
y = _safe_indexing(y, idx)
145+
if X.shape[0] > downsample_int:
146+
idx = np.random.choice(X.shape[0], size=downsample_int, replace=False)
147+
X = _safe_indexing(X, idx)
148+
if y is not None:
149+
y = _safe_indexing(y, idx)
148150
marginalize_aggfun = False
149-
else:
150-
X = X.copy(deep=False)
151151

152152
# validate/standardize args:
153153
vary_features = self._standardize_maybe_binned(X, vary_features)
@@ -175,7 +175,6 @@ def __call__(self,
175175
)
176176

177177
# vary features ----
178-
# TODO: this gets ignored for categorical features
179178
default = vary_features_aggfun.pop('_default', 'mean') if isinstance(vary_features_aggfun, dict) else 'mean'
180179
vary_features_aggfuns = self._standardize_maybe_dict(
181180
maybe_dict=vary_features_aggfun,

foundry/glm/glm.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,15 +139,15 @@ class Glm(BaseEstimator):
139139
columns, these can be functions that take the data and return the relevant columns: e.g.
140140
``col_mapping={'loc':sklearn.compose.make_column_selector('^col+.'), 'scale':[col1]}``.
141141
:param sparse_mm_threshold: Density threshold for creating a sparse model-matrix. If X has density less than this,
142-
the model-matrix will be sparse; otherwise it will be dense. Default .05.
142+
the model-matrix will be sparse; otherwise it will be dense. Default 0, meaning never use sparse tensors.
143143
"""
144144
family_names = family_names
145145

146146
def __init__(self,
147147
family: Union[str, Family],
148148
penalty: Union[float, Sequence[float], Dict[str, float]] = 0.,
149149
col_mapping: Union[list, dict, None] = None,
150-
sparse_mm_threshold: float = .01,
150+
sparse_mm_threshold: float = 0.0,
151151
_warm_start: Optional[dict] = None):
152152

153153
self.family = family
@@ -275,6 +275,7 @@ def fit(self,
275275
# search:
276276
if kwargs.get('verbose', True):
277277
print("GridSearchCV...")
278+
cv_kwargs = cv_kwargs or {}
278279
gcv = GridSearchCV(
279280
estimator=self,
280281
param_grid={'penalty': penalties},
@@ -290,6 +291,8 @@ def fit(self,
290291
self.set_params(penalty=best_penalty, _warm_start=None)
291292
return self._fit(X=X, y=y, **kwargs)
292293
else:
294+
if cv_kwargs:
295+
warn("Ignoring `cv_kwargs`, penalty is scalar.")
293296
return self._fit(X=X, y=y, sample_weight=sample_weight, **kwargs)
294297

295298
@retry(retry=retry_if_exception_type(FitFailedException), reraise=True, stop=stop_after_attempt(N_FIT_RETRIES + 1))

0 commit comments

Comments
 (0)