Skip to content

Commit 069afc9

Browse files
authored
Fix default n_subset based on replace flag (bootstrap logic) (#618)
Also add the Efron (1979) bootstrap reference to the n_subset docstring.
1 parent 56676d9 commit 069afc9

File tree

1 file changed

+9
-2
lines changed

1 file changed

+9
-2
lines changed

pysindy/optimizers/base.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -273,7 +273,11 @@ class EnsembleOptimizer(BaseOptimizer):
273273
Number of models to generate via ensemble
274274
275275
n_subset : int, optional (default len(time base))
276-
Number of time points to use for ensemble
276+
Number of time points to use for ensemble.
277+
When bagging with replacement (bootstrap), a value equal to the
278+
original number of samples is standard.
279+
See: B. Efron (1979), "Bootstrap Methods: Another Look at the
280+
Jackknife", The Annals of Statistics.
277281
278282
n_candidates_to_drop : int, optional (default 1)
279283
Number of candidate terms in the feature library to drop during
@@ -351,7 +355,10 @@ def _reduce(self, x: AxesArray, y: np.ndarray) -> None:
351355
x = AxesArray(np.asarray(x), {"ax_sample": 0, "ax_coord": 1})
352356
n_samples = x.shape[x.ax_sample]
353357
if self.bagging and self.n_subset is None:
354-
self.n_subset = int(0.6 * n_samples)
358+
if self.replace:
359+
self.n_subset = n_samples
360+
else:
361+
self.n_subset = int(0.6 * n_samples)
355362
if self.bagging and self.n_subset > n_samples and not self.replace:
356363
warnings.warn(
357364
"n_subset is larger than sample count without replacement; cannot bag."

0 commit comments

Comments
 (0)