diff --git a/mlxtend/feature_selection/columns.py b/mlxtend/feature_selection/columns.py new file mode 100644 index 000000000..281955d67 --- /dev/null +++ b/mlxtend/feature_selection/columns.py @@ -0,0 +1,271 @@ + +from typing import NamedTuple, Any +from copy import copy + +import numpy as np +from sklearn.base import clone +from sklearn.preprocessing import (OneHotEncoder, + OrdinalEncoder) +from sklearn.utils.validation import check_is_fitted +from sklearn.exceptions import NotFittedError + + +class Column(NamedTuple): + + """ + A column extractor with a possible + encoder (following `sklearn` fit/transform template). + """ + + idx: Any + name: str + is_categorical: bool = False + is_ordinal: bool = False + columns: tuple = () + encoder: Any = None + + def get_columns(self, X, fit=False): + + """ + Extract associated column from X, + encoding it with `self.encoder` if not None. + + Parameters + ---------- + + X : array-like + X on which model matrix will be evaluated. + If a `pd.DataFrame` or `pd.Series`, variables that are of + categorical dtype will be treated as categorical. + + fit : bool + If True, fit `self.encoder` on corresponding + column. + + Returns + ------- + cols : `np.ndarray` + Evaluated columns -- if an encoder is used, + several columns may be produced. + + names : (str,) + Column names + """ + + cols = _get_column(self.idx, X, twodim=self.encoder is not None) + if fit: + self.fit_encoder(X) + + if self.encoder is not None: + cols = self.encoder.transform(cols) + cols = np.asarray(cols) + + names = self.columns + if hasattr(self.encoder, 'columns_'): + names = ['{0}[{1}]'.format(self.name, c) + for c in self.encoder.columns_] + if not names: + names = ['{0}[{1}]'.format(self.name, i) + for i in range(cols.shape[1])] + return cols, names + + def fit_encoder(self, X): + + """ + Fit `self.encoder`. + + Parameters + ---------- + X : array-like + X on which encoder will be fit. + + Returns + ------- + None + """ + cols = _get_column(self.idx, X, twodim=self.encoder is not None) + if self.encoder is not None: + try: + check_is_fitted(self.encoder) + except NotFittedError: + self.encoder.fit(cols) + return np.asarray(cols) + + +# private functions + + +def _get_column(idx, X, twodim=False, loc=True): + """ + Extract column `idx` from `X`, + optionally making it two-dimensional + as many sklearn encoders assume + two-dimensional input + """ + if isinstance(X, np.ndarray): + col = X[:, idx] + elif hasattr(X, 'loc'): + if loc: + col = X.loc[:, idx] + else: # use iloc instead + col = X.iloc[:, idx] + else: + raise ValueError('expecting an ndarray or a ' + + '"loc/iloc" methods, got %s' % str(X)) + if twodim and np.asarray(col).ndim == 1: + return np.asarray(col).reshape((-1, 1)) + return np.asarray(col) + + +def _get_column_info(X, + columns, + is_categorical, + is_ordinal, + default_encoders={ + 'ordinal': OrdinalEncoder(), + 'categorical': OneHotEncoder(drop='first', + sparse=False) + } + ): + + + """ + Compute a dictionary + of `Column` instances for each column + of `X`. Keys are `columns`. + + Categorical and ordinal columns use the + default encoding provided. 
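# Illustrative sketch (not part of the patch): how a `Column` with an encoder
# extracts and expands a categorical feature. It relies on the sklearn
# OneHotEncoder API used above (`sparse=False`, which newer sklearn versions
# spell `sparse_output=False`); the data and names here are made up.
import numpy as np
from sklearn.preprocessing import OneHotEncoder

X_demo = np.array([[0, 1.5],
                   [1, -0.3],
                   [2, 0.7],
                   [1, 2.2]])
col0 = Column(idx=0,
              name='X0',
              is_categorical=True,
              encoder=OneHotEncoder(drop='first', sparse=False))
# fit=True fits the not-yet-fitted encoder on column 0, then transforms it
cols, names = col0.get_columns(X_demo, fit=True)
# cols holds one indicator column per non-dropped level of X_demo[:, 0];
# names fall back to 'X0[0]', 'X0[1]', ... because OneHotEncoder has no `columns_`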
+ + """ + + column_info = {} + for i, col in enumerate(columns): + if type(col) == int: + name = f'X{col}' + else: + name = str(col) + Xcol = _get_column(col, X, twodim=True) + if is_categorical[i]: + if is_ordinal[i]: + encoder = clone(default_encoders['ordinal']) + encoder.fit(Xcol) + columns = ['Ord({0})'.format(col)] + else: + encoder = clone(default_encoders['categorical']) + cols = encoder.fit_transform(Xcol) + if hasattr(encoder, 'columns_'): + columns_ = encoder.columns_ + else: + columns_ = range(cols.shape[1]) + columns = ['Cat({0})[{1}]'.format(col, c) + for c in range(cols.shape[1])] + + column_info[col] = Column(col, + name, + is_categorical[i], + is_ordinal[i], + tuple(columns), + encoder) + else: + column_info[col] = Column(col, + name, + columns=(name,)) + return column_info + +# extracted from method of BaseHistGradientBoosting from +# https://github.com/scikit-learn/scikit-learn/blob/2beed55847ee70d363bdbfe14ee4401438fba057/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +# max_bins is ignored + + +def _check_categories(categorical_features, X): + """Check and validate categorical features in X + + Return + ------ + is_categorical : ndarray of shape (n_features,) or None, dtype=bool + Indicates whether a feature is categorical. If no feature is + categorical, this is None. + known_categories : list of size n_features or None + The list contains, for each feature: + - an array of shape (n_categories,) with the unique cat values + - None if the feature is not categorical + None if no feature is categorical. + """ + if categorical_features is None: + return None, None + + categorical_features = np.asarray(categorical_features) + + if categorical_features.size == 0: + return None, None + + if categorical_features.dtype.kind not in ('i', 'b'): + raise ValueError("categorical_features must be an array-like of " + "bools or array-like of ints.") + + n_features = X.shape[1] + + # check for categorical features as indices + if categorical_features.dtype.kind == 'i': + if (np.max(categorical_features) >= n_features + or np.min(categorical_features) < 0): + raise ValueError("categorical_features set as integer " + "indices must be in [0, n_features - 1]") + is_categorical = np.zeros(n_features, dtype=bool) + is_categorical[categorical_features] = True + else: + if categorical_features.shape[0] != n_features: + raise ValueError("categorical_features set as a boolean mask " + "must have shape (n_features,), got: " + f"{categorical_features.shape}") + is_categorical = categorical_features + + if not np.any(is_categorical): + return None, None + + # compute the known categories in the training data. We need to do + # that here instead of in the BinMapper because in case of early + # stopping, the mapper only gets a fraction of the training data. 
+ known_categories = [] + + for f_idx in range(n_features): + if is_categorical[f_idx]: + categories = np.array([v for v in + set(_get_column(f_idx, + X, + loc=False))]) + missing = [] + for c in categories: + try: + missing.append(np.isnan(c)) + except TypeError: # not a float + missing.append(False) + missing = np.array(missing) + if missing.any(): + categories = sorted(categories[~missing]) + + else: + categories = None + known_categories.append(categories) + + return is_categorical, known_categories + + +def _categorical_from_df(df): + """ + Find + """ + is_categorical = [] + is_ordinal = [] + for c in df.columns: + try: + is_categorical.append(df[c].dtype == 'category') + is_ordinal.append(df[c].cat.ordered) + except TypeError: + is_categorical.append(False) + is_ordinal.append(False) + is_categorical = np.array(is_categorical) + is_ordinal = np.array(is_ordinal) + + return is_categorical, is_ordinal diff --git a/mlxtend/feature_selection/generic_selector.py b/mlxtend/feature_selection/generic_selector.py new file mode 100644 index 000000000..3c82b1217 --- /dev/null +++ b/mlxtend/feature_selection/generic_selector.py @@ -0,0 +1,522 @@ +# Sebastian Raschka 2014-2020 +# mlxtend Machine Learning Library Extensions +# +# Algorithm for sequential feature selection. +# Author: Sebastian Raschka +# +# License: BSD 3 clause + +# Modified by Jonathan Taylor 2021 +# +# Derives from sequential_feature_selector +# but allows custom model search + +import types +import sys +from copy import deepcopy + +import numpy as np +import scipy as sp + +from sklearn.metrics import get_scorer +from sklearn.base import (clone, MetaEstimatorMixin) +from sklearn.model_selection import cross_val_score +from joblib import Parallel, delayed + +from ..externals.name_estimators import _name_estimators +from ..utils.base_compostion import _BaseXComposition + +class FeatureSelector(_BaseXComposition, MetaEstimatorMixin): + + """Feature Selection for Classification and Regression. + + Parameters + ---------- + estimator: scikit-learn classifier or regressor + strategy: Strategy + Description of search strategy: a named tuple + with fields `initial_state`, + `candidate_states`, `build_submodel`, + `check_finished` and `postprocess`. + + verbose: int (default: 0), level of verbosity to use in logging. + If 0, no output, + if 1 number of features in current set, if 2 detailed logging + including timestamp and cv scores at step. + scoring: str, callable, or None (default: None) + If None (default), uses 'accuracy' for sklearn classifiers + and 'r2' for sklearn regressors. + If str, uses a sklearn scoring metric string identifier, for example + {accuracy, f1, precision, recall, roc_auc} for classifiers, + {'mean_absolute_error', 'mean_squared_error'/'neg_mean_squared_error', + 'median_absolute_error', 'r2'} for regressors. + If a callable object or function is provided, it has to be conform with + sklearn's signature ``scorer(estimator, X, y)``; see + http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html + for more information. + cv: int (default: 5) + Integer or iterable yielding train, test splits. If cv is an integer + and `estimator` is a classifier (or y consists of integer class + labels) stratified k-fold. Otherwise regular k-fold cross-validation + is performed. No cross-validation if cv is None, False, or 0. + n_jobs: int (default: 1) + The number of CPUs to use for evaluating different feature subsets + in parallel. -1 means 'all CPUs'. 
+ pre_dispatch: int, or string (default: '2*n_jobs') + Controls the number of jobs that get dispatched + during parallel execution if `n_jobs > 1` or `n_jobs=-1`. + Reducing this number can be useful to avoid an explosion of + memory consumption when more jobs get dispatched than CPUs can process. + This parameter can be: + None, in which case all the jobs are immediately created and spawned. + Use this for lightweight and fast-running jobs, + to avoid delays due to on-demand spawning of the jobs + An int, giving the exact number of total jobs that are spawned + A string, giving an expression as a function + of n_jobs, as in `2*n_jobs` + clone_estimator: bool (default: True) + Clones estimator if True; works with the original estimator instance + if False. Set to False if the estimator doesn't + implement scikit-learn's set_params and get_params methods. + In addition, it is required to set cv=0, and n_jobs=1. + + Attributes + ---------- + results_: dict + A dictionary of selected feature subsets during the + selection, where the dictionary keys are + the states of these feature selector. The dictionary + values are dictionaries themselves with the following + keys: 'scores' (list individual cross-validation scores) + 'avg_score' (average cross-validation score) + + Notes + ----- + + See `Strategy` for explanation of the fields. + + Examples + ----------- + For usage examples, please see + TBD + + """ + def __init__(self, + estimator, + strategy, + verbose=0, + scoring=None, + cv=5, + n_jobs=1, + pre_dispatch='2*n_jobs', + clone_estimator=True, + fixed_features=None): + + self.estimator = estimator + self.strategy = strategy + self.pre_dispatch = pre_dispatch + # Want to raise meaningful error message if a + # cross-validation generator is inputted + if isinstance(cv, types.GeneratorType): + err_msg = ('Input cv is a generator object, which is not ' + 'supported. Instead please input an iterable yielding ' + 'train, test splits. This can usually be done by ' + 'passing a cross-validation generator to the ' + 'built-in list function. I.e. cv=list()') + raise TypeError(err_msg) + self.cv = cv + self.n_jobs = n_jobs + self.verbose = verbose + self.clone_estimator = clone_estimator + + if self.clone_estimator: + self.est_ = clone(self.estimator) + else: + self.est_ = self.estimator + self.scoring = scoring + + if scoring is None: + if self.est_._estimator_type == 'classifier': + scoring = 'accuracy' + elif self.est_._estimator_type == 'regressor': + scoring = 'r2' + else: + raise AttributeError('Estimator must ' + 'be a Classifier or Regressor.') + if isinstance(scoring, str): + self.scorer = get_scorer(scoring) + else: + self.scorer = scoring + + self.fitted = False + + # don't mess with this unless testing + self._TESTING_INTERRUPT_MODE = False + + @property + def named_estimators(self): + """ + Returns + ------- + List of named estimator tuples, like [('svc', SVC(...))] + """ + return _name_estimators([self.estimator]) + + def get_params(self, deep=True): + # + # Return estimator parameter names for GridSearch support. + # + return self._get_params('named_estimators', deep=deep) + + def set_params(self, **params): + """Set the parameters of this estimator. + Valid parameter keys can be listed with ``get_params()``. + + Returns + ------- + self + """ + self._set_params('estimator', 'named_estimators', **params) + return self + + def fit(self, X, y, groups=None, **fit_params): + """Perform feature selection and learn model from training data. 
+ + Parameters + ---------- + X: {array-like, sparse matrix}, shape = [n_samples, n_features] + Training vectors, where n_samples is the number of samples and + n_features is the number of features. + New in v 0.13.0: pandas DataFrames are now also accepted as + argument for X. + y: array-like, shape = [n_samples] + Target values. + New in v 0.13.0: pandas DataFrames are now also accepted as + argument for y. + groups: array-like, with shape (n_samples,), optional + Group labels for the samples used while splitting the dataset into + train/test set. Passed to the fit method of the cross-validator. + fit_params: various, optional + Additional parameters that are being passed to the estimator. + For example, `sample_weights=weights`. + + Returns + ------- + self: object + + """ + + # reset from a potential previous fit run + self.interrupted_ = False + self.finished_ = False + + results_ = [] + + # unpack the strategy + + (initial_state, + candidate_states, + build_submodel, + check_finished, + postprocess) = self.strategy + + # fit initial model + + _state, _scores = _calc_score(self.estimator, + self.scorer, + build_submodel, + X, + y, + initial_state, + groups=groups, + cv=self.cv, + pre_dispatch=self.pre_dispatch, + **fit_params) + + # keep a running track of the best state + + iteration = 0 + self.path_ = [deepcopy((_state, iteration, _scores))] + cur = best = (_state, iteration, _scores) + + self.update_results_check(results_, + self.path_, + cur, + [(_state, iteration, _scores)], + check_finished) + iteration += 1 + try: + while not self.finished_: + + batch_results = self._batch(iteration, + cur[0], + candidate_states(cur[0]), + build_submodel, + X, + y, + groups=groups, + **fit_params) + iteration += 1 + cur, best_, self.finished_ = self.update_results_check(results_, + self.path_, + best, + batch_results, + check_finished) + if best_: + best = best_ + self.path_.append(deepcopy(cur)) + + if self._TESTING_INTERRUPT_MODE: + raise KeyboardInterrupt + except KeyboardInterrupt: + self.interrupted_ = True + sys.stderr.write('\nSTOPPING EARLY DUE TO KEYBOARD INTERRUPT...') + + self.selected_state_, self.results_ = postprocess(results_) + + self.fitted = True + return self + + def transform(self, X): + """Reduce X to its most important features. + + Parameters + ---------- + X: {array-like, sparse matrix}, shape = [n_samples, n_features] + Training vectors, where n_samples is the number of samples and + n_features is the number of features. + New in v 0.13.0: pandas DataFrames are now also accepted as + argument for X. + + Returns + ------- + Reduced feature subset of X, shape={n_samples, k_features} + + """ + self._check_fitted() + build_submodel = self.strategy.build_submodel + return build_submodel(X, self.selected_state_) + + def fit_transform(self, + X, + y, + groups=None, + **fit_params): + """Fit to training data then reduce X to its most important features. + + Parameters + ---------- + X: {array-like, sparse matrix}, shape = [n_samples, n_features] + Training vectors, where n_samples is the number of samples and + n_features is the number of features. + New in v 0.13.0: pandas DataFrames are now also accepted as + argument for X. + y: array-like, shape = [n_samples] + Target values. + New in v 0.13.0: a pandas Series are now also accepted as + argument for y. + groups: array-like, with shape (n_samples,), optional + Group labels for the samples used while splitting the dataset into + train/test set. Passed to the fit method of the cross-validator. 
+ fit_params: various, optional + Additional parameters that are being passed to the estimator. + For example, `sample_weights=weights`. + + Returns + ------- + Reduced feature subset of X, shape={n_samples, k_features} + + """ + self.fit(X, y, groups=groups, **fit_params) + return self.transform(X) + + def get_metric_dict(self, confidence_interval=0.95): + """Return metric dictionary + + Parameters + ---------- + confidence_interval: float (default: 0.95) + A positive float between 0.0 and 1.0 to compute the confidence + interval bounds of the CV score averages. + + Returns + ---------- + Dictionary with items where each dictionary value is a list + with the number of iterations (number of feature subsets) as + its length. The dictionary keys corresponding to these lists + are as follows: + 'state': tuple of the indices of the feature subset + 'scores': list with individual CV scores + 'avg_score': of CV average scores + 'std_dev': standard deviation of the CV score average + 'std_err': standard error of the CV score average + 'ci_bound': confidence interval bound of the CV score average + + """ + self._check_fitted() + fdict = deepcopy(self.results_) + + def _calc_confidence(ary, confidence=0.95): + std_err = sp.stats.sem(ary) + bound = std_err * sp.stats.t._ppf((1 + confidence) / 2.0, len(ary)) + return bound, std_err + + for k in fdict: + std_dev = np.std(self.results_[k]['scores']) + bound, std_err = self._calc_confidence( + self.results_[k]['scores'], + confidence=confidence_interval) + fdict[k]['ci_bound'] = bound + fdict[k]['std_dev'] = std_dev + fdict[k]['std_err'] = std_err + return fdict + + # private methods + + def _batch(self, + iteration, + cur_state, + candidates, + build_submodel, + X, + y, + groups=None, + **fit_params): + + results = [] + + if candidates is not None: + + parallel = Parallel(n_jobs=self.n_jobs, + verbose=self.verbose, + pre_dispatch=self.pre_dispatch) + + work = parallel(delayed(_calc_score) + (self.estimator, + self.scorer, + build_submodel, + X, + y, + state, + groups=groups, + cv=self.cv, + pre_dispatch=self.pre_dispatch, + **fit_params) + for state in candidates) + + for state, scores in work: + results.append((state, iteration, scores)) + + return results + + def _check_fitted(self): + if not self.fitted: + raise AttributeError('{} has not been fitted yet.'.format(self.__class__)) + + def update_results_check(self, + results, + path, + best, + batch_results, + check_finished): + """ + Update `results_` with current batch + and return a boolean about whether + we should continue or not. + + Parameters + ---------- + + results: dict + Dictionary of all results. + Keys are state with values + dictionaries having keys + `scores`, `avg_scores`. + + best : (state, score) + Current best state and score. + + batch_results: dict + Dictionary of results from a batch fit. + Keys are tate with values + dictionaries having keys + `scores`, `avg_scores`. + + check_finished: callable + Callable taking three arguments + `(results, best_state, batch_results)` which determines if + the state generator should step. Often will just check + if there is a better score than that at current best state + but can use entire set of results if desired. + + Returns + ------- + + best_state: object + State that had the best `avg_score` + + fitted: bool + If batch_results is empty, fitting + has terminated so return True. + Otherwise False. 
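# Worked sketch (not part of the patch) of the per-subset confidence bound
# that `get_metric_dict` reports; the scores below are made-up CV results.
# The public `stats.t.ppf` is used here; it is equivalent to the private
# `_ppf` alias used above for valid quantiles.
import numpy as np
from scipy import stats

scores = np.array([0.81, 0.79, 0.84, 0.80, 0.83])
confidence = 0.95
std_err = stats.sem(scores)
ci_bound = std_err * stats.t.ppf((1 + confidence) / 2.0, len(scores))
# get_metric_dict stores these under the 'ci_bound', 'std_err' and
# 'std_dev' keys for each visited state.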
+ + """ + + finished = batch_results == [] + + if not finished: + + (cur, + finished) = check_finished(results, + path, + best, + batch_results) + + results.extend(batch_results) + + if cur[2] is not None and np.nanmean(cur[2]) > np.nanmean(best[2]): + best = cur + + return (cur, + best, + finished) + else: + return ((None, None, None), + (None, None, None), + True) + + +# private functions + +def _calc_score(estimator, + scorer, + build_submodel, + X, + y, + state, + groups=None, + cv=None, + pre_dispatch='2*n_jobs', + **fit_params): + + X_state = build_submodel(X, state) + + if cv: + scores = cross_val_score(estimator, + X_state, + y, + groups=groups, + cv=cv, + scoring=scorer, + n_jobs=1, + pre_dispatch=pre_dispatch, + fit_params=fit_params) + else: + estimator.fit(X_state, + y, + **fit_params) + scores = np.array([scorer(estimator, + X_state, + y)]) + return state, scores + diff --git a/mlxtend/feature_selection/strategy.py b/mlxtend/feature_selection/strategy.py new file mode 100644 index 000000000..719247e60 --- /dev/null +++ b/mlxtend/feature_selection/strategy.py @@ -0,0 +1,771 @@ +# Jonathan Taylor 2021 +# mlxtend Machine Learning Library Extensions +# +# Objects describing search strategy +# Author: Jonathan Taylor +# + +from typing import NamedTuple, Any, Callable +from itertools import chain, combinations +from functools import partial + +import numpy as np +from sklearn.utils import check_random_state + +from .columns import (_get_column_info, + Column, + _categorical_from_df, + _check_categories) + +class Strategy(NamedTuple): + + """ + initial_state: object + Initial state of feature selector. + state_generator: callable + Callable taking single argument `state` and returning + candidates for next batch of scores to be calculated. + build_submodel: callable + Callable taking two arguments `(X, state)` that returns + model matrix represented by `state`. + check_finished: callable + Callable taking three arguments + `(results, best_state, batch_results)` which determines if + the state generator should step. Often will just check + if there is a better score than that at current best state + but can use entire set of results if desired. + """ + + initial_state: Any + candidate_states: Callable + build_submodel: Callable + check_finished: Callable + postprocess: Callable + + +class MinMaxCandidates(object): + + def __init__(self, + X, + min_features=1, + max_features=1, + fixed_features=None, + custom_feature_names=None, + categorical_features=None): + """ + Parameters + ---------- + X: {array-like, sparse matrix}, shape = [n_samples, n_features] + Training vectors, where n_samples is the number of samples and + n_features is the number of features. + New in v 0.13.0: pandas DataFrames are now also accepted as + argument for X. + min_features: int (default: 1) + Minumum number of features to select + max_features: int (default: 1) + Maximum number of features to select + fixed_features: column identifiers, default=None + Subset of features to keep. Stored as `self.columns[fixed_features]` + where `self.columns` will correspond to columns if X is a `pd.DataFrame` + or an array of integers if X is an `np.ndarray` + custom_feature_names: None or tuple (default: tuple) + Custom feature names for `self.k_feature_names` and + `self.subsets_[i]['feature_names']`. + (new in v 0.13.0) + categorical_features: array-like of {bool, int} of shape (n_features) + or shape (n_categorical_features,), default=None. + Indicates the categorical features. 
+ + - None: no feature will be considered categorical. + - boolean array-like: boolean mask indicating categorical features. + - integer array-like: integer indices indicating categorical + features. + + For each categorical feature, there must be at most `max_bins` unique + categories, and each categorical value must be in [0, max_bins -1]. + + """ + + if hasattr(X, 'loc'): + X_ = X.values + is_categorical, is_ordinal = _categorical_from_df(X) + self.columns = X.columns + else: + X_ = X + is_categorical = _check_categories(categorical_features, + X_)[0] + if is_categorical is None: + is_categorical = np.zeros(X_.shape[1], np.bool) + is_ordinal = np.zeros_like(is_categorical) + self.columns = np.arange(X.shape[1]) + + nfeatures = X_.shape[1] + + if (not isinstance(max_features, int) or + (max_features > nfeatures or max_features < 0)): + raise AttributeError('max_features must be' + ' <= than %d and >= 0' % + (nfeatures + 1)) + + if (not isinstance(min_features, int) or + (min_features > nfeatures or min_features < 0)): + raise AttributeError('min_features must be' + ' <= %d and >= 0' + % (nfeatures + 1)) + + if max_features < min_features: + raise AttributeError('min_features must be <= max_features') + + self.min_features, self.max_features = min_features, max_features + + # make a mapping from the column info to columns in + # implied design matrix + + self.column_info_ = _get_column_info(X, + self.columns, + is_categorical, + is_ordinal) + self.column_map_ = {} + idx = 0 + for col in self.columns: + l = self.column_info_[col].columns + self.column_map_[col] = range(idx, idx + + len(l)) + idx += len(l) + if (custom_feature_names is not None + and len(custom_feature_names) != nfeatures): + raise ValueError('If custom_feature_names is not None, ' + 'the number of elements in custom_feature_names ' + 'must equal %d the number of columns in X.' % idx) + if custom_feature_names is not None: + # recompute the Column info using custom_feature_names + for i, col in enumerate(self.columns): + cur_col = self.column_info_[col] + new_name = custom_feature_names[i] + old_name = cur_col.name + self.column_info_[col] = Column(col, + new_name, + col.is_categorical, + col.is_ordinal, + tuple([n.replace(old_name, + new_name) for n in col.columns]), + col.encoder) + + if fixed_features is not None: + self.fixed_features = set([self.column_info_[f].idx for f in fixed_features]) + else: + self.fixed_features = set([]) + + def candidate_states(self, state): + """ + Produce candidates for fitting. + + Parameters + ---------- + + state: ignored + + Returns + ------- + candidates: iterator + A generator of (indices, label) where indices + are columns of X and label is a name for the + given model. The iterator cycles through + all combinations of columns of nfeature total + of size ranging between min_features and max_features. + If appropriate, restricts combinations to include + a set of fixed features. + Models are labeled with a tuple of the feature names. + The names of the columns default to strings of integers + from range(nfeatures). + + """ + + def chain_(i): + return (c for c in combinations(self.columns, r=i) + if self.fixed_features.issubset(c)) + + candidates = chain.from_iterable(chain_(i) for i in + range(self.min_features, + self.max_features+1)) + return candidates + + def check_finished(self, + results, + path, + best, + batch_results): + """ + Check if we should continue or not. + For exhaustive search we stop because + all models are fit in a single batch. 
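# Illustrative sketch (not part of the patch): what `candidate_states`
# enumerates for an exhaustive search. The column labels are made up.
from itertools import chain, combinations

columns, fixed = range(4), {2}          # four features, feature 2 is fixed
min_features, max_features = 1, 2

candidates = chain.from_iterable(
    (c for c in combinations(columns, r=i) if fixed.issubset(c))
    for i in range(min_features, max_features + 1))
print(list(candidates))                 # [(2,), (0, 2), (1, 2), (2, 3)]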
+ """ + new_best = (None, None, None) + batch_best_score = -np.inf + + for (state, iteration, scores) in batch_results: + avg_score = np.nanmean(scores) + if avg_score > batch_best_score: + new_best = (state, iteration, scores) + batch_best_score = np.nanmean(scores) + + return new_best, True + + +class Stepwise(MinMaxCandidates): + + def __init__(self, + X, + direction, + min_features=1, + max_features=1, + fixed_features=None, + custom_feature_names=None, + categorical_features=None): + """ + Parameters + ---------- + X: {array-like, sparse matrix}, shape = [n_samples, n_features] + Training vectors, where n_samples is the number of samples and + n_features is the number of features. + New in v 0.13.0: pandas DataFrames are now also accepted as + argument for X. + direction: str + One of ['forward', 'backward', 'both'] + min_features: int (default: 1) + Minumum number of features to select + max_features: int (default: 1) + Maximum number of features to select + fixed_features: column identifiers, default=None + Subset of features to keep. Stored as `self.columns[fixed_features]` + where `self.columns` will correspond to columns if X is a `pd.DataFrame` + or an array of integers if X is an `np.ndarray` + custom_feature_names: None or tuple (default: tuple) + Custom feature names for `self.k_feature_names` and + `self.subsets_[i]['feature_names']`. + (new in v 0.13.0) + categorical_features: array-like of {bool, int} of shape (n_features) + or shape (n_categorical_features,), default=None. + Indicates the categorical features. + + - None: no feature will be considered categorical. + - boolean array-like: boolean mask indicating categorical features. + - integer array-like: integer indices indicating categorical + features. + + For each categorical feature, there must be at most `max_bins` unique + categories, and each categorical value must be in [0, max_bins -1]. + + """ + + self.direction = direction + + MinMaxCandidates.__init__(self, + X, + min_features, + max_features, + fixed_features, + custom_feature_names, + categorical_features) + + def candidate_states(self, state): + """ + Produce candidates for fitting. + For stepwise search this depends on the direction. + + If 'forward', all columns not in the current state + are added (maintaining an upper limit on the number of columns + at `self.max_features`). + + If 'backward', all columns not in the current state + are dropped (maintaining a lower limit on the number of columns + at `self.min_features`). + + All candidates include `self.fixed_features` if any. + + Parameters + ---------- + + state: ignored + + Returns + ------- + candidates: iterator + A generator of (indices, label) where indices + are columns of X and label is a name for the + given model. The iterator cycles through + all combinations of columns of nfeature total + of size ranging between min_features and max_features. + If appropriate, restricts combinations to include + a set of fixed features. + Models are labeled with a tuple of the feature names. + The names of the columns default to strings of integers + from range(nfeatures). 
+ + """ + + state = set(state) + if len(state) < self.max_features: # union + forward = (tuple(sorted(state | set([c]))) for c in self.columns if (c not in state and + self.fixed_features.issubset(state | set([c])))) + else: + forward = [] + + if len(state) > self.min_features: # symmetric difference + backward = (tuple(sorted(state ^ set([c]))) for c in self.columns if (c in state and + self.fixed_features.issubset(state ^ set([c])))) + else: + backward = [] + + if self.direction == 'forward': + return forward + elif self.direction == 'backward': + return backward + else: + return chain.from_iterable([forward, backward]) + + @staticmethod + def first_peak(X, + direction='forward', + min_features=1, + max_features=1, + fixed_features=None, + initial_features=[], + custom_feature_names=None, + categorical_features=None, + parsimonious=True): + """ + Strategy that stops when no improvement + in score is possible. + + Parameters + ---------- + X: {array-like, sparse matrix}, shape = [n_samples, n_features] + Training vectors, where n_samples is the number of samples and + n_features is the number of features. + New in v 0.13.0: pandas DataFrames are now also accepted as + argument for X. + direction: str + One of ['forward', 'backward', 'both'] + min_features: int (default: 1) + Minumum number of features to select + max_features: int (default: 1) + Maximum number of features to select + fixed_features: column identifiers, default=None + Subset of features to keep. Stored as `self.columns[fixed_features]` + where `self.columns` will correspond to columns if X is a `pd.DataFrame` + or an array of integers if X is an `np.ndarray` + initial_features: column identifiers, default=[] + Subset of features to be used to initialize. + custom_feature_names: None or tuple (default: tuple) + Custom feature names for `self.k_feature_names` and + `self.subsets_[i]['feature_names']`. + (new in v 0.13.0) + categorical_features: array-like of {bool, int} of shape (n_features) + or shape (n_categorical_features,), default=None. + Indicates the categorical features. + + - None: no feature will be considered categorical. + - boolean array-like: boolean mask indicating categorical features. + - integer array-like: integer indices indicating categorical + features. + + For each categorical feature, there must be at most `max_bins` unique + categories, and each categorical value must be in [0, max_bins -1]. + + parsimonious: bool + If True, use the 1sd rule: among the shortest models + within one standard deviation of the best score + pick the one with the best average score. 
+ + Returns + ------- + + strategy : NamedTuple + + """ + + step = Stepwise(X, + direction, + min_features, + max_features, + fixed_features, + custom_feature_names, + categorical_features) + + # if any categorical features or an intercept + # is included then we must + # create a new design matrix + + build_submodel = partial(_build_submodel, step.column_info_) + + # pick an initial state + + initial_state = tuple(initial_features) + + if not step.fixed_features.issubset(initial_features): + raise ValueError('initial_features should contain %s' % str(step.fixed_features)) + + if not parsimonious: + _postprocess = _postprocess_best + else: + _postprocess = _postprocess_best_1sd + + return Strategy(initial_state, + step.candidate_states, + build_submodel, + first_peak, + _postprocess) + + @staticmethod + def fixed_size(X, + model_size, + direction='forward', + min_features=1, + max_features=1, + fixed_features=None, + initial_features=[], + custom_feature_names=None, + categorical_features=None, + parsimonious=True): + """ + Strategy that stops first time + a given model size is reached. + + Parameters + ---------- + X: {array-like, sparse matrix}, shape = [n_samples, n_features] + Training vectors, where n_samples is the number of samples and + n_features is the number of features. + New in v 0.13.0: pandas DataFrames are now also accepted as + argument for X. + direction: str + One of ['forward', 'backward', 'both'] + min_features: int (default: 1) + Minumum number of features to select + max_features: int (default: 1) + Maximum number of features to select + fixed_features: column identifiers, default=None + Subset of features to keep. Stored as `self.columns[fixed_features]` + where `self.columns` will correspond to columns if X is a `pd.DataFrame` + or an array of integers if X is an `np.ndarray` + initial_features: column identifiers, default=[] + Subset of features to be used to initialize. + custom_feature_names: None or tuple (default: tuple) + Custom feature names for `self.k_feature_names` and + `self.subsets_[i]['feature_names']`. + (new in v 0.13.0) + categorical_features: array-like of {bool, int} of shape (n_features) + or shape (n_categorical_features,), default=None. + Indicates the categorical features. + + - None: no feature will be considered categorical. + - boolean array-like: boolean mask indicating categorical features. + - integer array-like: integer indices indicating categorical + features. + + For each categorical feature, there must be at most `max_bins` unique + categories, and each categorical value must be in [0, max_bins -1]. + + parsimonious: bool + If True, use the 1sd rule: among the shortest models + within one standard deviation of the best score + pick the one with the best average score. 
+ + Returns + ------- + + strategy : NamedTuple + + """ + + step = Stepwise(X, + direction, + min_features, + max_features, + fixed_features, + custom_feature_names, + categorical_features) + + # if any categorical features or an intercept + # is included then we must + # create a new design matrix + + build_submodel = partial(_build_submodel, step.column_info_) + + # pick an initial state + + initial_state = tuple(initial_features) + + if not step.fixed_features.issubset(initial_features): + raise ValueError('initial_features should contain %s' % str(step.fixed_features)) + + if not parsimonious: + _postprocess = _postprocess_best + else: + _postprocess = _postprocess_best_1sd + + return Strategy(initial_state, + step.candidate_states, + build_submodel, + partial(fixed_size, model_size), + partial(_postprocess_fixed_size, model_size)) + + + +def exhaustive(X, + min_features=1, + max_features=1, + fixed_features=None, + custom_feature_names=None, + categorical_features=None, + parsimonious=True): + """ + Parameters + ---------- + X: {array-like, sparse matrix}, shape = [n_samples, n_features] + Training vectors, where n_samples is the number of samples and + n_features is the number of features. + New in v 0.13.0: pandas DataFrames are now also accepted as + argument for X. + min_features: int (default: 1) + Minumum number of features to select + max_features: int (default: 1) + Maximum number of features to select + fixed_features: column identifiers, default=None + Subset of features to keep. Stored as `self.columns[fixed_features]` + where `self.columns` will correspond to columns if X is a `pd.DataFrame` + or an array of integers if X is an `np.ndarray` + custom_feature_names: None or tuple (default: tuple) + Custom feature names for `self.k_feature_names` and + `self.subsets_[i]['feature_names']`. + (new in v 0.13.0) + categorical_features: array-like of {bool, int} of shape (n_features) + or shape (n_categorical_features,), default=None. + Indicates the categorical features. + + - None: no feature will be considered categorical. + - boolean array-like: boolean mask indicating categorical features. + - integer array-like: integer indices indicating categorical + features. + + For each categorical feature, there must be at most `max_bins` unique + categories, and each categorical value must be in [0, max_bins -1]. + + parsimonious: bool + If True, use the 1sd rule: among the shortest models + within one standard deviation of the best score + pick the one with the best average score. + + Returns + ------- + + initial_state: tuple + (column_names, feature_idx) + + state_generator: callable + Object that proposes candidates + based on current state. Takes a single + argument `state` + + build_submodel: callable + Candidate generator that enumerate + all valid subsets of columns. + + check_finished: callable + Check whether to stop. Takes two arguments: + `best_result` a dict with keys ['scores', 'avg_score']; + and `state`. 
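# Hypothetical end-to-end sketch (not part of the patch; it mirrors the unit
# tests added below): exhaustive search over subsets of up to 3 features.
import numpy as np
from sklearn.linear_model import LinearRegression
from mlxtend.feature_selection.generic_selector import FeatureSelector
from mlxtend.feature_selection.strategy import exhaustive

X = np.random.standard_normal((100, 5))
y = np.random.standard_normal(100)

strategy = exhaustive(X, max_features=3)
selector = FeatureSelector(LinearRegression(), strategy, cv=3)
selector.fit(X, y)
print(selector.selected_state_)   # column subset chosen by the postprocess rule
X_sel = selector.transform(X)     # design matrix restricted to that subset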
+ + """ + + strategy = MinMaxCandidates(X, + min_features, + max_features, + fixed_features, + custom_feature_names, + categorical_features) + + # if any categorical features or an intercept + # is included then we must + # create a new design matrix + + build_submodel = partial(_build_submodel, strategy.column_info_) + + if strategy.fixed_features: + initial_features = sorted(strategy.fixed_features) + else: + initial_features = range(strategy.min_features) + initial_state = tuple(initial_features) + + if not parsimonious: + _postprocess = _postprocess_best + else: + _postprocess = _postprocess_best_1sd + + return Strategy(initial_state, + strategy.candidate_states, + build_submodel, + strategy.check_finished, + _postprocess) + +def first_peak(results, + path, + best, + batch_results): + """ + Check if we should continue or not. + + For first_peak search we stop if we cannot improve + over our current best score. + + """ + new_best = (None, None, None) + batch_best_score = -np.inf + + for state, iteration, scores in batch_results: + avg_score = np.nanmean(scores) + if avg_score > batch_best_score: + new_best = (state, iteration, scores) + batch_best_score = avg_score + + any_better = batch_best_score > np.nanmean(best[2]) + return new_best, not any_better + +def fixed_size(model_size, + results, + path, + best, + batch_results): + """ + Check if we should continue or not. + + For first_peak search we stop if we cannot improve + over our current best score. + + """ + new_best = (None, None, None) + batch_best_score = -np.inf + + for state, iteration, scores in batch_results: + avg_score = np.nanmean(scores) + if avg_score > batch_best_score: + new_best = (state, iteration, scores) + batch_best_score = avg_score + + any_better = batch_best_score > np.nanmean(best[2]) + return new_best, len(new_best[0]) == model_size + + +# private functions + + +def _build_submodel(column_info, X, cols): + if cols: + return np.column_stack([column_info[col].get_columns(X, fit=True)[0] for col in cols]) + else: + return np.zeros((X.shape[0], 1)) + +def _postprocess_fixed_size(model_size, results): + """ + Find the best state from `results` + based on `avg_score`. + + Return best state and results + """ + + best_state = None + best_score = -np.inf + + new_results = {} + for (state, iteration, scores) in results: + new_state = tuple(state) # [v.name for v in state]) + avg_score = np.nanmean(scores) + if avg_score > best_score and len(new_state) == model_size: + best_state = new_state + best_score = avg_score + new_results[new_state] = avg_score + return best_state, new_results + +def _postprocess_best(results): + """ + Find the best state from `results` + based on `avg_score`. + + Return best state and results + """ + + best_state = None + best_score = -np.inf + + new_results = {} + for (state, iteration, scores) in results: + new_state = tuple(state) # [v.name for v in state]) + avg_score = np.nanmean(scores) + if avg_score > best_score: + best_state = new_state + best_score = avg_score + new_results[new_state] = avg_score + return best_state, new_results + + +def _postprocess_best_1sd(results): + """ + Find the best state from `results` + based on np.nanmean(scores) + + Find models satisfying the 1sd rule + and choose the state with best score + among the smallest such states. 
+ + Return best state and results + + Models are compared by length of state + """ + + best_state = None + best_score = -np.inf + + for state, iteration, scores in results: + avg_score = np.nanmean(scores) + if avg_score > best_score: + best_state = state + best_score = avg_score + + states_1sd = [] + + for (state, iteration, scores) in results: + if len(state) >= len(best_state): + continue + _limit = (np.nanmean(scores) + + np.nanstd(scores) / np.sqrt(scores.shape[0])) + if _limit >= best_score: + states_1sd.append((state, iteration, scores)) + + shortest_1sd = np.inf + + for (state, iteration, scores) in states_1sd: + if len(state) < shortest_1sd: + shortest_1sd = len(state) + + best_state_1sd = None + best_score_1sd = -np.inf + + for (state, iteration, scores) in states_1sd: + avg_score = np.nanmean(scores) + if ((len(state) == shortest_1sd) + and (avg_score <= + best_score_1sd)): + best_state_1sd = state + best_score_1sd = avg_score + + new_results = {} + for (state, iteration, scores) in results: + new_state = tuple(state) #[v.name for v in state]) + new_results[new_state] = np.nanmean(scores) + if best_state_1sd: + best_state_1sd = tuple([v.name for v in best_state_1sd]) + return best_state_1sd, new_results + else: + best_state = tuple(best_state) # [v.name for v in best_state]) + return best_state, new_results diff --git a/mlxtend/feature_selection/tests/test_generic_selector.py b/mlxtend/feature_selection/tests/test_generic_selector.py new file mode 100644 index 000000000..1164dd539 --- /dev/null +++ b/mlxtend/feature_selection/tests/test_generic_selector.py @@ -0,0 +1,418 @@ +from itertools import product + +import pytest +import warnings + +import numpy as np +from sklearn.linear_model import LinearRegression +from mlxtend.feature_selection.generic_selector import FeatureSelector +from mlxtend.feature_selection.strategy import exhaustive, Stepwise + + +have_pandas = True +try: + import pandas as pd +except ImportError: + have_pandas = False + +def test_exhaustive_selector(): + + n, p = 100, 5 + X = np.random.standard_normal((n, p)) + Y = np.random.standard_normal(n) + + strategy = exhaustive(X, + max_features=4, + fixed_features=[2,3]) + + exhaustive_selector = FeatureSelector(LinearRegression(), + strategy) + + exhaustive_selector.fit(X, Y) + + strategy = exhaustive(X, + max_features=4) + + exhaustive_selector = FeatureSelector(LinearRegression(), + strategy) + + exhaustive_selector.fit(X, Y) + exhaustive_selector.transform(X) + exhaustive_selector.fit_transform(X, Y) + + # test CV + + strategy = exhaustive(X, + max_features=5) + + exhaustive_selector = FeatureSelector(LinearRegression(), + strategy, + cv=3) + # test CV, verbose + + strategy = exhaustive(X, + max_features=3) + + for cv, verbose in product([None, 3], + [0,1,2]): + + exhaustive_selector = FeatureSelector(LinearRegression(), + strategy, + cv=cv, + verbose=verbose) + + exhaustive_selector.fit(X, Y) + print(cv, verbose, exhaustive_selector.selected_state_) + +def test_exhaustive_categorical(): + + n, p = 100, 5 + X = np.random.standard_normal((n, p)) + X[:,0] = np.random.choice(range(5), (n,), replace=True) + Y = np.random.standard_normal(n) + + categorical_features = [True] + [False]*4 + strategy = exhaustive(X, + max_features=4, + fixed_features=[2,3], + categorical_features=categorical_features) + + exhaustive_selector = FeatureSelector(LinearRegression(), + strategy) + + exhaustive_selector.fit(X, Y) + + +def test_step_categorical(): + + n, p = 100, 10 + X = np.random.standard_normal((n, p)) + X[:,0] = 
np.random.choice(range(5), (n,), replace=True) + Y = np.random.standard_normal(n) + + categorical_features = [True] + [False]*9 + strategy = Stepwise.first_peak(X, + max_features=4, + fixed_features=[2,3], + initial_features=[2,3], + categorical_features=categorical_features) + + step_selector = FeatureSelector(LinearRegression(), + strategy) + step_selector.fit(X, Y) + +def test_step_scoring(): + + n, p = 100, 10 + + def AIC(estimator, X, Y): + Yhat = estimator.predict(X) + return 0.5 * ((Y - Yhat)**2).sum() + 2 * X.shape[1] + + X = np.random.standard_normal((n, p)) + X[:,0] = np.random.choice(range(5), (n,), replace=True) + Y = np.random.standard_normal(n) + + categorical_features = [True] + [False]*9 + strategy = Stepwise.first_peak(X, + max_features=4, + fixed_features=[2,3], + initial_features=[2,3], + categorical_features=categorical_features) + + step_selector = FeatureSelector(LinearRegression(), + strategy, + scoring='neg_mean_squared_error') + step_selector.fit(X, Y) + + step_selector = FeatureSelector(LinearRegression(), + strategy, + scoring='neg_mean_squared_error', + cv=None) + step_selector.fit(X, Y) + + step_selector = FeatureSelector(LinearRegression(), + strategy, + scoring=AIC, + cv=None) + step_selector.fit(X, Y) + +def test_step_bigger(): + + n, p = 100, 20 + X = np.random.standard_normal((n, p)) + X[:,0] = np.random.choice(range(5), (n,), replace=True) + Y = np.random.standard_normal(n) + + categorical_features = [True] + [False]*(p-1) + + for direction in ['forward', 'backward', 'both']: + strategy = Stepwise.first_peak(X, + direction=direction, + max_features=p, + fixed_features=[2,3], + initial_features=[2,3], + categorical_features=categorical_features) + + step_selector = FeatureSelector(LinearRegression(), + strategy) + + step_selector.fit(X, Y) + +@pytest.mark.skipif(not have_pandas, reason='pandas unavailable') +def test_pandas1(): + + n, p = 100, 5 + X = np.random.standard_normal((n, p)) + Y = np.random.standard_normal(n) + D = pd.DataFrame(X, columns=['A', 'B', 'C', 'D', 'E']) + D['A'] = pd.Categorical(np.random.choice(range(5), (n,), replace=True)) + + for direction in ['forward', 'backward', 'both']: + + strategy = Stepwise.first_peak(D, + direction=direction, + max_features=p, + fixed_features=['A','C'], + initial_features=['A','C']) + + step_selector = FeatureSelector(LinearRegression(), + strategy, + cv=3) + + step_selector.fit(D, Y) + print(step_selector.path_) + +def neg_AIC(linmod, X, Y): + """ + Negative AIC for linear regression model + """ + n, p = X.shape + Yhat = linmod.predict(X) + sigma_MLE = np.linalg.norm(Y - Yhat) / np.sqrt(n) + # adding 2 -- one for sigma^2, one for intercept fit by LinearRegression + return -(n * np.log(2 * np.pi * sigma_MLE**2) + n + 2 * (p + 2)) + +def test_boston_forward(): + """Ran the following R cell + ```{R} + library(MASS) + data(Boston) + M=step(glm(medv ~ 1, data=Boston), list(upper=glm(medv ~ ., data=Boston)), direction='forward', trace=FALSE) + print(AIC(M)) + names(coef(M)[2:12]) + ``` + + Output: + + [1] 3023.726 + [1] "lstat" "rm" "ptratio" "dis" "nox" "chas" "black" + [8] "zn" "crim" "rad" "tax" + """ + + try: + import statsmodels.api as sm + except ImportError as e: + warnings.warn('import of statsmodels failed') + return + + selected_R = ["lstat", "rm", "ptratio", "dis", + "nox", "chas", "black", + "zn", "crim", "rad", "tax"] + + Boston = sm.datasets.get_rdataset('Boston', package='MASS').data + D = Boston.drop('medv', axis=1) + X = D.values + Y = Boston['medv'] + + strategy = Stepwise.first_peak(X, + 
max_features=X.shape[1], + min_features=0, + direction='forward', + parsimonious=False) + + step_selector = FeatureSelector(LinearRegression(), + strategy, + scoring=neg_AIC, + cv=None) + step_selector.fit(X, Y) + + selected_vars = [D.columns[j] for j in step_selector.selected_state_] + + Xsel = X[:,list(step_selector.selected_state_)] + selected_model = LinearRegression().fit(Xsel, Y) + + assert(sorted(selected_vars) == sorted(selected_R)) + assert(np.fabs(neg_AIC(selected_model, Xsel, Y) + 3023.726) < 0.01) + +def test_boston_both(): + """Ran the following R cell + ```{R} + %%R + library(MASS) + data(Boston) + M=step(glm(medv ~ ., data=Boston), list(upper=glm(medv ~ 1, data=Boston)), direction='both', trace=FALSE) + print(AIC(M)) + names(coef(M)[2:12]) + ``` + + Output: + + [1] 3023.726 + [1] "crim" "zn" "chas" "nox" "rm" "dis" "rad" + [8] "tax" "ptratio" "black" "lstat" + + """ + + try: + import statsmodels.api as sm + except ImportError as e: + warnings.warn('import of statsmodels failed') + return + + selected_R = ["crim", "zn", "chas", "nox", "rm", "dis", "rad", + "tax", "ptratio", "black", "lstat"] + + Boston = sm.datasets.get_rdataset('Boston', package='MASS').data + D = Boston.drop('medv', axis=1) + X = D.values + Y = Boston['medv'] + + strategy = Stepwise.first_peak(X, + max_features=X.shape[1], + min_features=0, + direction='both', + parsimonious=False) + + step_selector = FeatureSelector(LinearRegression(), + strategy, + scoring=neg_AIC, + cv=None) + step_selector.fit(X, Y) + + selected_vars = [D.columns[j] for j in step_selector.selected_state_] + + Xsel = X[:,list(step_selector.selected_state_)] + selected_model = LinearRegression().fit(Xsel, Y) + + assert(sorted(selected_vars) == sorted(selected_R)) + assert(np.fabs(neg_AIC(selected_model, Xsel, Y) + 3023.726) < 0.01) + +def test_boston_back(): + """Ran the following R cell + ```{R} + %%R + library(MASS) + data(Boston) + M=step(glm(medv ~ ., data=Boston), list(upper=glm(medv ~ 1, data=Boston)), direction='back', trace=FALSE) + print(AIC(M)) + names(coef(M)[2:12]) + ``` + + Output: + + [1] 3023.726 + [1] "crim" "zn" "chas" "nox" "rm" "dis" "rad" + [8] "tax" "ptratio" "black" "lstat" + + """ + + try: + import statsmodels.api as sm + except ImportError as e: + warnings.warn('import of statsmodels failed') + return + + selected_R = ["crim", "zn", "chas", "nox", "rm", "dis", "rad", + "tax", "ptratio", "black", "lstat"] + + Boston = sm.datasets.get_rdataset('Boston', package='MASS').data + D = Boston.drop('medv', axis=1) + X = D.values + Y = Boston['medv'] + + strategy = Stepwise.first_peak(X, + max_features=X.shape[1], + direction='backward', + initial_features=list(range(X.shape[1])), + parsimonious=False) + + step_selector = FeatureSelector(LinearRegression(), + strategy, + scoring=neg_AIC, + cv=None) + step_selector.fit(X, Y) + + selected_vars = [D.columns[j] for j in step_selector.selected_state_] + + Xsel = X[:,list(step_selector.selected_state_)] + selected_model = LinearRegression().fit(Xsel, Y) + + assert(sorted(selected_vars) == sorted(selected_R)) + assert(np.fabs(neg_AIC(selected_model, Xsel, Y) + 3023.726) < 0.01) + +def test_boston_forward_3step(): + """Ran the following R cell + ```{R} + library(MASS) + data(Boston) + M=step(glm(medv ~ 1, data=Boston), list(upper=glm(medv ~ ., data=Boston)), direction='forward', trace=FALSE, steps=3) + print(AIC(M)) + names(coef(M)[2:4]) + ``` + + Output: + + [1] 3116.097 + [1] "lstat" "rm" "ptratio" + + """ + + try: + import statsmodels.api as sm + except ImportError as e: + 
warnings.warn('import of statsmodels failed') + return + + selected_R = ["lstat", "rm", "ptratio"] + + Boston = sm.datasets.get_rdataset('Boston', package='MASS').data + D = Boston.drop('medv', axis=1) + X = D.values + Y = Boston['medv'] + + strategy = Stepwise.first_peak(X, + max_features=3, + min_features=3, + initial_features=[], + direction='forward', + parsimonious=False) + + step_selector = FeatureSelector(LinearRegression(), + strategy, + scoring=neg_AIC, + cv=None) + step_selector.fit(X, Y) + selected_vars = [D.columns[j] for j in step_selector.selected_state_] + + strategy2 = Stepwise.fixed_size(X, + 3, + max_features=X.shape[1], + min_features=0, + initial_features=[], + direction='forward') + step_selector2 = FeatureSelector(LinearRegression(), + strategy2, + scoring=neg_AIC, + cv=None) + step_selector2.fit(X, Y) + selected_vars2 = [D.columns[j] for j in step_selector2.selected_state_] + print(selected_vars, selected_vars2) + + Xsel = X[:,list(step_selector.selected_state_)] + selected_model = LinearRegression().fit(Xsel, Y) + + assert(sorted(selected_vars) == sorted(selected_R)) + assert(sorted(selected_vars2) == sorted(selected_R)) + assert(np.fabs(neg_AIC(selected_model, Xsel, Y) + 3116.097) < 0.01) +
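# Hypothetical usage sketch (not part of the patch) combining the pieces the
# tests above exercise: forward stepwise search that stops at exactly three
# features, scored by the neg_AIC callable defined above with cv=None.
import numpy as np
from sklearn.linear_model import LinearRegression
from mlxtend.feature_selection.generic_selector import FeatureSelector
from mlxtend.feature_selection.strategy import Stepwise

X = np.random.standard_normal((100, 8))
Y = np.random.standard_normal(100)

strategy = Stepwise.fixed_size(X, 3,
                               direction='forward',
                               min_features=0,
                               max_features=X.shape[1],
                               initial_features=[])
selector = FeatureSelector(LinearRegression(), strategy,
                           scoring=neg_AIC, cv=None)
selector.fit(X, Y)
print(selector.selected_state_)   # a 3-column subset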