msmbuilder · RobertArbon · Oct 6, 2018 · Oct 6, 2018 · Oct 6, 2018 · Oct 6, 2018
diff --git a/.travis.yml b/.travis.yml
@@ -4,12 +4,15 @@ sudo: false
 
 env:
     matrix:
-        - CONDA_PY=3.5
+#        - CONDA_PY=3.5
         - CONDA_PY=3.6
-
+        - CONDA_PY=3.7
+#        - CONDA_PY=3.8
 branches:
   only:
-    - master
+    - Development
+    - update_to_py37
+#    - master
 
 install:
     - source devtools/travis-ci/install_miniconda.sh

diff --git a/devtools/conda-recipe/meta.yaml b/devtools/conda-recipe/meta.yaml
@@ -26,15 +26,14 @@ requirements:
     - bokeh
     - matplotlib
     - pandas
+    - joblib
 
 test:
 
   requires:
     - nose
     - nose-timer
     - gpy
-    - msmbuilder
-    - msmb_data
     - mdtraj
     - hyperopt
     - coverage

diff --git a/devtools/travis-ci/install_miniconda.sh b/devtools/travis-ci/install_miniconda.sh
@@ -1,7 +1,8 @@
 #!/bin/bash
 MINICONDA=Miniconda3-latest-Linux-x86_64.sh
-MINICONDA_MD5=$(curl -s https://repo.continuum.io/miniconda/ | grep -A3 $MINICONDA | sed -n '4p' | sed -n 's/ *<td>\(.*\)<\/td> */\1/p')
-wget https://repo.continuum.io/miniconda/$MINICONDA
+MINICONDA_REPO=https://repo.anaconda.com/miniconda
+MINICONDA_MD5=$(curl $MINICONDA_REPO/ | grep -A3 $MINICONDA | sed -n '4p' | sed -n 's/ *<td>\(.*\)<\/td> */\1/p')
+wget $MINICONDA_REPO/$MINICONDA
 if [[ $MINICONDA_MD5 != $(md5sum $MINICONDA | cut -d ' ' -f 1) ]]; then
     echo "Miniconda MD5 mismatch"
     exit 1

diff --git a/docs/changelog.rst b/docs/changelog.rst
@@ -20,7 +20,7 @@ New Features
 + Added ``n_jobs`` flag for ``osprey worker`` to control how many threads are used for cross-validation.
 + Added the ability to specify three different acquisition functions for the gaussian processes strategy: expected improvement `ei`, upper confidence bound, `ucb` and the original Osprey function (the default), `osprey`.
 + Refactored code: GP strategy has been replaced by a general Bayes strategy.  This will eventually allow different surrogate functions, other than Gaussian Process, to be specified.
-
++ Updated Python compatibility: now compatible with Python 3.7. MSMBuilder is now an optional dependency.
 
 Bug Fixes
 ~~~~~~~~~

diff --git a/osprey/config.py b/osprey/config.py
@@ -372,7 +372,7 @@ def sha1(self):
 
 
 def parse(f):
-    res = yaml.load(f)
+    res = yaml.load(f, Loader=yaml.SafeLoader)
     if res is None:
         res = {}
 

diff --git a/osprey/dataset_loaders.py b/osprey/dataset_loaders.py
@@ -219,7 +219,7 @@ class JoblibDatasetLoader(BaseDatasetLoader):
     short_name = 'joblib'
 
     def __init__(self, filenames, x_name=None, y_name=None,
-                 system_joblib=False):
+                 system_joblib=True):
         self.filenames = filenames
         self.x_name = x_name
         self.y_name = y_name

diff --git a/osprey/fit_estimator.py b/osprey/fit_estimator.py
@@ -6,13 +6,21 @@
 import numpy as np
 import sklearn
 from sklearn.base import is_classifier, clone
-from sklearn.metrics.scorer import check_scoring
-from sklearn.externals.joblib import Parallel, delayed
+try:
+    from sklearn.metrics import check_scoring
+except ImportError:  # a missing name raises ImportError, not ModuleNotFoundError
+    from sklearn.metrics.scorer import check_scoring
+# TODO: drop this fallback -- sklearn.externals.joblib was deprecated in 0.21 and removed in 0.23
+try:
+    from joblib import Parallel, delayed
+except ImportError:
+    from sklearn.externals.joblib import Parallel, delayed
+
 from sklearn.model_selection import check_cv
 from sklearn.model_selection._validation import _safe_split, _score
 
 from .utils import check_arrays, num_samples
-from .utils import short_format_time, is_msmbuilder_estimator
+from .utils import short_format_time, is_msmbuilder_estimator, is_pyemma_estimator
 
 
 if LooseVersion(sklearn.__version__) < LooseVersion('0.16.1'):
@@ -40,7 +48,6 @@ def fit_and_score_estimator(estimator, parameters, cv, X, y=None, scoring=None,
         The scores on the training and test sets, as well as the mean test set
         score.
     """
-
     scorer = check_scoring(estimator, scoring=scoring)
     n_samples = num_samples(X)
     X, y = check_arrays(X, y, allow_lists=True, sparse_format='csr',
@@ -136,9 +143,11 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters,
 
     scoring_time = time.time() - start_time
 
-    msmbuilder_api = is_msmbuilder_estimator(estimator)
-    n_samples_test = num_samples(X_test, is_nested=msmbuilder_api)
-    n_samples_train = num_samples(X_train, is_nested=msmbuilder_api)
+    # With PyEMMA and MSMBuilder, observations are nested in separate lists not just in a single
+    # array.
+    is_nested = is_msmbuilder_estimator(estimator) or is_pyemma_estimator(estimator)
+    n_samples_test = num_samples(X_test, is_nested=is_nested)
+    n_samples_train = num_samples(X_train, is_nested=is_nested)
     if verbose > 2:
         msg += ", score=%f" % test_score
     if verbose > 1:

diff --git a/osprey/plot.py b/osprey/plot.py
@@ -24,7 +24,7 @@ def nonconstant_parameters(data):
     assert len(data) > 0
     df = pd.DataFrame([d['parameters'] for d in data])
     # http://stackoverflow.com/a/20210048/1079728
-    filtered = df.loc[:, (df != df.ix[0]).any()]
+    filtered = df.loc[:, (df != df.iloc[0]).any()]
     return filtered
 
 

diff --git a/osprey/plugins/tests/test_plugin_pylearn2.py b/osprey/plugins/tests/test_plugin_pylearn2.py
@@ -6,7 +6,12 @@
 import unittest
 from argparse import Namespace
 import numpy as np
-from numpy.testing.decorators import skipif
+# TODO remove compat with py<=3.6
+try:
+    from numpy.testing import dec
+    skipif = dec.skipif
+except ImportError:  # a missing name raises ImportError, not ModuleNotFoundError
+    from numpy.testing.decorators import skipif
 
 try:
     import pylearn2

diff --git a/osprey/tests/test_cli_skeleton.py b/osprey/tests/test_cli_skeleton.py
@@ -18,11 +18,11 @@ def test_1():
 
     try:
         os.chdir(dirname)
-        subprocess.check_call([OSPREY_BIN, 'skeleton', '-t', 'msmbuilder',
+        subprocess.check_call([OSPREY_BIN, 'skeleton', '-t', 'sklearn',
                               '-f', 'config.yaml'])
         assert os.path.exists('config.yaml')
         with open('config.yaml', 'rb') as f:
-            yaml.load(f)
+            yaml.load(f, Loader=yaml.SafeLoader)
         Config('config.yaml')
 
     finally:

diff --git a/osprey/tests/test_cli_worker_and_dump.py b/osprey/tests/test_cli_worker_and_dump.py
@@ -7,10 +7,16 @@
 import subprocess
 import tempfile
 from distutils.spawn import find_executable
-from numpy.testing.decorators import skipif
-from nose.plugins.skip import SkipTest
+# TODO remove compat with py<=3.6
+try:
+    from numpy.testing import dec
+    skipif = dec.skipif
+except ImportError:  # a missing name raises ImportError, not ModuleNotFoundError
+    from numpy.testing.decorators import skipif
 
+# from nose.plugins.skip import SkipTest
 
+# TODO: add PyEMMA analogues of these tests.
 try:
     __import__('msmbuilder.example_datasets')
     HAVE_MSMBUILDER = True
@@ -20,12 +26,13 @@
 OSPREY_BIN = find_executable('osprey')
 
 
-# @skipif(not HAVE_MSMBUILDER, 'this test requires MSMBuilder')
+@skipif(not HAVE_MSMBUILDER, 'this test requires MSMBuilder')
 def test_msmbuilder_skeleton():
-    try:
-        from msmbuilder.example_datasets import FsPeptide
-    except ImportError as e:
-        raise SkipTest(e)
+    from msmbuilder.example_datasets import FsPeptide
+    # try:
+    #     from msmbuilder.example_datasets import FsPeptide
+    # except ImportError as e:
+    #     raise SkipTest(e)
 
     assert OSPREY_BIN is not None
     cwd = os.path.abspath(os.curdir)
@@ -48,12 +55,13 @@ def test_msmbuilder_skeleton():
         shutil.rmtree(dirname)
 
 
-# @skipif(not HAVE_MSMBUILDER, 'this test requires MSMBuilder')
+@skipif(not HAVE_MSMBUILDER, 'this test requires MSMBuilder')
 def test_msmb_feat_select_skeleton():
-    try:
-        from msmbuilder.example_datasets import FsPeptide
-    except ImportError as e:
-        raise SkipTest(e)
+    from msmbuilder.example_datasets import FsPeptide
+    # try:
+    #     from msmbuilder.example_datasets import FsPeptide
+    # except ImportError as e:
+    #     raise SkipTest(e)
 
     assert OSPREY_BIN is not None
     cwd = os.path.abspath(os.curdir)

diff --git a/osprey/tests/test_config.py b/osprey/tests/test_config.py
@@ -14,6 +14,7 @@
 from osprey.search_space import IntVariable, FloatVariable, EnumVariable
 from osprey.strategies import RandomSearch, HyperoptTPE, Bayes
 
+import warnings
 
 os.environ['OSPREYRC'] = ' '
 
@@ -24,21 +25,24 @@ def test_estimator_pickle():
     with tempfile.NamedTemporaryFile('w+b', 0) as f:
 
         cPickle.dump(KMeans(), f)
-
         config = Config.fromdict({
             'estimator': {'pickle': f.name}
         }, check_fields=False)
+
         assert isinstance(config.estimator(), KMeans)
 
 
 def test_estimator_eval_1():
-    config = Config.fromdict({
-        'estimator': {
-            'eval': 'KMeans()',
-            'eval_scope': 'sklearn',
-        }
-    }, check_fields=False)
-    assert isinstance(config.estimator(), KMeans)
+    # TODO: why is this the only call to sklearn that produces the FutureWarnings?  What about eval_1?
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore", category=FutureWarning)
+        config = Config.fromdict({
+            'estimator': {
+                'eval': 'KMeans()',
+                'eval_scope': 'sklearn',
+            }
+        }, check_fields=False)
+        assert isinstance(config.estimator(), KMeans)
 
 
 def test_estimator_eval_2():

diff --git a/osprey/tests/test_dataset_loader.py b/osprey/tests/test_dataset_loader.py
@@ -6,8 +6,17 @@
 
 import numpy as np
 import sklearn.datasets
-from sklearn.externals.joblib import dump
-from numpy.testing.decorators import skipif
+# TODO remove old import?
+try:
+    from joblib import dump
+except ImportError:
+    from sklearn.externals.joblib import dump
+# TODO remove compat with py<=3.6
+try:
+    from numpy.testing import dec
+    skipif = dec.skipif
+except ImportError:  # a missing name raises ImportError, not ModuleNotFoundError
+    from numpy.testing.decorators import skipif
 
 from osprey.dataset_loaders import (DSVDatasetLoader, FilenameDatasetLoader,
                                     JoblibDatasetLoader, HDF5DatasetLoader,
@@ -177,7 +186,7 @@ def test_MDTrajDatasetLoader_1():
     finally:
         shutil.rmtree(dirname)
 
-
+@skipif(not HAVE_MSMBUILDER, 'this test requires MSMBuilder')
 def test_MSMBuilderDatasetLoader_1():
     # TODO Why does this work when other msmbuilder imports don't?
     from msmbuilder.dataset import dataset

diff --git a/osprey/tests/test_fit_estimator.py b/osprey/tests/test_fit_estimator.py
@@ -9,9 +9,27 @@
 
 from osprey.fit_estimator import fit_and_score_estimator
 
+try:
+    __import__('msmbuilder.msm')
+    HAVE_MSMBUILDER = True
+except Exception:  # optional dependency: any import-time failure means "unavailable"
+    HAVE_MSMBUILDER = False
+
+try:
+    __import__('pyemma.msm')
+    HAVE_PYEMMA = True
+except Exception:  # optional dependency: any import-time failure means "unavailable"
+    HAVE_PYEMMA = False
+
+# TODO remove compat with py<=3.6
+try:
+    from numpy.testing import dec
+    skipif = dec.skipif
+except ImportError:  # a missing name raises ImportError, not ModuleNotFoundError
+    from numpy.testing.decorators import skipif
 
 def test_1():
-    X, y = make_regression(n_features=10)
+    X, y = make_regression(n_samples=300, n_features=10)
 
     lasso = Lasso()
     params = {'alpha': 2}
@@ -29,12 +47,9 @@ def test_1():
         [g.cv_results_['split{}_test_score'.format(i)] for i in range(cv)])
     assert np.all(out['test_scores'] == test_scores)
 
-
+@skipif(not HAVE_MSMBUILDER, "this test requires MSMBuilder")
 def test_2():
-    try:
-        from msmbuilder.msm import MarkovStateModel
-    except ImportError as e:
-        raise SkipTest(e)
+    from msmbuilder.msm import MarkovStateModel
 
     X = [np.random.randint(2, size=10), np.random.randint(2, size=11)]
     out = fit_and_score_estimator(MarkovStateModel(), {'verbose': False},
@@ -44,3 +59,17 @@ def test_2():
                                   verbose=0)
     np.testing.assert_array_equal(out['n_train_samples'], [11, 10])
     np.testing.assert_array_equal(out['n_test_samples'], [10, 11])
+
+
+@skipif(not HAVE_PYEMMA, "this test requires PyEMMA")
+def test_3():
+    """Check the reported train/test sample counts for a PyEMMA estimator."""
+    from pyemma.msm import MaximumLikelihoodMSM
+
+    # Two discrete sequences of lengths 10 and 11, passed as a nested list.
+    sequences = [np.random.randint(2, size=10), np.random.randint(2, size=11)]
+    estimator = MaximumLikelihoodMSM()
+    out = fit_and_score_estimator(estimator, {'score_k': 2}, cv=2,
+                                  X=sequences, y=None, verbose=0)
+    np.testing.assert_array_equal(out['n_train_samples'], [11, 10])
+    np.testing.assert_array_equal(out['n_test_samples'], [10, 11])