Add nightly wheels and try to pass check_estimator

rth · rth · commit f0521dcd8e7d · 2019-03-01T17:14:04.000+01:00
diff --git a/.travis.yml b/.travis.yml
@@ -15,6 +15,8 @@ matrix:
            SKLEARN_VERSION="0.20.2"
     - env: PYTHON_VERSION="3.7" NUMPY_VERSION="*" SCIPY_VERSION="*"
            SKLEARN_VERSION="*"
+    - env: PYTHON_VERSION="3.7" NUMPY_VERSION="*" SCIPY_VERSION="*"
+           SKLEARN_VERSION="nightly"
 
 install:
   # install miniconda
@@ -27,7 +29,14 @@ install:
   # create the testing environment
   - conda create -n testenv --yes python=$PYTHON_VERSION pip
   - source activate testenv
-  - conda install --yes numpy==$NUMPY_VERSION scipy==$SCIPY_VERSION scikit-learn==$SKLEARN_VERSION cython nose pytest pytest-cov
+  - |
+      if [ $SKLEARN_VERSION = "nightly" ]; then
+          conda install --yes numpy==$NUMPY_VERSION scipy==$SCIPY_VERSION cython nose pytest pytest-cov
+          # install nightly wheels
+          pip install --pre -f https://sklearn-nightly.scdn8.secure.raxcdn.com scikit-learn
+      else
+          conda install --yes numpy==$NUMPY_VERSION scipy==$SCIPY_VERSION scikit-learn==$SKLEARN_VERSION cython nose pytest pytest-cov
+      fi
   - pip install codecov
   - pip install .
 
diff --git a/benchmarks/bench_rbfsampler_fastfood.py b/benchmarks/bench_rbfsampler_fastfood.py
@@ -25,7 +25,7 @@
 # rbf_kernel(X, Y, gamma=gamma)
 exact_end = datetime.datetime.utcnow()
 exact_spent_time = exact_end - exact_start
-print "Timimg exact rbf: \t\t", exact_spent_time
+print("Timimg exact rbf: \t\t", exact_spent_time)
 
 rbf_transform = Fastfood(sigma=sigma,
                          n_components=number_of_features_to_generate,
@@ -39,7 +39,7 @@
 fastfood_fast_vec_end = datetime.datetime.utcnow()
 fastfood_fast_vec_spent_time = fastfood_fast_vec_end - \
     fastfood_fast_vec_start
-print "Timimg fastfood fast vectorized: \t\t", fastfood_fast_vec_spent_time
+print("Timimg fastfood fast vectorized: \t\t", fastfood_fast_vec_spent_time)
 
 rks_rbf_transform = RBFSampler(gamma=gamma,
                                n_components=number_of_features_to_generate,
@@ -51,6 +51,6 @@
 _ = rks_rbf_transform.transform(Y)
 rks_end = datetime.datetime.utcnow()
 rks_spent_time = rks_end - rks_start
-print "Timimg rks: \t\t\t", rks_spent_time
+print("Timimg rks: \t\t\t", rks_spent_time)
 
 assert_greater(rks_spent_time, fastfood_fast_vec_spent_time)
diff --git a/sklearn_extra/kernel_approximation/_fastfood.py b/sklearn_extra/kernel_approximation/_fastfood.py
@@ -44,7 +44,7 @@ class Fastfood(BaseEstimator, TransformerMixin):
     Notes
     -----
     See "Fastfood | Approximating Kernel Expansions in Loglinear Time" by
-    Quoc Le, Tamas Sarl and Alex Smola.
+    Quoc Le, Tamas Sarl and Alex Smola.
 
     Examples
     ----
@@ -62,11 +62,9 @@ def __init__(self,
         self.sigma = sigma
         self.n_components = n_components
         self.random_state = random_state
-        self.rng = check_random_state(self.random_state)
         # map to 2*n_components features or to n_components features with less
         # accuracy
-        self.tradeoff_mem_accuracy = \
-            tradeoff_mem_accuracy
+        self.tradeoff_mem_accuracy = tradeoff_mem_accuracy
 
     @staticmethod
     def is_number_power_of_two(n):
@@ -87,13 +85,14 @@ def enforce_dimensionality_constraints(d, n):
 
     def pad_with_zeros(self, X):
         try:
-            X_padded = np.pad(X,
-                              ((0, 0),
-                               (0, self.number_of_features_to_pad_with_zeros)),
-                              'constant')
+            X_padded = np.pad(
+                X,
+                ((0, 0),
+                 (0, self._number_of_features_to_pad_with_zeros)),
+                'constant')
         except AttributeError:
             zeros = np.zeros((X.shape[0],
-                              self.number_of_features_to_pad_with_zeros))
+                              self._number_of_features_to_pad_with_zeros))
             X_padded = np.concatenate((X, zeros), axis=1)
 
         return X_padded
@@ -108,38 +107,38 @@ def l2norm_along_axis1(X):
 
     def uniform_vector(self):
         if self.tradeoff_mem_accuracy != 'accuracy':
-            return self.rng.uniform(0, 2 * np.pi, size=self.n)
+            return self._rng.uniform(0, 2 * np.pi, size=self._n)
         else:
             return None
 
     def apply_approximate_gaussian_matrix(self, B, G, P, X):
         """ Create mapping of all x_i by applying B, G and P step-wise """
         num_examples = X.shape[0]
 
-        result = np.multiply(B, X.reshape((1, num_examples, 1, self.d)))
-        result = result.reshape((num_examples*self.times_to_stack_v, self.d))
+        result = np.multiply(B, X.reshape((1, num_examples, 1, self._d)))
+        result = result.reshape((num_examples*self._times_to_stack_v, self._d))
         Fastfood.approx_fourier_transformation_multi_dim(result)
         result = result.reshape((num_examples, -1))
         np.take(result, P, axis=1, mode='wrap', out=result)
-        np.multiply(np.ravel(G), result.reshape(num_examples, self.n),
+        np.multiply(np.ravel(G), result.reshape(num_examples, self._n),
                     out=result)
-        result = result.reshape(num_examples*self.times_to_stack_v, self.d)
+        result = result.reshape(num_examples*self._times_to_stack_v, self._d)
         Fastfood.approx_fourier_transformation_multi_dim(result)
         return result
 
     def scale_transformed_data(self, S, VX):
         """ Scale mapped data VX to match kernel(e.g. RBF-Kernel) """
-        VX = VX.reshape(-1, self.times_to_stack_v*self.d)
+        VX = VX.reshape(-1, self._times_to_stack_v*self._d)
 
-        return (1 / (self.sigma * np.sqrt(self.d)) *
+        return (1 / (self.sigma * np.sqrt(self._d)) *
                 np.multiply(np.ravel(S), VX))
 
     def phi(self, X):
         if self.tradeoff_mem_accuracy == 'accuracy':
             return (1 / np.sqrt(X.shape[1])) * \
                 np.hstack([np.cos(X), np.sin(X)])
         else:
-            np.cos(X+self.U, X)
+            np.cos(X+self._U, X)
             return X * np.sqrt(2. / X.shape[1])
 
     def fit(self, X, y=None):
@@ -159,28 +158,29 @@ def fit(self, X, y=None):
         self : object
             Returns the transformer.
         """
-        X = check_array(X)
+        X = check_array(X, dtype=np.float64)
 
         d_orig = X.shape[1]
+        self._rng = check_random_state(self.random_state)
 
-        self.d, self.n, self.times_to_stack_v = \
+        self._d, self._n, self._times_to_stack_v = \
             Fastfood.enforce_dimensionality_constraints(d_orig,
                                                         self.n_components)
-        self.number_of_features_to_pad_with_zeros = self.d - d_orig
+        self._number_of_features_to_pad_with_zeros = self._d - d_orig
 
-        self.G = self.rng.normal(size=(self.times_to_stack_v, self.d))
-        self.B = self.rng.choice(
+        self._G = self._rng.normal(size=(self._times_to_stack_v, self._d))
+        self._B = self._rng.choice(
                 [-1, 1],
-                size=(self.times_to_stack_v, self.d),
+                size=(self._times_to_stack_v, self._d),
                 replace=True)
-        self.P = np.hstack([(i*self.d)+self.rng.permutation(self.d)
-                            for i in range(self.times_to_stack_v)])
-        self.S = np.multiply(1 / self.l2norm_along_axis1(self.G)
-                             .reshape((-1, 1)),
-                             chi.rvs(self.d,
-                                     size=(self.times_to_stack_v, self.d)))
+        self._P = np.hstack([(i*self._d)+self._rng.permutation(self._d)
+                            for i in range(self._times_to_stack_v)])
+        self._S = np.multiply(1 / self.l2norm_along_axis1(self._G)
+                              .reshape((-1, 1)),
+                              chi.rvs(self._d,
+                                      size=(self._times_to_stack_v, self._d)))
 
-        self.U = self.uniform_vector()
+        self._U = self.uniform_vector()
 
         return self
 
@@ -197,11 +197,14 @@ def transform(self, X):
         -------
         X_new : array-like, shape (n_samples, n_components)
         """
-        X = check_array(X)
+        X = check_array(X, dtype=np.float64)
         X_padded = self.pad_with_zeros(X)
-        HGPHBX = self.apply_approximate_gaussian_matrix(self.B,
-                                                        self.G,
-                                                        self.P,
+        HGPHBX = self.apply_approximate_gaussian_matrix(self._B,
+                                                        self._G,
+                                                        self._P,
                                                         X_padded)
-        VX = self.scale_transformed_data(self.S, HGPHBX)
+        VX = self.scale_transformed_data(self._S, HGPHBX)
         return self.phi(VX)
+
+    def _more_tags(self):
+        return {'non_deterministic': True}
diff --git a/sklearn_extra/tests/test_common.py b/sklearn_extra/tests/test_common.py
@@ -5,10 +5,13 @@
 from sklearn_extra import TemplateEstimator
 from sklearn_extra import TemplateClassifier
 from sklearn_extra import TemplateTransformer
+from sklearn_extra.kernel_approximation import Fastfood
 
 
 @pytest.mark.parametrize(
-    "Estimator", [TemplateEstimator, TemplateTransformer, TemplateClassifier]
+    "Estimator",
+    [TemplateEstimator, TemplateTransformer, TemplateClassifier,
+     Fastfood]
 )
-def test_all_estimators(Estimator):
+def test_all_estimators(Estimator, request):
     return check_estimator(Estimator)