This repository was archived by the owner on Dec 4, 2019. It is now read-only.

Commit 9f67a74

Clean up sklearn warnings (#94)
1 parent: 552b546

File tree: 5 files changed (+13, -14 lines)

README.rst

Lines changed: 3 additions & 3 deletions

@@ -30,7 +30,7 @@ This project is also available as as `Spark package <https://spark-packages.org/

 The developer version has the following requirements:

-- a recent release of scikit-learn. Releases 0.18.1, 0.19.0 have been tested, older versions may work too.
+- scikit-learn 0.18 or 0.19. Later versions may work, but tests currently are incompatible with 0.20.
 - Spark >= 2.1.1. Spark may be downloaded from the `Spark website <https://spark.apache.org/>`_.
 In order to use this package, you need to use the pyspark interpreter or another Spark-compliant python
 interpreter. See the `Spark guide <https://spark.apache.org/docs/latest/programming-guide.html#overview>`_
@@ -61,11 +61,11 @@ on how to install the package.

 .. code:: python

-    from sklearn import svm, grid_search, datasets
+    from sklearn import svm, datasets
     from spark_sklearn import GridSearchCV
     iris = datasets.load_iris()
     parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
-    svr = svm.SVC()
+    svr = svm.SVC(gamma='auto')
     clf = GridSearchCV(sc, svr, parameters)
     clf.fit(iris.data, iris.target)
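
For context on the README change above: on scikit-learn 0.20, constructing SVC without an explicit gamma emits a FutureWarning about the default moving from 'auto' to 'scale' in 0.22, which is the kind of noise this commit removes. A minimal sketch, not part of the commit, using only the public sklearn API (the first fit warns on 0.20, the second stays quiet):

import warnings

from sklearn import datasets, svm

iris = datasets.load_iris()

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    svm.SVC().fit(iris.data, iris.target)              # implicit gamma: FutureWarning on 0.20
    svm.SVC(gamma='auto').fit(iris.data, iris.target)  # pinned gamma, as in the updated README
    print([str(w.message) for w in caught if issubclass(w.category, FutureWarning)])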

python/spark_sklearn/converter_test.py

Lines changed: 1 addition & 2 deletions

@@ -26,7 +26,7 @@ def _compare_GLMs(self, skl, spark):
         self.assertEqual(skl.intercept_, spark.intercept)

     def test_LogisticRegression_skl2spark(self):
-        skl_lr = SKL_LogisticRegression().fit(self.X, self.y)
+        skl_lr = SKL_LogisticRegression(solver='lbfgs').fit(self.X, self.y)
         lr = self.converter.toSpark(skl_lr)
         self.assertTrue(isinstance(lr, LogisticRegressionModel),
                         "Expected LogisticRegressionModel but found type %s" % type(lr))
@@ -72,7 +72,6 @@ def ztest_toPandas(self):
 @fixtureReuseSparkSession
 class CSRVectorUDTTests(MLlibTestCase):

-    @unittest.skip("CSR Matrix support not present for Spark 2.0 - see issue #24")
     def test_scipy_sparse(self):
         data = [(self.list2csr([0.1, 0.2]),)]
         df = self.sql.createDataFrame(data, ["features"])
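
The solver='lbfgs' pin above is the same kind of cleanup: scikit-learn 0.20 and 0.21 warn that LogisticRegression's default solver will change to 'lbfgs' in 0.22 unless a solver is named. A standalone sketch with made-up data (not the fixtures these tests use):

import warnings

import numpy as np
from sklearn.linear_model import LogisticRegression

X = np.array([[0.0], [1.0], [2.0], [3.0]])  # toy data for illustration only
y = np.array([0, 0, 1, 1])

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    LogisticRegression().fit(X, y)                # implicit solver: FutureWarning on 0.20/0.21
    LogisticRegression(solver='lbfgs').fit(X, y)  # explicit solver, as in this commit
    print([str(w.message) for w in caught if issubclass(w.category, FutureWarning)])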

python/spark_sklearn/grid_search.py

Lines changed: 3 additions & 3 deletions

@@ -117,11 +117,11 @@ class GridSearchCV(BaseSearchCV):
     >>> sc = createLocalSparkSession().sparkContext
     >>> iris = datasets.load_iris()
     >>> parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
-    >>> svr = svm.SVC()
+    >>> svr = svm.SVC(gamma='auto')
     >>> clf = GridSearchCV(sc, svr, parameters)
     >>> clf.fit(iris.data, iris.target)
     ... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
-    GridSearchCV(cv=None, error_score=...,
+    GridSearchCV(cv=3, error_score=...,
         estimator=SVC(C=1.0, cache_size=..., class_weight=..., coef0=...,
             decision_function_shape=..., degree=..., gamma=...,
             kernel='rbf', max_iter=-1, probability=False,
@@ -242,7 +242,7 @@ class GridSearchCV(BaseSearchCV):


     def __init__(self, sc, estimator, param_grid, scoring=None, fit_params=None,
-                 n_jobs=1, iid=True, refit=True, cv=None, verbose=0,
+                 n_jobs=1, iid=True, refit=True, cv=3, verbose=0,
                  pre_dispatch='2*n_jobs', error_score='raise', return_train_score=True):
         super(GridSearchCV, self).__init__(
             estimator=estimator, scoring=scoring, n_jobs=n_jobs, iid=iid,
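
Changing the default from cv=None to cv=3 keeps the doctest repr stable and stops relying on scikit-learn's implicit cross-validation default, which newer releases flag with a FutureWarning before moving it from 3 to 5 folds. A rough illustration using plain sklearn.model_selection.GridSearchCV, since the spark_sklearn class needs a live SparkContext; whether the first call actually warns depends on the installed scikit-learn version:

import warnings

from sklearn import datasets, svm
from sklearn.model_selection import GridSearchCV

iris = datasets.load_iris()
parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    GridSearchCV(svm.SVC(gamma='auto'), parameters).fit(iris.data, iris.target)        # implicit cv
    GridSearchCV(svm.SVC(gamma='auto'), parameters, cv=3).fit(iris.data, iris.target)  # explicit cv=3
    print([str(w.message) for w in caught if issubclass(w.category, FutureWarning)])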

python/spark_sklearn/tests/test_grid_search_2.py

Lines changed: 4 additions & 4 deletions

@@ -35,7 +35,7 @@ def test_example(self):
         # The classic example from the sklearn documentation
         iris = datasets.load_iris()
         parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
-        svr = svm.SVC()
+        svr = svm.SVC(gamma='auto')
         clf = grid_search.GridSearchCV(svr, parameters)
         clf.fit(iris.data, iris.target)

@@ -55,7 +55,7 @@ def setUp(self):

     def test_cv_linreg(self):
         pipeline = SKL_Pipeline([
-            ('lasso', SKL_Lasso(max_iter=1))
+            ('lasso', SKL_Lasso())
         ])
         parameters = {
             'lasso__alpha': (0.001, 0.005, 0.01)
@@ -70,7 +70,7 @@ def test_cv_pipeline(self):
         pipeline = SKL_Pipeline([
             ('vect', SKL_HashingVectorizer(n_features=20)),
             ('tfidf', SKL_TfidfTransformer(use_idf=False)),
-            ('lasso', SKL_Lasso(max_iter=1))
+            ('lasso', SKL_Lasso())
         ])
         parameters = {
             'lasso__alpha': (0.001, 0.005, 0.01)
@@ -113,7 +113,7 @@ def test_cv_lasso_with_mllib_featurization(self):
         df = self.converter.toPandas(data.select(data.features.alias("review"), "rating"))

         pipeline = SKL_Pipeline([
-            ('lasso', SKL_Lasso(max_iter=1))
+            ('lasso', SKL_Lasso())
         ])
         parameters = {
             'lasso__alpha': (0.001, 0.005, 0.01)
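
Dropping max_iter=1 restores Lasso's default of 1000 iterations: a single coordinate-descent pass usually stops short of the tolerance, so every fit inside the grid search prints a ConvergenceWarning. A standalone sketch with synthetic data (not the fixtures these tests use):

import warnings

import numpy as np
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import Lasso

rng = np.random.RandomState(0)
X, y = rng.rand(50, 5), rng.rand(50)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    Lasso(alpha=0.001, max_iter=1).fit(X, y)  # old test setup: typically warns about non-convergence
    Lasso(alpha=0.001).fit(X, y)              # new test setup: default max_iter=1000, usually quiet
    print([str(w.message) for w in caught if issubclass(w.category, ConvergenceWarning)])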

python/spark_sklearn/tests/test_keyed_models.py

Lines changed: 2 additions & 2 deletions

@@ -249,7 +249,7 @@ def makeOutput(estimator, X):
         _assertPandasAlmostEqual(actualDF, expectedDF, keyCols + ["useless"])

     def test_transformer(self):
-        minExamples = 1
+        minExamples = 2
         featureGen = lambda: np.random.random(KeyedModelTests.NDIM)
         labelGen = None
         self.checkKeyedModelEquivalent(minExamples, featureGen, labelGen,
@@ -277,7 +277,7 @@ def test_classification_predictor(self):
         # Need to ensure each user has at least one of each label to train on.
         cyc = cycle([-1, 1])
         labelGen = lambda: next(cyc)
-        lr = LogisticRegression(random_state=0)
+        lr = LogisticRegression(solver='lbfgs', random_state=0)
         self.checkKeyedModelEquivalent(minExamples, featureGen, labelGen,
                                        sklearnEstimator=lr, yCol="y")

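
The same solver pin appears here. One way to double-check this kind of cleanup, not part of the commit, is to escalate FutureWarning to an error so any remaining implicit default fails loudly instead of scrolling past in the test log; the data below is made up for illustration:

import warnings

import numpy as np
from sklearn.linear_model import LogisticRegression

X = np.array([[0.0], [1.0], [2.0], [3.0]])  # toy data; labels mirror the cycle([-1, 1]) used in the test
y = np.array([-1, 1, -1, 1])

with warnings.catch_warnings():
    # Treat deprecation-style warnings as errors within this block only.
    warnings.simplefilter("error", FutureWarning)
    # The pinned solver from this commit is expected to fit cleanly here;
    # LogisticRegression(random_state=0) alone would raise on scikit-learn 0.20/0.21.
    LogisticRegression(solver='lbfgs', random_state=0).fit(X, y)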

0 commit comments
