This repository was archived by the owner on Jan 9, 2020. It is now read-only.

Commit c76153c

[SPARK-18608][ML][FOLLOWUP] Fix double caching for PySpark OneVsRest.

## What changes were proposed in this pull request?

apache#19197 fixed double caching for MLlib algorithms, but missed PySpark `OneVsRest`; this PR fixes it.

## How was this patch tested?

Existing tests.

Author: Yanbo Liang <[email protected]>

Closes apache#19220 from yanboliang/SPARK-18608.
1 parent 66cb72d commit c76153c

File tree

1 file changed, +2 -4 lines changed


python/pyspark/ml/classification.py

Lines changed: 2 additions & 4 deletions
@@ -1773,8 +1773,7 @@ def _fit(self, dataset):
         multiclassLabeled = dataset.select(labelCol, featuresCol)

         # persist if underlying dataset is not persistent.
-        handlePersistence = \
-            dataset.rdd.getStorageLevel() == StorageLevel(False, False, False, False)
+        handlePersistence = dataset.storageLevel == StorageLevel(False, False, False, False)
         if handlePersistence:
             multiclassLabeled.persist(StorageLevel.MEMORY_AND_DISK)

@@ -1928,8 +1927,7 @@ def _transform(self, dataset):
         newDataset = dataset.withColumn(accColName, initUDF(dataset[origCols[0]]))

         # persist if underlying dataset is not persistent.
-        handlePersistence = \
-            dataset.rdd.getStorageLevel() == StorageLevel(False, False, False, False)
+        handlePersistence = dataset.storageLevel == StorageLevel(False, False, False, False)
         if handlePersistence:
             newDataset.persist(StorageLevel.MEMORY_AND_DISK)
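The patched check persists the dataset only when its current storage level is the "no persistence" level, i.e. `StorageLevel(False, False, False, False)`. The sketch below models that logic with a plain namedtuple so it runs without Spark; `StorageLevel`, `NONE`, and `needs_persist` here are simplified stand-ins (the real PySpark `StorageLevel` also carries a replication field), not PySpark's actual classes.

```python
from collections import namedtuple

# Simplified stand-in for PySpark's StorageLevel flags; namedtuple
# equality compares all four flags field by field.
StorageLevel = namedtuple(
    "StorageLevel", ["useDisk", "useMemory", "useOffHeap", "deserialized"]
)

# All flags False means the dataset is not persisted anywhere.
NONE = StorageLevel(False, False, False, False)

def needs_persist(level):
    # Mirror of the handlePersistence check: persist only if the
    # caller has not already persisted the dataset at some level.
    return level == NONE

print(needs_persist(NONE))                                     # True
print(needs_persist(StorageLevel(True, True, False, False)))   # False
```

The point of the commit is which level gets fed into this check: reading `dataset.storageLevel` inspects the DataFrame directly, whereas the old `dataset.rdd.getStorageLevel()` went through the DataFrame-to-RDD conversion, which is what led to the double caching the parent PR was fixing.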
