Skip to content

Commit d762d11

Browse files
zhengruifeng authored and srowen committed
[SPARK-22832][ML] BisectingKMeans unpersist unused datasets
## What changes were proposed in this pull request?

Unpersist unused datasets.

## How was this patch tested?

Existing tests and a local check in Spark-Shell.

Author: Zheng RuiFeng <[email protected]>

Closes #20017 from zhengruifeng/bkm_unpersist.
1 parent 7798c9e commit d762d11

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -197,7 +197,9 @@ class BisectingKMeans private (
197197
newClusters = summarize(d, newAssignments)
198198
newClusterCenters = newClusters.mapValues(_.center).map(identity)
199199
}
200-
if (preIndices != null) preIndices.unpersist()
200+
if (preIndices != null) {
201+
preIndices.unpersist(false)
202+
}
201203
preIndices = indices
202204
indices = updateAssignments(assignments, divisibleIndices, newClusterCenters).keys
203205
.persist(StorageLevel.MEMORY_AND_DISK)
@@ -212,7 +214,13 @@ class BisectingKMeans private (
212214
}
213215
level += 1
214216
}
215-
if(indices != null) indices.unpersist()
217+
if (preIndices != null) {
218+
preIndices.unpersist(false)
219+
}
220+
if (indices != null) {
221+
indices.unpersist(false)
222+
}
223+
norms.unpersist(false)
216224
val clusters = activeClusters ++ inactiveClusters
217225
val root = buildTree(clusters)
218226
new BisectingKMeansModel(root)

0 commit comments

Comments
 (0)