File tree Expand file tree Collapse file tree 3 files changed +27
-9
lines changed
java/org/apache/spark/examples/ml
scala/org/apache/spark/examples/ml Expand file tree Collapse file tree 3 files changed +27
-9
lines changed Original file line number Diff line number Diff line change 20
20
// $example on$
21
21
import org.apache.spark.ml.clustering.KMeansModel;
22
22
import org.apache.spark.ml.clustering.KMeans;
23
+ import org.apache.spark.ml.evaluation.ClusteringEvaluator;
23
24
import org.apache.spark.ml.linalg.Vector;
24
25
import org.apache.spark.sql.Dataset;
25
26
import org.apache.spark.sql.Row;
@@ -51,9 +52,14 @@ public static void main(String[] args) {
51
52
KMeans kmeans = new KMeans().setK(2).setSeed(1L);
52
53
KMeansModel model = kmeans.fit(dataset);
53
54
54
- // Evaluate clustering by computing Within Set Sum of Squared Errors.
55
- double WSSSE = model.computeCost(dataset);
56
- System.out.println("Within Set Sum of Squared Errors = " + WSSSE);
55
+ // Make predictions
56
+ Dataset<Row> predictions = model.transform(dataset);
57
+
58
+ // Evaluate clustering by computing Silhouette score
59
+ ClusteringEvaluator evaluator = new ClusteringEvaluator();
60
+
61
+ double silhouette = evaluator.evaluate(predictions);
62
+ System.out.println("Silhouette with squared euclidean distance = " + silhouette);
57
63
58
64
// Shows the result.
59
65
Vector[] centers = model.clusterCenters();
Original file line number Diff line number Diff line change 19
19
20
20
# $example on$
21
21
from pyspark.ml.clustering import KMeans
22
+ from pyspark.ml.evaluation import ClusteringEvaluator
22
23
# $example off$
23
24
24
25
from pyspark.sql import SparkSession
45
46
kmeans = KMeans().setK(2).setSeed(1)
46
47
model = kmeans.fit(dataset)
47
48
48
- # Evaluate clustering by computing Within Set Sum of Squared Errors.
49
- wssse = model.computeCost(dataset)
50
- print("Within Set Sum of Squared Errors = " + str(wssse))
49
+ # Make predictions
50
+ predictions = model.transform(dataset)
51
+
52
+ # Evaluate clustering by computing Silhouette score
53
+ evaluator = ClusteringEvaluator()
54
+
55
+ silhouette = evaluator.evaluate(predictions)
56
+ print("Silhouette with squared euclidean distance = " + str(silhouette))
51
57
52
58
# Shows the result.
53
59
centers = model.clusterCenters()
Original file line number Diff line number Diff line change @@ -21,6 +21,7 @@ package org.apache.spark.examples.ml
21
21
22
22
// $example on$
23
23
import org.apache.spark.ml.clustering.KMeans
24
+ import org.apache.spark.ml.evaluation.ClusteringEvaluator
24
25
// $example off$
25
26
import org.apache.spark.sql.SparkSession
26
27
@@ -47,9 +48,14 @@ object KMeansExample {
47
48
val kmeans = new KMeans().setK(2).setSeed(1L)
48
49
val model = kmeans.fit(dataset)
49
50
50
- // Evaluate clustering by computing Within Set Sum of Squared Errors.
51
- val WSSSE = model.computeCost(dataset)
52
- println(s"Within Set Sum of Squared Errors = $WSSSE")
51
+ // Make predictions
52
+ val predictions = model.transform(dataset)
53
+
54
+ // Evaluate clustering by computing Silhouette score
55
+ val evaluator = new ClusteringEvaluator()
56
+
57
+ val silhouette = evaluator.evaluate(predictions)
58
+ println(s"Silhouette with squared euclidean distance = $silhouette")
53
59
54
60
// Shows the result.
55
61
println("Cluster Centers: ")
You can’t perform that action at this time.
0 commit comments