Skip to content

Commit e2c6062

Browse files
committed
Make it easier to distinguish clusters
1 parent 0f8a5d8 commit e2c6062

File tree

1 file changed

+16
-8
lines changed

1 file changed

+16
-8
lines changed

source/clustering.md

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -474,7 +474,8 @@ points_kmeans_init = alt.Chart(penguin_data).mark_point(size=75, filled=True, op
474474
alt.X("flipper_length_standardized").title("Flipper Length (standardized)"),
475475
alt.Y("bill_length_standardized").title("Bill Length (standardized)"),
476476
alt.Color('label:N').legend(None),
477-
alt.Shape('label:N').legend(None)
477+
alt.Shape('label:N').legend(None).scale(range=['square', 'circle', 'triangle']),
478+
alt.Size('label:O').legend(None).scale(type='ordinal', range=[50, 50, 100]),
478479
)
479480
480481
glue('toy-kmeans-init-1', points_kmeans_init, display=True)
@@ -491,6 +492,7 @@ An example random initialization is shown in {numref}`toy-kmeans-init-1`
491492
:name: toy-kmeans-init-1
492493

493494
Random initialization of labels.
495+
Each cluster is depicted with a different color and shape.
494496
:::
495497

496498
```{code-cell} ipython3
@@ -523,20 +525,22 @@ def plot_kmean_iterations(iterations, data, centroid_init):
523525
pd.concat(dfs),
524526
width=200,
525527
height=200
526-
).mark_point(filled=True, size=75, opacity=1).encode(
528+
).mark_point(filled=True, size=50, opacity=1).encode(
527529
alt.X("flipper_length_standardized").scale(domain=(-2, 2)),
528530
alt.Y("bill_length_standardized").scale(domain=(-2, 2)),
529531
alt.Color('label:N').legend(None),
530-
alt.Shape('label:N').legend(None)
532+
alt.Shape('label:N').legend(None).scale(range=['square', 'circle', 'triangle']),
533+
alt.Size('label:O').legend(None).scale(type='ordinal', range=[50, 50, 100]),
531534
)
532535
533-
centroids = points.mark_point(size=200, filled=True, stroke='black', strokeWidth=1.5).encode(
536+
centroids = points.mark_point(filled=True, stroke='black', strokeWidth=1.25).encode(
534537
alt.X("mean(flipper_centroid)")
535538
.scale(domain=(-2, 2))
536539
.title("Flipper Length (standardized)"),
537540
alt.Y("mean(bill_centroid)")
538541
.scale(domain=(-2, 2))
539-
.title("Flipper Length (standardized)")
542+
.title("Bill Length (standardized)"),
543+
size=alt.value(200)
540544
)
541545
542546
return (points + centroids).facet(
@@ -617,7 +621,8 @@ points_kmeans_init = alt.Chart(penguin_data).mark_point(size=75, filled=True, op
617621
alt.X("flipper_length_standardized").title("Flipper Length (standardized)"),
618622
alt.Y("bill_length_standardized").title("Bill Length (standardized)"),
619623
alt.Color('label:N').legend(None),
620-
alt.Shape('label:N').legend(None)
624+
alt.Shape('label:N').legend(None).scale(range=['square', 'circle', 'triangle']),
625+
alt.Size('label:O').legend(None).scale(type='ordinal', range=[50, 50, 100]),
621626
)
622627
623628
glue('toy-kmeans-bad-init-1', points_kmeans_init, display=True)
@@ -686,14 +691,17 @@ points = alt.Chart(pd.concat(dfs), width=200, height=200).mark_point(filled=True
686691
.scale(zero=False)
687692
.title("Bill Length (standardized)"),
688693
alt.Color('label:N').legend(None),
689-
alt.Shape('label:N').legend(None),
694+
alt.Shape('label:N').legend(None).scale(range=['square', 'circle', 'triangle', 'cross', 'diamond', 'triangle-right', 'triangle-down', 'triangle-left']),
695+
alt.Size('label:O').legend(None).scale(type='ordinal', range=[50, 50, 100, 100, 100, 100, 100, 100]),
696+
# alt.Shape('label:N').legend(None),
690697
)
691698
692699
vary_k = alt.layer(
693700
points,
694-
points.mark_point(filled=True, size=200, stroke='black', strokeWidth=1).encode(
701+
points.mark_point(filled=True, stroke='black', strokeWidth=1.25).encode(
695702
alt.X('mean(bill_length_standardized)'),
696703
alt.Y('mean(flipper_length_standardized)'),
704+
size=alt.value(200)
697705
)
698706
).facet(
699707
alt.Facet(

0 commit comments

Comments
 (0)