|
20 | 20 | codebook, _ = kmeans(whitened_counts, 3) |
21 | 21 | codes, _ = vq(whitened_counts, codebook) |
22 | 22 |
|
23 | | -print("definitely spam:", unique_counts[codes == 0][-1]) |
24 | | -print("definitely ham:", unique_counts[codes == 1][-1]) |
25 | | -print("unknown:", unique_counts[codes == 2][-1]) |
| 23 | +possible_codes = {0, 1, 2} |
| 24 | +unique_codes, code_indices = np.unique(codes, return_index=True) |
| 25 | +ham_code = unique_codes[np.argmin(code_indices)] |
| 26 | +spam_code = unique_codes[np.argmax(code_indices)] |
| 27 | +unknown_code = list(possible_codes ^ set((ham_code, spam_code)))[0] |
| 28 | + |
| 29 | +print("definitely ham:", unique_counts[codes == ham_code][-1]) |
| 30 | +print("definitely spam:", unique_counts[codes == spam_code][-1]) |
| 31 | +print("unknown:", unique_counts[codes == unknown_code][-1]) |
26 | 32 |
|
27 | 33 | digits = digit_counts[:, 1] |
28 | 34 | predicted_hams = digits == 0 |
29 | 35 | predicted_spams = digits > 20 |
30 | 36 | predicted_unknowns = np.logical_and(digits > 0, digits <= 20) |
31 | 37 |
|
32 | | -spam_cluster = digit_counts[predicted_spams] |
33 | 38 | ham_cluster = digit_counts[predicted_hams] |
| 39 | +spam_cluster = digit_counts[predicted_spams] |
34 | 40 | unknown_cluster = digit_counts[predicted_unknowns] |
35 | 41 |
|
36 | | -print("definitely ham:", np.unique(ham_cluster[:, 0], return_counts=True)) |
37 | | -print("definitely spam:", np.unique(spam_cluster[:, 0], return_counts=True)) |
38 | | -print("unknown:", np.unique(unknown_cluster[:, 0], return_counts=True)) |
| 42 | +print("hams:", np.unique(ham_cluster[:, 0], return_counts=True)) |
| 43 | +print("spams:", np.unique(spam_cluster[:, 0], return_counts=True)) |
| 44 | +print("unknowns:", np.unique(unknown_cluster[:, 0], return_counts=True)) |
0 commit comments