-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathkm.py
More file actions
118 lines (104 loc) · 3.56 KB
/
km.py
File metadata and controls
118 lines (104 loc) · 3.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import numpy as np
"""
use distense matrix to perform kmean
A friendly implementation
"""
def nk(m, k, max_iterations=99, init_points_index=None):
n = m.shape[0]
if init_points_index is None:
k_ind = np.random.choice(np.arange(n), k)
else:
# copy instead of borrow ref
k_ind = np.copy(init_points_index)
print(k_ind)
assignment = np.zeros(n, dtype='int')
k_ind0 = np.zeros(k)
for t in range(max_iterations):
if np.array_equal(k_ind0, k_ind):
print("centroids not change")
break
dist = m[k_ind, :]
# dist => k,n
assignment = np.argmin(dist, axis=0)
k_ind0 = np.copy(k_ind)
# ass => n,
for i in np.arange(k):
nk_m = assignment == i
nk = m[nk_m, :][:, nk_m]
min_nk = np.argmin(nk.sum(axis=0))
k_ind[i] = np.arange(n)[nk_m][min_nk]
return (k_ind, assignment)
def nk_w(m, k, max_iterations=99, init_points_index=None):
# weighted version
n = m.shape[0]
if init_points_index is None:
print("no init point provided, random pick")
k_ind = np.random.choice(np.arange(n), k)
else:
k_ind = np.copy(init_points_index)
print(k_ind)
assignment = np.zeros(n, dtype='int')
k_ind0 = np.zeros(k)
for t in range(max_iterations):
if np.array_equal(k_ind0, k_ind):
print("centroids not change")
break
dist = m[k_ind, :]
# dist => k,n
assignment = np.argmin(dist, axis=0)
k_ind0 = np.copy(k_ind)
# ass => n,
for i in np.arange(k):
nk_m = assignment == i
# square the distence so that outlier get larger distence
nk = (m[nk_m, :][:, nk_m])**2
min_nk = np.argmin(nk.sum(axis=0))
k_ind[i] = np.arange(n)[nk_m][min_nk]
return (k_ind, assignment)
def vnk(m, k, max_iterations=999, init_points_index=None):
# use vector instead of loop
n = m.shape[0]
if init_points_index is None:
k_ind = np.random.choice(np.arange(n), k)
else:
k_ind = np.copy(init_points_index)
print(k_ind)
stackm = m[:, np.newaxis, :].repeat(k, 1)
assignment = np.zeros(n, dtype='int')
k_ind0 = np.zeros(k)
for _ in range(max_iterations):
if np.array_equal(k_ind0, k_ind):
print("centroids not change")
break
assignment = np.argmin(m[k_ind, :], axis=0)
# ass => n,
k_ind0 = np.copy(k_ind)
kd = np.zeros((n, k, n))
kd[np.arange(n), assignment, :] = stackm[np.arange(n), assignment, :]
km = kd.sum(axis=0)
k_ind = np.apply_along_axis(np.argmin, 1, km)
return (k_ind, assignment)
def vnk_w(m, k, w, max_iterations=999, init_points_index=None):
# w: the step for distance
# use vector instead of loop
n = m.shape[0]
if init_points_index is None:
k_ind = np.random.choice(np.arange(n), k)
else:
k_ind = np.copy(init_points_index)
print(k_ind)
stackm = m[:, np.newaxis, :].repeat(k, 1)
assignment = np.zeros(n, dtype='int')
k_ind0 = np.zeros(k)
for _ in range(max_iterations):
if np.array_equal(k_ind0, k_ind):
print("centroids not change")
break
assignment = np.argmin(m[k_ind, :], axis=0)
# ass => n,
k_ind0 = np.copy(k_ind)
kd = np.zeros((n, k, n))
kd[np.arange(n), assignment, :] = stackm[np.arange(n), assignment, :]
km = (kd**w).sum(axis=0)
k_ind = np.apply_along_axis(np.argmin, 1, km)
return (k_ind, assignment)