5
5
# Let alg be an instance of such an algorithm, then it should
6
6
# support the following usage:
7
7
#
8
- # initseeds!(iseeds, alg, X)
9
- # initseeds_by_costs!(iseeds, alg, costs)
8
+ # initseeds!(iseeds, alg, X; kwargs... )
9
+ # initseeds_by_costs!(iseeds, alg, costs; kwargs... )
10
10
#
11
11
# Here:
12
12
# - iseeds: a vector that receives the indices of the chosen seeds
13
13
# - alg: the seeding algorithm instance
14
14
# - X: the data matrix, each column being a data point
15
15
# - costs: pre-computed pairwise cost matrix.
16
+ # - kwargs: additional keyword arguments, e.g. `rng`
16
17
#
17
18
# This function returns iseeds
18
19
#
@@ -39,8 +40,8 @@ name of the algorithm.
39
40
40
41
Returns the vector of `k` seed indices.
41
42
"""
42
- initseeds (alg:: SeedingAlgorithm , X:: AbstractMatrix{<:Real} , k:: Integer ) =
43
- initseeds! (Vector {Int} (undef, k), alg, X)
43
+ initseeds (alg:: SeedingAlgorithm , X:: AbstractMatrix{<:Real} , k:: Integer ; kwargs ... ) =
44
+ initseeds! (Vector {Int} (undef, k), alg, X; kwargs ... )
44
45
45
46
"""
46
47
initseeds_by_costs(alg::Union{SeedingAlgorithm, Symbol},
@@ -54,8 +55,8 @@ between the points as the cost.
54
55
55
56
Returns the vector of `k` seed indices.
56
57
"""
57
- initseeds_by_costs (alg:: SeedingAlgorithm , costs:: AbstractMatrix{<:Real} , k:: Integer ) =
58
- initseeds_by_costs! (Vector {Int} (undef, k), alg, costs)
58
+ initseeds_by_costs (alg:: SeedingAlgorithm , costs:: AbstractMatrix{<:Real} , k:: Integer ; kwargs ... ) =
59
+ initseeds_by_costs! (Vector {Int} (undef, k), alg, costs; kwargs ... )
59
60
60
61
seeding_algorithm (s:: Symbol ) =
61
62
s == :rand ? RandSeedAlg () :
71
72
check_seeding_args (X:: AbstractMatrix , iseeds:: AbstractVector ) =
72
73
check_seeding_args (size (X, 2 ), length (iseeds))
73
74
74
- initseeds (algname:: Symbol , X:: AbstractMatrix{<:Real} , k:: Integer ) =
75
- initseeds (seeding_algorithm (algname), X, k):: Vector{Int}
75
+ initseeds (algname:: Symbol , X:: AbstractMatrix{<:Real} , k:: Integer ; kwargs ... ) =
76
+ initseeds (seeding_algorithm (algname), X, k; kwargs ... ):: Vector{Int}
76
77
77
- initseeds_by_costs (algname:: Symbol , costs:: AbstractMatrix{<:Real} , k:: Integer ) =
78
- initseeds_by_costs (seeding_algorithm (algname), costs, k)
78
+ initseeds_by_costs (algname:: Symbol , costs:: AbstractMatrix{<:Real} , k:: Integer ; kwargs ... ) =
79
+ initseeds_by_costs (seeding_algorithm (algname), costs, k; kwargs ... )
79
80
80
81
# use specified vector of seeds
81
- function initseeds (iseeds:: AbstractVector{<:Integer} , X:: AbstractMatrix{<:Real} , k:: Integer )
82
+ function initseeds (iseeds:: AbstractVector{<:Integer} , X:: AbstractMatrix{<:Real} , k:: Integer ; kwargs ... )
82
83
length (iseeds) == k ||
83
84
throw (ArgumentError (" The length of seeds vector ($(length (iseeds)) ) differs from the number of seeds requested ($k )" ))
84
85
check_seeding_args (X, iseeds)
@@ -90,8 +91,8 @@ function initseeds(iseeds::AbstractVector{<:Integer}, X::AbstractMatrix{<:Real},
90
91
# NOTE no duplicate checks are done, should we?
91
92
convert (Vector{Int}, iseeds)
92
93
end
93
- initseeds_by_costs (iseeds:: AbstractVector{<:Integer} , costs:: AbstractMatrix{<:Real} , k:: Integer ) =
94
- initseeds (iseeds, costs, k) # NOTE: passing costs as X, but should be fine since only size(X, 2) is used
94
+ initseeds_by_costs (iseeds:: AbstractVector{<:Integer} , costs:: AbstractMatrix{<:Real} , k:: Integer ; kwargs ... ) =
95
+ initseeds (iseeds, costs, k; kwargs ... ) # NOTE: passing costs as X, but should be fine since only size(X, 2) is used
95
96
96
97
function copyseeds! (S:: AbstractMatrix{<:AbstractFloat} ,
97
98
X:: AbstractMatrix{<:Real} ,
@@ -119,9 +120,10 @@ struct RandSeedAlg <: SeedingAlgorithm end
119
120
Initialize `iseeds` with the indices of cluster seeds for the `X` data matrix
120
121
using the `alg` seeding algorithm.
121
122
"""
122
- function initseeds! (iseeds:: IntegerVector , alg:: RandSeedAlg , X:: AbstractMatrix{<:Real} )
123
+ function initseeds! (iseeds:: IntegerVector , alg:: RandSeedAlg , X:: AbstractMatrix{<:Real} ;
124
+ rng:: AbstractRNG = Random. GLOBAL_RNG)
123
125
check_seeding_args (X, iseeds)
124
- sample! (1 : size (X, 2 ), iseeds; replace= false )
126
+ sample! (rng, 1 : size (X, 2 ), iseeds; replace= false )
125
127
end
126
128
127
129
"""
@@ -135,9 +137,9 @@ Here, `costs[i, j]` is the cost of assigning points ``i`` and ``j``
135
137
to the same cluster. One may, for example, use the squared Euclidean distance
136
138
between the points as the cost.
137
139
"""
138
- function initseeds_by_costs! (iseeds:: IntegerVector , alg:: RandSeedAlg , X:: AbstractMatrix{<:Real} )
140
+ function initseeds_by_costs! (iseeds:: IntegerVector , alg:: RandSeedAlg , X:: AbstractMatrix{<:Real} ; rng :: AbstractRNG = Random . GLOBAL_RNG )
139
141
check_seeding_args (X, iseeds)
140
- sample! (1 : size (X,2 ), iseeds; replace= false )
142
+ sample! (rng, 1 : size (X,2 ), iseeds; replace= false )
141
143
end
142
144
143
145
"""
@@ -157,13 +159,14 @@ struct KmppAlg <: SeedingAlgorithm end
157
159
158
160
function initseeds! (iseeds:: IntegerVector , alg:: KmppAlg ,
159
161
X:: AbstractMatrix{<:Real} ,
160
- metric:: PreMetric = SqEuclidean ())
162
+ metric:: PreMetric = SqEuclidean ();
163
+ rng:: AbstractRNG = Random. GLOBAL_RNG)
161
164
n = size (X, 2 )
162
165
k = length (iseeds)
163
166
check_seeding_args (n, k)
164
167
165
168
# randomly pick the first center
166
- p = rand (1 : n)
169
+ p = rand (rng, 1 : n)
167
170
iseeds[1 ] = p
168
171
169
172
if k > 1
@@ -173,7 +176,7 @@ function initseeds!(iseeds::IntegerVector, alg::KmppAlg,
173
176
# pick remaining (with a chance proportional to mincosts)
174
177
tmpcosts = zeros (n)
175
178
for j = 2 : k
176
- p = wsample (1 : n, mincosts)
179
+ p = wsample (rng, 1 : n, mincosts)
177
180
iseeds[j] = p
178
181
179
182
# update mincosts
@@ -188,13 +191,14 @@ function initseeds!(iseeds::IntegerVector, alg::KmppAlg,
188
191
end
189
192
190
193
function initseeds_by_costs! (iseeds:: IntegerVector , alg:: KmppAlg ,
191
- costs:: AbstractMatrix{<:Real} )
194
+ costs:: AbstractMatrix{<:Real} ;
195
+ rng:: AbstractRNG = Random. GLOBAL_RNG)
192
196
n = size (costs, 1 )
193
197
k = length (iseeds)
194
198
check_seeding_args (n, k)
195
199
196
200
# randomly pick the first center
197
- p = rand (1 : n)
201
+ p = rand (rng, 1 : n)
198
202
iseeds[1 ] = p
199
203
200
204
if k > 1
@@ -203,7 +207,7 @@ function initseeds_by_costs!(iseeds::IntegerVector, alg::KmppAlg,
203
207
204
208
# pick remaining (with a chance proportional to mincosts)
205
209
for j = 2 : k
206
- p = wsample (1 : n, mincosts)
210
+ p = wsample (rng, 1 : n, mincosts)
207
211
iseeds[j] = p
208
212
209
213
# update mincosts
@@ -230,7 +234,7 @@ Choose the ``k`` points with the highest *centrality* as seeds.
230
234
struct KmCentralityAlg <: SeedingAlgorithm end
231
235
232
236
function initseeds_by_costs! (iseeds:: IntegerVector , alg:: KmCentralityAlg ,
233
- costs:: AbstractMatrix{<:Real} )
237
+ costs:: AbstractMatrix{<:Real} ; kwargs ... )
234
238
235
239
n = size (costs, 1 )
236
240
k = length (iseeds)
@@ -255,5 +259,5 @@ function initseeds_by_costs!(iseeds::IntegerVector, alg::KmCentralityAlg,
255
259
end
256
260
257
261
initseeds! (iseeds:: IntegerVector , alg:: KmCentralityAlg , X:: AbstractMatrix{<:Real} ,
258
- metric:: PreMetric = SqEuclidean ()) =
259
- initseeds_by_costs! (iseeds, alg, pairwise (metric, X, dims= 2 ))
262
+ metric:: PreMetric = SqEuclidean (); kwargs ... ) =
263
+ initseeds_by_costs! (iseeds, alg, pairwise (metric, X, dims= 2 ); kwargs ... )
0 commit comments