Skip to content

Commit 51f81eb

Browse files
committed
code cleanups and unification for *SSL
1 parent 746fa6e commit 51f81eb

File tree

3 files changed

+78
-49
lines changed

3 files changed

+78
-49
lines changed

elki-clustering/src/main/java/elki/clustering/hierarchical/BufferedSearchSingleLink.java

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import elki.distance.minkowski.EuclideanDistance;
3737
import elki.logging.Logging;
3838
import elki.logging.progress.FiniteProgress;
39+
import elki.utilities.datastructures.heap.DoubleIntegerHeap;
3940
import elki.utilities.datastructures.heap.DoubleIntegerMinHeap;
4041
import elki.utilities.documentation.Reference;
4142
import elki.utilities.optionhandling.Parameterizer;
@@ -98,7 +99,9 @@ public TypeInformation[] getInputTypeRestriction() {
9899
public ClusterMergeHistory run(Relation<O> relation) {
99100
DBIDEnum ids = DBIDUtil.ensureEnum(relation.getDBIDs());
100101
ClusterMergeHistoryBuilder builder = new ClusterMergeHistoryBuilder(ids, distance.isSquared());
101-
PrioritySearcher<DBIDRef> pq = new QueryBuilder<>(relation, distance).priorityByDBID();
102+
// Create a searcher up front to check that a suitable index is available.
103+
PrioritySearcher<DBIDRef> pq = new QueryBuilder<>(relation, distance) //
104+
.lowSelectivity().priorityByDBID();
102105
if(pq instanceof LinearScanPrioritySearcher || pq instanceof LinearScanEuclideanPrioritySearcher) {
103106
throw new UnsupportedOperationException("No index acceleration available. This will be very slow.");
104107
}
@@ -168,9 +171,9 @@ public Instance(DBIDEnum ids, ClusterMergeHistoryBuilder builder, PrioritySearch
168171
*/
169172
public void run() {
170173
initializeHeap();
171-
FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Clustering", ids.size() - 1, LOG) : null;
174+
FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Clustering", ids.size(), LOG) : null;
172175
if(cprog != null) {
173-
cprog.setProcessed(builder.mergecount, LOG);
176+
cprog.setProcessed(builder.mergecount + 1, LOG);
174177
}
175178
while(true) {
176179
final double curd = heap.peekKey();
@@ -197,7 +200,6 @@ public void run() {
197200
heap.replaceTopElement(nn.peekKey(), a);
198201
}
199202
LOG.ensureCompleted(cprog);
200-
assert builder.mergecount == ids.size() - 1;
201203
}
202204

203205
/**
@@ -214,8 +216,7 @@ private void initializeHeap() {
214216
continue; // duplicate
215217
}
216218
DoubleIntegerMinHeap h = heaps[a] = new DoubleIntegerMinHeap();
217-
double t = Double.POSITIVE_INFINITY;
218-
for(pq.search(ita); pq.valid() && pq.allLowerBound() < t; pq.advance()) {
219+
for(pq.search(ita); pq.valid(); pq.advance()) {
219220
final int b = ids.index(pq);
220221
if(a == b) {
221222
continue;
@@ -229,10 +230,10 @@ private void initializeHeap() {
229230
continue;
230231
}
231232
h.add(d, b);
232-
pq.decreaseCutoff(t = h.peekKey());
233+
pq.decreaseCutoff(h.peekKey());
233234
}
234235
if(!h.isEmpty()) {
235-
heap.add(t, a);
236+
heap.add(h.peekKey(), a);
236237
threshold[a] = pq.allLowerBound();
237238
}
238239
}
@@ -256,23 +257,30 @@ private void initializeHeap() {
256257
private void refillNeighbors(int a, int ca) {
257258
DoubleIntegerMinHeap h = heaps[a];
258259
double thres = h.isEmpty() ? Double.POSITIVE_INFINITY : h.peekKey();
260+
// Avoid adding entries repeatedly
261+
boolean[] seen = new boolean[ids.size()];
262+
for(DoubleIntegerHeap.UnsortedIter it = h.unsortedIter(); it.valid(); it.advance()) {
263+
seen[it.getValue()] = true;
264+
}
259265
final double skip = threshold[a];
260266
if(last != a) {
261267
pq.search(ita.seek(a)).increaseSkip(skip);
262268
last = a;
263269
}
264270
for(; pq.valid() && pq.allLowerBound() < thres; pq.advance()) {
265271
final int b = ids.index(pq);
266-
if(a == b || builder.get(b) == ca) {
272+
if(a == b || builder.get(b) == ca || seen[b]) {
267273
continue;
268274
}
269-
final double dist = pq.computeExactDistance();
270-
if(dist < skip) {
275+
double d = pq.computeExactDistance();
276+
if(d < skip) {
271277
continue;
272278
}
273-
h.add(dist, b);
279+
h.add(d, b);
274280
thres = h.peekKey();
281+
// do not use pq.decreaseCutoff, as we may continue with the searcher
275282
}
283+
// Save the current lower bound
276284
threshold[a] = pq.allLowerBound() < thres ? pq.allLowerBound() : Double.POSITIVE_INFINITY;
277285
}
278286
}

elki-clustering/src/main/java/elki/clustering/hierarchical/HeapOfSearchersSingleLink.java

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -128,17 +128,17 @@ protected class Instance {
128128
protected ClusterMergeHistoryBuilder builder;
129129

130130
/**
131-
* Primary heap.
131+
* Priority searchers.
132132
*/
133-
private DoubleIntegerMinHeap heap;
133+
protected PrioritySearcher<DBIDRef>[] pqs;
134134

135135
/**
136-
* Priority searchers.
136+
* Primary heap.
137137
*/
138-
protected PrioritySearcher<DBIDRef>[] pqs;
138+
private DoubleIntegerMinHeap heap;
139139

140140
/**
141-
* Auxillary heaps.
141+
* Auxiliary heaps.
142142
*/
143143
private DoubleIntegerMinHeap[] heaps;
144144

@@ -158,9 +158,9 @@ public Instance(DBIDEnum ids, ClusterMergeHistoryBuilder builder) {
158158
*/
159159
public void run(Relation<? extends O> relation) {
160160
initializeHeap(relation);
161-
FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Clustering", ids.size() - 1, LOG) : null;
161+
FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Clustering", ids.size(), LOG) : null;
162162
if(cprog != null) {
163-
cprog.setProcessed(builder.mergecount, LOG);
163+
cprog.setProcessed(builder.mergecount + 1, LOG);
164164
}
165165
while(true) {
166166
final double curd = heap.peekKey();
@@ -182,7 +182,7 @@ public void run(Relation<? extends O> relation) {
182182
}
183183
if(nn.isEmpty()) {
184184
heap.poll();
185-
return;
185+
continue;
186186
}
187187
heap.replaceTopElement(nn.peekKey(), a);
188188
}
@@ -192,7 +192,7 @@ public void run(Relation<? extends O> relation) {
192192
/**
193193
* Build the initial heap.
194194
*
195-
* @param relation Data relation
195+
* @param relation data relation
196196
*/
197197
private void initializeHeap(Relation<? extends O> relation) {
198198
FiniteProgress iprog = LOG.isVerbose() ? new FiniteProgress("Heap initialization", ids.size(), LOG) : null;
@@ -218,13 +218,14 @@ private void initializeHeap(Relation<? extends O> relation) {
218218
final double d = pq.computeExactDistance();
219219
if(d == 0.) { // duplicate, merge immediately
220220
int cb = builder.get(b);
221-
if (ca != cb) {
221+
if(ca != cb) {
222222
ca = builder.add(ca, 0, cb);
223223
}
224224
continue;
225225
}
226226
h.add(d, b);
227227
thres = h.peekKey();
228+
// do not use pq.decreaseCutoff, as we continue later
228229
}
229230
if(!h.isEmpty()) {
230231
heap.add(thres, a);
@@ -253,6 +254,7 @@ private void refillNeighbors(int a, int ca) {
253254
}
254255
h.add(pq.computeExactDistance(), b);
255256
thres = h.peekKey();
257+
// do not use pq.decreaseCutoff, as we continue with the searcher
256258
}
257259
}
258260
}

elki-clustering/src/main/java/elki/clustering/hierarchical/RestartingSearchSingleLink.java

Lines changed: 46 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -167,11 +167,10 @@ public Instance(DBIDEnum ids, ClusterMergeHistoryBuilder builder, PrioritySearch
167167
*/
168168
public void run() {
169169
initializeHeap();
170-
FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Clustering", ids.size() - 1, LOG) : null;
170+
FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Clustering", ids.size(), LOG) : null;
171171
if(cprog != null) {
172-
cprog.setProcessed(builder.mergecount, LOG);
172+
cprog.setProcessed(builder.mergecount + 1, LOG);
173173
}
174-
int last = -1; // last used searcher
175174
while(true) {
176175
final double curd = heap.peekKey();
177176
int a = heap.peekValue(), b = nns[a];
@@ -184,32 +183,14 @@ public void run() {
184183
}
185184
}
186185
// Update nn of a:
187-
double dist = Double.POSITIVE_INFINITY;
188-
int best = -1;
189-
if(last != a) {
190-
pq.search(ita.seek(a)).increaseSkip(curd);
191-
last = a;
192-
}
193-
for(; pq.valid() && pq.allLowerBound() < dist; pq.advance()) {
194-
int nb = ids.index(pq);
195-
if(a == nb || pq.getUpperBound() < curd || builder.get(nb) == ca) {
196-
continue;
197-
}
198-
double d = pq.computeExactDistance();
199-
if(d < dist) {
200-
best = ids.index(pq);
201-
dist = d;
202-
}
203-
}
204-
nns[a] = best;
205-
if(best < 0) {
186+
double dist = refillNeighbors(a, ca, curd);
187+
if(nns[a] < 0) { // or dist == Double.POSITIVE_INFINITY
206188
heap.poll();
207189
continue;
208190
}
209191
heap.replaceTopElement(dist, a);
210192
}
211193
LOG.ensureCompleted(cprog);
212-
assert builder.mergecount == ids.size() - 1;
213194
}
214195

215196
/**
@@ -225,7 +206,7 @@ private void initializeHeap() {
225206
continue; // duplicate
226207
}
227208
int best = -1;
228-
double thresh = Double.POSITIVE_INFINITY;
209+
double bestd = Double.POSITIVE_INFINITY;
229210
for(pq.search(ita); pq.valid(); pq.advance()) {
230211
final int b = ids.index(pq);
231212
if(a == b) {
@@ -239,13 +220,13 @@ private void initializeHeap() {
239220
}
240221
continue;
241222
}
242-
if(d < thresh) {
223+
if(d < bestd) {
243224
best = b;
244-
pq.decreaseCutoff(thresh = d);
225+
pq.decreaseCutoff(bestd = d);
245226
}
246227
}
247228
if(best >= 0) {
248-
heap.add(thresh, a);
229+
heap.add(bestd, a);
249230
nns[a] = best;
250231
}
251232
}
@@ -254,6 +235,44 @@ private void initializeHeap() {
254235
LOG.debug("Performed " + builder.mergecount + " merges of duplicates (may involve more objects) during initialization.");
255236
}
256237
}
238+
239+
/**
240+
* Index of the object the searcher was last positioned on when refilling (-1: none yet).
241+
*/
242+
int last = -1;
243+
244+
/**
245+
* Refill the nearest neighbor of an object: stores the best candidate in nns[a] (-1 if none) and returns its distance (positive infinity if none).
246+
*
247+
* @param a Query object number
248+
* @param ca Cluster id of the query object
249+
* @param skip Current distance; candidates with a smaller exact distance are skipped
250+
*/
251+
private double refillNeighbors(int a, int ca, double skip) {
252+
double thres = Double.POSITIVE_INFINITY; // distance of the best candidate so far
253+
int best = -1; // index of the best candidate, -1 while none was found
254+
if(last != a) { // searcher is not positioned on a: start a new search for a
255+
pq.search(ita.seek(a)).increaseSkip(skip);
256+
last = a;
257+
}
258+
for(; pq.valid() && pq.allLowerBound() < thres; pq.advance()) {
259+
final int b = ids.index(pq);
260+
if(a == b || builder.get(b) == ca) { // the query itself, or builder reports the same cluster id
261+
continue;
262+
}
263+
double d = pq.computeExactDistance();
264+
if(d < skip) { // closer than the skip distance: ignore this candidate
265+
continue;
266+
}
267+
if(d < thres) {
268+
best = b;
269+
thres = d;
270+
// do not use pq.decreaseCutoff, as we may continue with the searcher
271+
}
272+
}
273+
nns[a] = best; // callers test nns[a] < 0 to detect that no neighbor remains
274+
return thres;
275+
}
257276
}
258277

259278
/**

0 commit comments

Comments
 (0)