@@ -91,31 +91,65 @@ recall topk=20 use-dataset=2717 beam-size=8
91
91
40.00% recall@20
92
92
90 leaf vectors, 143 vectors, 42 full vectors, 13 partitions
93
93
94
+ # Show the nearest partitions to the "easy" vector, ordered by estimated
95
+ # distance to their centroids. Notice that there are several partitions that are
96
+ # very near, and yet the "spread" between centroids is fairly large, which makes
97
+ # finding results easier.
98
+ best-centroids topk=10 use-dataset=8601
99
+ ----
100
+ 151: 0.1696 ± 0.0098 (exact=0.1569)
101
+ 113: 0.2114 ± 0.0091 (exact=0.2164)
102
+ 150: 0.2365 ± 0.0089 (exact=0.2380)
103
+ 155: 0.2836 ± 0.0091 (exact=0.2778)
104
+ 154: 0.2943 ± 0.0108 (exact=0.2954)
105
+ 68: 0.2953 ± 0.0146 (exact=0.3056)
106
+ 97: 0.2988 ± 0.0097 (exact=0.3037)
107
+ 147: 0.2994 ± 0.0156 (exact=0.2853)
108
+ 159: 0.3001 ± 0.0120 (exact=0.2995)
109
+ 139: 0.3274 ± 0.0133 (exact=0.3368)
110
+
111
+ # Show the nearest partitions to the "hard" vector, ordered by estimated
112
+ # distance to their centroids. Notice that the partitions are relatively far
113
+ # away and are bunched together, with low "spread". This makes finding results
114
+ # more difficult.
115
+ best-centroids topk=10 use-dataset=2717
116
+ ----
117
+ 197: 0.5183 ± 0.0161 (exact=0.5179)
118
+ 166: 0.5361 ± 0.0223 (exact=0.5644)
119
+ 170: 0.5403 ± 0.0156 (exact=0.5453)
120
+ 30: 0.5524 ± 0.0197 (exact=0.5515)
121
+ 196: 0.5546 ± 0.0206 (exact=0.5621)
122
+ 187: 0.5646 ± 0.0171 (exact=0.5625)
123
+ 135: 0.5674 ± 0.0234 (exact=0.6034)
124
+ 177: 0.5708 ± 0.0254 (exact=0.5674)
125
+ 61: 0.5755 ± 0.0211 (exact=0.5581)
126
+ 183: 0.5777 ± 0.0159 (exact=0.5915)
127
+
94
128
# Test recall at different beam sizes.
95
129
recall topk=10 beam-size=2 samples=64
96
130
----
97
- 34.22% recall@10
131
+ 29.84% recall@10
98
132
21 leaf vectors, 42 vectors, 15 full vectors, 4 partitions
99
133
100
134
recall topk=10 beam-size=4 samples=64
101
135
----
102
- 50.31% recall@10
103
- 42 leaf vectors, 73 vectors, 19 full vectors, 7 partitions
136
+ 47.97% recall@10
137
+ 42 leaf vectors, 74 vectors, 19 full vectors, 7 partitions
104
138
105
139
recall topk=10 beam-size=8 samples=64
106
140
----
107
- 73.75% recall@10
108
- 84 leaf vectors, 137 vectors, 23 full vectors, 13 partitions
141
+ 69.06% recall@10
142
+ 85 leaf vectors, 138 vectors, 24 full vectors, 13 partitions
109
143
110
144
recall topk=10 beam-size=16 samples=64
111
145
----
112
- 87.81% recall@10
113
- 168 leaf vectors, 262 vectors, 26 full vectors, 25 partitions
146
+ 87.66% recall@10
147
+ 168 leaf vectors, 263 vectors, 27 full vectors, 25 partitions
114
148
115
149
recall topk=10 beam-size=32 samples=64
116
150
----
117
- 97.50% recall@10
118
- 335 leaf vectors, 441 vectors, 29 full vectors, 42 partitions
151
+ 95.62% recall@10
152
+ 336 leaf vectors, 442 vectors, 30 full vectors, 42 partitions
119
153
120
154
# ----------------------------------------------------------------------
121
155
# Compare orderings of same dataset with different distance metrics.
@@ -255,23 +289,23 @@ CV stats:
255
289
256
290
recall topk=10 beam-size=4 samples=50
257
291
----
258
- 62.40% recall@10
259
- 42 leaf vectors, 72 vectors, 18 full vectors, 7 partitions
292
+ 61.20% recall@10
293
+ 42 leaf vectors, 72 vectors, 20 full vectors, 7 partitions
260
294
261
295
recall topk=10 beam-size=8 samples=50
262
296
----
263
- 83.40% recall@10
297
+ 79.80% recall@10
264
298
83 leaf vectors, 133 vectors, 21 full vectors, 13 partitions
265
299
266
300
recall topk=10 beam-size=16 samples=50
267
301
----
268
- 92.60% recall@10
269
- 166 leaf vectors, 257 vectors, 24 full vectors, 25 partitions
302
+ 91.00% recall@10
303
+ 165 leaf vectors, 256 vectors, 24 full vectors, 25 partitions
270
304
271
305
recall topk=10 beam-size=32 samples=50
272
306
----
273
- 98.20% recall@10
274
- 329 leaf vectors, 431 vectors, 25 full vectors, 42 partitions
307
+ 97.20% recall@10
308
+ 329 leaf vectors, 431 vectors, 26 full vectors, 42 partitions
275
309
276
310
# ----------------------------------------------------------------------
277
311
# Load 950 768-dimension image embeddings and search them using
@@ -288,20 +322,20 @@ CV stats:
288
322
289
323
recall topk=10 beam-size=4 samples=50
290
324
----
291
- 55.80% recall@10
292
- 44 leaf vectors, 74 vectors, 19 full vectors, 7 partitions
325
+ 48.60% recall@10
326
+ 44 leaf vectors, 76 vectors, 20 full vectors, 7 partitions
293
327
294
328
recall topk=10 beam-size=8 samples=50
295
329
----
296
- 74.40% recall@10
297
- 88 leaf vectors, 143 vectors, 23 full vectors, 13 partitions
330
+ 69.00% recall@10
331
+ 88 leaf vectors, 144 vectors, 25 full vectors, 13 partitions
298
332
299
333
recall topk=10 beam-size=16 samples=50
300
334
----
301
- 89.00% recall@10
302
- 172 leaf vectors, 271 vectors, 27 full vectors, 25 partitions
335
+ 85.00% recall@10
336
+ 173 leaf vectors, 272 vectors, 30 full vectors, 25 partitions
303
337
304
338
recall topk=10 beam-size=32 samples=50
305
339
----
306
- 97.60% recall@10
307
- 344 leaf vectors, 443 vectors, 30 full vectors, 41 partitions
340
+ 95.20% recall@10
341
+ 342 leaf vectors, 441 vectors, 33 full vectors, 41 partitions
0 commit comments