Skip to content

Commit a85c16f

Browse files
committed
clean up related to temp file
1 parent 09c7c0e commit a85c16f

File tree

3 files changed

+38
-67
lines changed

3 files changed

+38
-67
lines changed

server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java

Lines changed: 19 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -118,16 +118,8 @@ private static void printClusterQualityStatistics(int[][] clusters) {
118118
}
119119

120120
@Override
121-
CentroidSupplier createCentroidSupplier(
122-
IndexInput centroidsInput,
123-
int numParentCentroids,
124-
int numCentroids,
125-
int numClusters,
126-
FieldInfo fieldInfo,
127-
float[] globalCentroid,
128-
IntIntMap clusterToCentroidMap
129-
) {
130-
return new OffHeapCentroidSupplier(centroidsInput, numParentCentroids, numCentroids, numClusters, fieldInfo, clusterToCentroidMap);
121+
CentroidSupplier createCentroidSupplier(IndexInput centroidsInput, int numClusters, FieldInfo fieldInfo) {
122+
return new OffHeapCentroidSupplier(centroidsInput, numClusters, fieldInfo);
131123
}
132124

133125
private static void writeQuantizedCentroid(
@@ -181,7 +173,7 @@ static IntIntMap writePartitionsAndCentroids(
181173
centroidOutput
182174
);
183175
// TODO: put at the end of the parents region
184-
centroidOutput.writeInt(centroidPartition.childOrdinal());
176+
centroidOutput.writeInt(centroidPartition.childOffset());
185177
centroidOutput.writeInt(centroidPartition.size());
186178
}
187179
}
@@ -252,7 +244,14 @@ CentroidAssignments calculateAndWriteCentroids(
252244
return calculateAndWriteCentroids(fieldInfo, floatVectorValues, centroidOutput, globalCentroid);
253245
}
254246

255-
record CentroidPartition(float[] centroid, int childOrdinal, int size, int[] assignments) {}
247+
/**
248+
* A parent centroid together with the partition of child centroids that belong to it.
249+
* @param centroid the parent centroid of some set of children
250+
* @param childOffset the offset of the first child within the partition defined by this parent
251+
* @param size the number of children in this partition
252+
* @param assignments the ordinals of the child centroids that belong to this parent (may contain duplicates)
253+
*/
254+
record CentroidPartition(float[] centroid, int childOffset, int size, int[] assignments) {}
256255

257256
/**
258257
* Calculate the centroids for the given field and write them to the given centroid output.
@@ -426,30 +425,16 @@ static void writeQuantizedValue(IndexOutput indexOutput, byte[] binaryValue, Opt
426425

427426
static class OffHeapCentroidSupplier implements CentroidSupplier {
428427
private final IndexInput centroidsInput;
429-
private final int numClusters;
430428
private final int dimension;
431429
private final float[] scratch;
432-
private final long rawCentroidOffset;
433430
private int currOrd = -1;
434-
private final IntIntMap clusterToCentroidMap;
435-
436-
OffHeapCentroidSupplier(
437-
IndexInput centroidsInput,
438-
int numParentCentroids,
439-
int numCentroids,
440-
int numClusters,
441-
FieldInfo info,
442-
IntIntMap clusterToCentroidMap
443-
) {
431+
private final int numClusters;
432+
433+
OffHeapCentroidSupplier(IndexInput centroidsInput, int numClusters, FieldInfo info) {
444434
this.centroidsInput = centroidsInput;
445-
this.numClusters = numClusters;
446435
this.dimension = info.getVectorDimension();
447436
this.scratch = new float[dimension];
448-
long quantizedVectorByteSize = dimension + 3 * Float.BYTES + Short.BYTES;
449-
long quantizedVectorNodeByteSize = quantizedVectorByteSize + Integer.BYTES;
450-
long parentNodeByteSize = quantizedVectorByteSize + 2 * Integer.BYTES;
451-
this.rawCentroidOffset = numParentCentroids * parentNodeByteSize + numCentroids * quantizedVectorNodeByteSize;
452-
this.clusterToCentroidMap = clusterToCentroidMap;
437+
this.numClusters = numClusters;
453438
}
454439

455440
@Override
@@ -458,19 +443,13 @@ public int size() {
458443
}
459444

460445
@Override
461-
public float[] centroid(int clusterOrdinal) throws IOException {
462-
if (clusterOrdinal == currOrd) {
446+
public float[] centroid(int centroidOrdinal) throws IOException {
447+
if (centroidOrdinal == currOrd) {
463448
return scratch;
464449
}
465-
int centroidOrdinal;
466-
if (clusterToCentroidMap != null) {
467-
centroidOrdinal = clusterToCentroidMap.get(clusterOrdinal);
468-
} else {
469-
centroidOrdinal = clusterOrdinal;
470-
}
471-
centroidsInput.seek(rawCentroidOffset + (long) centroidOrdinal * dimension * Float.BYTES);
450+
centroidsInput.seek((long) centroidOrdinal * dimension * Float.BYTES);
472451
centroidsInput.readFloats(scratch, 0, dimension);
473-
this.currOrd = clusterOrdinal;
452+
this.currOrd = centroidOrdinal;
474453
return scratch;
475454
}
476455
}

server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsReader.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,9 @@ public final void search(String field, float[] target, KnnCollector knnCollector
259259

260260
FieldEntry entry = fields.get(fieldInfo.number);
261261

262+
// TODO: pass numClusters via meta rather than computing it here?
262263
int numCentroids = entry.childCentroidCount();
264+
int numClusters = entry.childCentroidCount() / 2;
263265

264266
CentroidWClusterOffsetQueryScorer centroidQueryScorer = getChildCentroidScorer(
265267
fieldInfo,
@@ -273,9 +275,9 @@ public final void search(String field, float[] target, KnnCollector knnCollector
273275
// scaling by the number of centroids vs. the nearest neighbors requested
274276
// not perfect, but a comparative heuristic.
275277
// we might want to utilize the total vector count as well, but this is a good start
276-
nProbe = (int) Math.round(Math.log10(centroidQueryScorer.size()) * Math.sqrt(knnCollector.k()));
278+
nProbe = (int) Math.round(Math.log10(numClusters) * Math.sqrt(knnCollector.k()));
277279
// clip to be between 1 and the number of clusters
278-
nProbe = Math.max(Math.min(nProbe, centroidQueryScorer.size()), 1);
280+
nProbe = Math.max(Math.min(nProbe, numClusters), 1);
279281
}
280282

281283
int centroidsVisited = 0;

server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsWriter.java

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -142,15 +142,7 @@ abstract long[] buildAndWritePostingsLists(
142142
int[][] assignmentsByCluster
143143
) throws IOException;
144144

145-
abstract CentroidSupplier createCentroidSupplier(
146-
IndexInput centroidsInput,
147-
int numParentCentroids,
148-
int numCentroids,
149-
int numClusters,
150-
FieldInfo fieldInfo,
151-
float[] globalCentroid,
152-
IntIntMap clusterToCentroidMap
153-
) throws IOException;
145+
abstract CentroidSupplier createCentroidSupplier(IndexInput centroidsInput, int numCentroids, FieldInfo fieldInfo) throws IOException;
154146

155147
@Override
156148
public final void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException {
@@ -299,25 +291,35 @@ private void mergeOneFieldIVF(FieldInfo fieldInfo, MergeState mergeState) throws
299291
final int numClusters;
300292
final int[][] assignmentsByCluster;
301293
final float[] calculatedGlobalCentroid = new float[fieldInfo.getVectorDimension()];
302-
IntIntMap clusterToCentroidMap;
303294
String centroidTempName = null;
304295
IndexOutput centroidTemp = null;
305296
success = false;
306297
try {
307298
centroidTemp = mergeState.segmentInfo.dir.createTempOutput(mergeState.segmentInfo.name, "civf_", IOContext.DEFAULT);
308299
centroidTempName = centroidTemp.getName();
300+
301+
centroidOffset = ivfCentroids.alignFilePointer(Float.BYTES);
309302
CentroidAssignments centroidAssignments = calculateAndWriteCentroids(
310303
fieldInfo,
311304
getFloatVectorValues(fieldInfo, docs, vectors, numVectors),
312-
centroidTemp,
305+
ivfCentroids,
313306
mergeState,
314307
calculatedGlobalCentroid
315308
);
309+
centroidLength = ivfCentroids.getFilePointer() - centroidOffset;
310+
311+
final ByteBuffer buffer = ByteBuffer.allocate(fieldInfo.getVectorDimension() * Float.BYTES).order(ByteOrder.LITTLE_ENDIAN);
312+
for (int i = 0; i < centroidAssignments.centroids().length; i++) {
313+
float[] centroid = centroidAssignments.centroids()[i];
314+
buffer.asFloatBuffer().put(centroid);
315+
centroidTemp.writeBytes(buffer.array(), buffer.array().length);
316+
}
317+
316318
numParentCentroids = centroidAssignments.numParentCentroids();
319+
// TODO: remove this from meta when not used on the read side; we currently use it in degenerate cases to read all centroids
317320
numCentroids = centroidAssignments.numCentroids();
318321
numClusters = centroidAssignments.centroids().length;
319322
assignmentsByCluster = centroidAssignments.assignmentsByCluster();
320-
clusterToCentroidMap = centroidAssignments.clusterToCentroidMap();
321323
success = true;
322324
} finally {
323325
if (success == false && centroidTempName != null) {
@@ -327,28 +329,16 @@ private void mergeOneFieldIVF(FieldInfo fieldInfo, MergeState mergeState) throws
327329
}
328330
try {
329331
if (numCentroids == 0) {
330-
centroidOffset = ivfCentroids.getFilePointer();
331332
writeMeta(fieldInfo, numParentCentroids, numCentroids, centroidOffset, 0, new long[0], null);
332333
CodecUtil.writeFooter(centroidTemp);
333334
IOUtils.close(centroidTemp);
334335
return;
335336
}
336337
CodecUtil.writeFooter(centroidTemp);
337338
IOUtils.close(centroidTemp);
338-
centroidOffset = ivfCentroids.alignFilePointer(Float.BYTES);
339339
try (IndexInput centroidsInput = mergeState.segmentInfo.dir.openInput(centroidTempName, IOContext.DEFAULT)) {
340-
ivfCentroids.copyBytes(centroidsInput, centroidsInput.length() - CodecUtil.footerLength());
341-
centroidLength = ivfCentroids.getFilePointer() - centroidOffset;
342340

343-
CentroidSupplier centroidSupplier = createCentroidSupplier(
344-
centroidsInput,
345-
numParentCentroids,
346-
numCentroids,
347-
numClusters,
348-
fieldInfo,
349-
calculatedGlobalCentroid,
350-
clusterToCentroidMap
351-
);
341+
CentroidSupplier centroidSupplier = createCentroidSupplier(centroidsInput, numClusters, fieldInfo);
352342

353343
// build a float vector values with random access
354344
// build centroids

0 commit comments

Comments
 (0)