
Commit 7fa53d1

Update DefaultIVFVectorsWriter.java
1 parent 78643cd commit 7fa53d1

File tree: 1 file changed (+14 -30 lines)


server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java

Lines changed: 14 additions & 30 deletions
@@ -109,9 +109,9 @@ LongValues buildAndWritePostingsLists(
             new OptimizedScalarQuantizer(fieldInfo.getVectorSimilarityFunction())
         );
         final ByteBuffer buffer = ByteBuffer.allocate(fieldInfo.getVectorDimension() * Float.BYTES).order(ByteOrder.LITTLE_ENDIAN);
-        int[] docIds = null;
-        int[] docDeltas = null;
-        int[] clusterOrds = null;
+        final int[] docIds = new int[maxPostingListSize];
+        final int[] docDeltas = new int[maxPostingListSize];
+        final int[] clusterOrds = new int[maxPostingListSize];
         DocIdsWriter idsWriter = new DocIdsWriter();
         for (int c = 0; c < centroidSupplier.size(); c++) {
             float[] centroid = centroidSupplier.centroid(c);
@@ -125,29 +125,21 @@ LongValues buildAndWritePostingsLists(
             int size = cluster.length;
             // write docIds
             postingsOutput.writeVInt(size);
-            if (docIds == null || docIds.length < cluster.length) {
-                docIds = new int[cluster.length];
-                clusterOrds = new int[cluster.length];
-                docDeltas = new int[cluster.length];
-            }
             for (int j = 0; j < size; j++) {
                 docIds[j] = floatVectorValues.ordToDoc(cluster[j]);
                 clusterOrds[j] = j;
             }
-            final int[] finalDocs = docIds;
-            final int[] finalOrds = clusterOrds;
             // sort cluster.buffer by docIds values, this way cluster ordinals are sorted by docIds
-            new IntSorter(clusterOrds, i -> finalDocs[i]).sort(0, size);
+            new IntSorter(clusterOrds, i -> docIds[i]).sort(0, size);
             // encode doc deltas
             for (int j = 0; j < size; j++) {
-                docDeltas[j] = j == 0 ? finalDocs[finalOrds[j]] : finalDocs[finalOrds[j]] - finalDocs[finalOrds[j - 1]];
+                docDeltas[j] = j == 0 ? docIds[clusterOrds[j]] : docIds[clusterOrds[j]] - docIds[clusterOrds[j - 1]];
             }
-            final int[] finalDocDeltas = docDeltas;
-            onHeapQuantizedVectors.reset(centroid, size, ord -> cluster[finalOrds[ord]]);
+            onHeapQuantizedVectors.reset(centroid, size, ord -> cluster[clusterOrds[ord]]);
             // TODO we might want to consider putting the docIds in a separate file
             // to aid with only having to fetch vectors from slower storage when they are required
             // keeping them in the same file indicates we pull the entire file into cache
-            idsWriter.writeDocIds(i -> finalDocDeltas[i], size, postingsOutput);
+            idsWriter.writeDocIds(i -> docDeltas[i], size, postingsOutput);
             // write vectors
             bulkWriter.writeVectors(onHeapQuantizedVectors);
         }
@@ -260,9 +252,9 @@ LongValues buildAndWritePostingsLists(
         // write the max posting list size
         postingsOutput.writeVInt(maxPostingListSize);
         // write the posting lists
-        int[] docIds = null;
-        int[] docDeltas = null;
-        int[] clusterOrds = null;
+        final int[] docIds = new int[maxPostingListSize];
+        final int[] docDeltas = new int[maxPostingListSize];
+        final int[] clusterOrds = new int[maxPostingListSize];
         DocIdsWriter idsWriter = new DocIdsWriter();
         for (int c = 0; c < centroidSupplier.size(); c++) {
             float[] centroid = centroidSupplier.centroid(c);
@@ -277,29 +269,21 @@ LongValues buildAndWritePostingsLists(
             // write docIds
             int size = cluster.length;
             postingsOutput.writeVInt(size);
-            if (docIds == null || docIds.length < cluster.length) {
-                docIds = new int[cluster.length];
-                clusterOrds = new int[cluster.length];
-                docDeltas = new int[cluster.length];
-            }
             for (int j = 0; j < size; j++) {
                 docIds[j] = floatVectorValues.ordToDoc(cluster[j]);
                 clusterOrds[j] = j;
             }
-            final int[] finalDocs = docIds;
-            final int[] finalOrds = clusterOrds;
             // sort cluster.buffer by docIds values, this way cluster ordinals are sorted by docIds
-            new IntSorter(clusterOrds, i -> finalDocs[i]).sort(0, size);
+            new IntSorter(clusterOrds, i -> docIds[i]).sort(0, size);
             // encode doc deltas
             for (int j = 0; j < size; j++) {
-                docDeltas[j] = j == 0 ? finalDocs[finalOrds[j]] : finalDocs[finalOrds[j]] - finalDocs[finalOrds[j - 1]];
+                docDeltas[j] = j == 0 ? docIds[clusterOrds[j]] : docIds[clusterOrds[j]] - docIds[clusterOrds[j - 1]];
            }
-            final int[] finalDocDeltas = docDeltas;
-            offHeapQuantizedVectors.reset(size, ord -> isOverspill[finalOrds[ord]], ord -> cluster[finalOrds[ord]]);
+            offHeapQuantizedVectors.reset(size, ord -> isOverspill[clusterOrds[ord]], ord -> cluster[clusterOrds[ord]]);
             // TODO we might want to consider putting the docIds in a separate file
             // to aid with only having to fetch vectors from slower storage when they are required
             // keeping them in the same file indicates we pull the entire file into cache
-            idsWriter.writeDocIds(i -> finalDocDeltas[i], size, postingsOutput);
+            idsWriter.writeDocIds(i -> docDeltas[i], size, postingsOutput);
             // write vectors
             bulkWriter.writeVectors(offHeapQuantizedVectors);
         }
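Note for readers skimming the diff: both code paths (on-heap and off-heap quantized vectors) get the same refactor. The scratch arrays that were previously grown lazily inside the centroid loop are now allocated once, sized to maxPostingListSize. Because the locals are no longer reassigned, they are effectively final and can be captured by the IntSorter and writeDocIds lambdas directly, which is what lets the finalDocs / finalOrds / finalDocDeltas aliases go away. A minimal standalone sketch of the pattern (class and method names here are illustrative, not from the repository):

```java
import java.util.function.IntUnaryOperator;

public class ScratchBufferSketch {

    // Old shape: buffers grown lazily per cluster. The reassignment means the
    // local is not effectively final, so each lambda needs a final alias
    // (the finalDocs/finalOrds/finalDocDeltas copies removed by this commit).
    static void lazyBuffers(int[][] clusters) {
        int[] docIds = null;
        for (int[] cluster : clusters) {
            if (docIds == null || docIds.length < cluster.length) {
                docIds = new int[cluster.length];   // reassigned inside the loop
            }
            for (int j = 0; j < cluster.length; j++) {
                docIds[j] = cluster[j];
            }
            final int[] finalDocs = docIds;         // alias exists only for capture
            IntUnaryOperator lookup = i -> finalDocs[i];
            consume(lookup, cluster.length);
        }
    }

    // New shape: allocate once at a known upper bound (maxPostingListSize in
    // the commit). The local stays effectively final, lambdas capture it
    // directly, and there is no per-cluster allocation.
    static void preSizedBuffers(int[][] clusters, int maxClusterSize) {
        final int[] docIds = new int[maxClusterSize];
        for (int[] cluster : clusters) {
            for (int j = 0; j < cluster.length; j++) {
                docIds[j] = cluster[j];
            }
            IntUnaryOperator lookup = i -> docIds[i];
            consume(lookup, cluster.length);
        }
    }

    // Stand-in for the sort/encode/write steps in the real writer.
    static void consume(IntUnaryOperator lookup, int size) {
        for (int i = 0; i < size; i++) {
            System.out.print(lookup.applyAsInt(i) + " ");
        }
        System.out.println();
    }
}
```

The trade-off, as far as the diff shows, is that each buffer is always maxPostingListSize ints even for small clusters, in exchange for zero per-cluster allocation and simpler lambda-capture semantics.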
