Skip to content

Commit dd912e4

Browse files
authored
Merge branch 'main' into pem-randomrandom-testing
2 parents 9126d11 + 3ccb52d commit dd912e4

File tree

70 files changed

+3453
-1979
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

70 files changed

+3453
-1979
lines changed

build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesPlugin.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,9 @@ public void apply(Project project) {
6161
t.getDefinitionsDirectory().set(getDefinitionsDirectory(getResourcesDirectory(project)));
6262
t.getManifestFile().set(project.getLayout().getBuildDirectory().file("generated-resources/manifest.txt"));
6363
});
64+
String resourceRoot = TransportVersionUtils.getResourceRoot(project);
6465
project.getTasks().named(JavaPlugin.PROCESS_RESOURCES_TASK_NAME, Copy.class).configure(t -> {
65-
t.into("transport/definitions", c -> c.from(generateManifestTask));
66+
t.into(resourceRoot + "/definitions", c -> c.from(generateManifestTask));
6667
});
6768
}
6869
}

build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionUtils.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,16 @@ static Directory getResourcesDirectory(Project project) {
5050
if (projectName == null) {
5151
projectName = ":server";
5252
}
53+
var resourceRoot = getResourceRoot(project);
5354
Directory projectDir = project.project(projectName.toString()).getLayout().getProjectDirectory();
54-
return projectDir.dir("src/main/resources/transport");
55+
return projectDir.dir("src/main/resources/" + resourceRoot);
56+
}
57+
58+
static String getResourceRoot(Project project) {
59+
var resourceRoot = project.findProperty("org.elasticsearch.transport.resourceRoot");
60+
if (resourceRoot == null) {
61+
resourceRoot = "transport";
62+
}
63+
return resourceRoot.toString();
5564
}
5665
}

docs/changelog/132506.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132506
2+
summary: Add .integration_knowledge system index for usage by AI assistants
3+
area: Infra/Core
4+
type: feature
5+
issues: []

muted-tests.yml

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -543,9 +543,6 @@ tests:
543543
- class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT
544544
method: test {p0=search/160_exists_query/Test exists query on unmapped float field}
545545
issue: https://github.com/elastic/elasticsearch/issues/132984
546-
- class: org.elasticsearch.xpack.esql.analysis.AnalyzerTests
547-
method: testMagnitudePlanWithDenseVectorImplicitCasting
548-
issue: https://github.com/elastic/elasticsearch/issues/132985
549546
- class: org.elasticsearch.xpack.search.AsyncSearchErrorTraceIT
550547
method: testAsyncSearchFailingQueryErrorTraceDefault
551548
issue: https://github.com/elastic/elasticsearch/issues/133010
@@ -582,12 +579,18 @@ tests:
582579
- class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT
583580
method: test {p0=search/160_exists_query/Test exists query on _id field}
584581
issue: https://github.com/elastic/elasticsearch/issues/133097
585-
- class: org.elasticsearch.xpack.esql.analysis.AnalyzerTests
586-
method: testNoDenseVectorFailsForMagnitude
587-
issue: https://github.com/elastic/elasticsearch/issues/133013
588582
- class: org.elasticsearch.xpack.ml.integration.TextEmbeddingQueryIT
589583
method: testModelWithPrefixStrings
590584
issue: https://github.com/elastic/elasticsearch/issues/133138
585+
- class: org.elasticsearch.index.mapper.blockloader.IpFieldBlockLoaderTests
586+
method: testBlockLoaderOfMultiField {preference=Params[syntheticSource=true, preference=DOC_VALUES]}
587+
issue: https://github.com/elastic/elasticsearch/issues/133149
588+
- class: org.elasticsearch.index.mapper.blockloader.IpFieldBlockLoaderTests
589+
method: testBlockLoaderOfMultiField {preference=Params[syntheticSource=true, preference=STORED]}
590+
issue: https://github.com/elastic/elasticsearch/issues/133150
591+
- class: org.elasticsearch.index.mapper.blockloader.IpFieldBlockLoaderTests
592+
method: testBlockLoaderOfMultiField {preference=Params[syntheticSource=true, preference=NONE]}
593+
issue: https://github.com/elastic/elasticsearch/issues/133151
591594

592595
# Examples:
593596
#

server/src/main/java/org/elasticsearch/TransportVersion.java

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -163,31 +163,32 @@ public static TransportVersion fromBufferedReader(
163163
}
164164
}
165165

166-
public static List<TransportVersion> collectFromInputStreams(
166+
public static List<TransportVersion> collectFromResources(
167167
String component,
168-
Function<String, InputStream> nameToStream,
168+
String resourceRoot,
169+
Function<String, InputStream> resourceLoader,
169170
String latestFileName
170171
) {
171172
TransportVersion latest = parseFromBufferedReader(
172173
component,
173-
"/transport/latest/" + latestFileName,
174-
nameToStream,
174+
resourceRoot + "/latest/" + latestFileName,
175+
resourceLoader,
175176
(c, p, br) -> fromBufferedReader(c, p, true, false, br, Integer.MAX_VALUE)
176177
);
177178
if (latest != null) {
178179
List<String> versionRelativePaths = parseFromBufferedReader(
179180
component,
180-
"/transport/definitions/manifest.txt",
181-
nameToStream,
181+
resourceRoot + "/definitions/manifest.txt",
182+
resourceLoader,
182183
(c, p, br) -> br.lines().filter(line -> line.isBlank() == false).toList()
183184
);
184185
if (versionRelativePaths != null) {
185186
List<TransportVersion> transportVersions = new ArrayList<>();
186187
for (String versionRelativePath : versionRelativePaths) {
187188
TransportVersion transportVersion = parseFromBufferedReader(
188189
component,
189-
"/transport/definitions/" + versionRelativePath,
190-
nameToStream,
190+
resourceRoot + "/definitions/" + versionRelativePath,
191+
resourceLoader,
191192
(c, p, br) -> fromBufferedReader(c, p, false, versionRelativePath.startsWith("named/"), br, latest.id())
192193
);
193194
if (transportVersion != null) {
@@ -426,8 +427,9 @@ private static class VersionsHolder {
426427
static {
427428
// collect all the transport versions from server and es modules/plugins (defined in server)
428429
List<TransportVersion> allVersions = new ArrayList<>(TransportVersions.DEFINED_VERSIONS);
429-
List<TransportVersion> streamVersions = collectFromInputStreams(
430+
List<TransportVersion> streamVersions = collectFromResources(
430431
"<server>",
432+
"/transport",
431433
TransportVersion.class::getResourceAsStream,
432434
Version.CURRENT.major + "." + Version.CURRENT.minor + ".csv"
433435
);

server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java

Lines changed: 67 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,50 @@ public DefaultIVFVectorsReader(SegmentReadState state, FlatVectorsReader rawVect
4747
super(state, rawVectorsReader);
4848
}
4949

50+
CentroidIterator getPostingListPrefetchIterator(CentroidIterator centroidIterator, IndexInput postingListSlice) throws IOException {
51+
return new CentroidIterator() {
52+
CentroidOffsetAndLength nextOffsetAndLength = centroidIterator.hasNext()
53+
? centroidIterator.nextPostingListOffsetAndLength()
54+
: null;
55+
56+
{
57+
// prefetch the first one
58+
if (nextOffsetAndLength != null) {
59+
prefetch(nextOffsetAndLength);
60+
}
61+
}
62+
63+
void prefetch(CentroidOffsetAndLength offsetAndLength) throws IOException {
64+
postingListSlice.prefetch(offsetAndLength.offset(), offsetAndLength.length());
65+
}
66+
67+
@Override
68+
public boolean hasNext() {
69+
return nextOffsetAndLength != null;
70+
}
71+
72+
@Override
73+
public CentroidOffsetAndLength nextPostingListOffsetAndLength() throws IOException {
74+
CentroidOffsetAndLength offsetAndLength = nextOffsetAndLength;
75+
if (centroidIterator.hasNext()) {
76+
nextOffsetAndLength = centroidIterator.nextPostingListOffsetAndLength();
77+
prefetch(nextOffsetAndLength);
78+
} else {
79+
nextOffsetAndLength = null; // indicate we reached the end
80+
}
81+
return offsetAndLength;
82+
}
83+
};
84+
}
85+
5086
@Override
51-
CentroidIterator getCentroidIterator(FieldInfo fieldInfo, int numCentroids, IndexInput centroids, float[] targetQuery)
52-
throws IOException {
87+
CentroidIterator getCentroidIterator(
88+
FieldInfo fieldInfo,
89+
int numCentroids,
90+
IndexInput centroids,
91+
float[] targetQuery,
92+
IndexInput postingListSlice
93+
) throws IOException {
5394
final FieldEntry fieldEntry = fields.get(fieldInfo.number);
5495
final float globalCentroidDp = fieldEntry.globalCentroidDp();
5596
final OptimizedScalarQuantizer scalarQuantizer = new OptimizedScalarQuantizer(fieldInfo.getVectorSimilarityFunction());
@@ -71,8 +112,9 @@ CentroidIterator getCentroidIterator(FieldInfo fieldInfo, int numCentroids, Inde
71112
final ES92Int7VectorsScorer scorer = ESVectorUtil.getES92Int7VectorsScorer(centroids, fieldInfo.getVectorDimension());
72113
centroids.seek(0L);
73114
int numParents = centroids.readVInt();
115+
CentroidIterator centroidIterator;
74116
if (numParents > 0) {
75-
return getCentroidIteratorWithParents(
117+
centroidIterator = getCentroidIteratorWithParents(
76118
fieldInfo,
77119
centroids,
78120
numParents,
@@ -82,8 +124,18 @@ CentroidIterator getCentroidIterator(FieldInfo fieldInfo, int numCentroids, Inde
82124
queryParams,
83125
globalCentroidDp
84126
);
127+
} else {
128+
centroidIterator = getCentroidIteratorNoParent(
129+
fieldInfo,
130+
centroids,
131+
numCentroids,
132+
scorer,
133+
quantized,
134+
queryParams,
135+
globalCentroidDp
136+
);
85137
}
86-
return getCentroidIteratorNoParent(fieldInfo, centroids, numCentroids, scorer, quantized, queryParams, globalCentroidDp);
138+
return getPostingListPrefetchIterator(centroidIterator, postingListSlice);
87139
}
88140

89141
private static CentroidIterator getCentroidIteratorNoParent(
@@ -115,10 +167,12 @@ public boolean hasNext() {
115167
}
116168

117169
@Override
118-
public long nextPostingListOffset() throws IOException {
170+
public CentroidOffsetAndLength nextPostingListOffsetAndLength() throws IOException {
119171
int centroidOrdinal = neighborQueue.pop();
120-
centroids.seek(offset + (long) Long.BYTES * centroidOrdinal);
121-
return centroids.readLong();
172+
centroids.seek(offset + (long) Long.BYTES * 2 * centroidOrdinal);
173+
long postingListOffset = centroids.readLong();
174+
long postingListLength = centroids.readLong();
175+
return new CentroidOffsetAndLength(postingListOffset, postingListLength);
122176
}
123177
};
124178
}
@@ -185,11 +239,13 @@ public boolean hasNext() {
185239
}
186240

187241
@Override
188-
public long nextPostingListOffset() throws IOException {
242+
public CentroidOffsetAndLength nextPostingListOffsetAndLength() throws IOException {
189243
int centroidOrdinal = neighborQueue.pop();
190244
updateQueue(); // add one children if available so the queue remains fully populated
191-
centroids.seek(childrenFileOffsets + (long) Long.BYTES * centroidOrdinal);
192-
return centroids.readLong();
245+
centroids.seek(childrenFileOffsets + (long) Long.BYTES * 2 * centroidOrdinal);
246+
long postingListOffset = centroids.readLong();
247+
long postingListLength = centroids.readLong();
248+
return new CentroidOffsetAndLength(postingListOffset, postingListLength);
193249
}
194250

195251
private void updateQueue() throws IOException {
@@ -452,6 +508,7 @@ public int visit(KnnCollector knnCollector) throws IOException {
452508
int scoredDocs = 0;
453509
int limit = vectors - BULK_SIZE + 1;
454510
int i = 0;
511+
455512
for (; i < limit; i += BULK_SIZE) {
456513
final int docsToBulkScore = acceptDocs == null ? BULK_SIZE : docToBulkScore(docIdsScratch, i, acceptDocs);
457514
if (docsToBulkScore == 0) {

server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import org.apache.lucene.store.IndexInput;
1919
import org.apache.lucene.store.IndexOutput;
2020
import org.apache.lucene.util.IntroSorter;
21-
import org.apache.lucene.util.LongValues;
2221
import org.apache.lucene.util.VectorUtil;
2322
import org.apache.lucene.util.hnsw.IntToIntFunction;
2423
import org.apache.lucene.util.packed.PackedInts;
@@ -60,7 +59,7 @@ public DefaultIVFVectorsWriter(
6059
}
6160

6261
@Override
63-
LongValues buildAndWritePostingsLists(
62+
CentroidOffsetAndLength buildAndWritePostingsLists(
6463
FieldInfo fieldInfo,
6564
CentroidSupplier centroidSupplier,
6665
FloatVectorValues floatVectorValues,
@@ -102,6 +101,7 @@ LongValues buildAndWritePostingsLists(
102101
postingsOutput.writeVInt(maxPostingListSize);
103102
// write the posting lists
104103
final PackedLongValues.Builder offsets = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
104+
final PackedLongValues.Builder lengths = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
105105
DiskBBQBulkWriter bulkWriter = new DiskBBQBulkWriter.OneBitDiskBBQBulkWriter(ES91OSQVectorsScorer.BULK_SIZE, postingsOutput);
106106
OnHeapQuantizedVectors onHeapQuantizedVectors = new OnHeapQuantizedVectors(
107107
floatVectorValues,
@@ -116,7 +116,8 @@ LongValues buildAndWritePostingsLists(
116116
for (int c = 0; c < centroidSupplier.size(); c++) {
117117
float[] centroid = centroidSupplier.centroid(c);
118118
int[] cluster = assignmentsByCluster[c];
119-
offsets.add(postingsOutput.alignFilePointer(Float.BYTES) - fileOffset);
119+
long offset = postingsOutput.alignFilePointer(Float.BYTES) - fileOffset;
120+
offsets.add(offset);
120121
buffer.asFloatBuffer().put(centroid);
121122
// write raw centroid for quantizing the query vectors
122123
postingsOutput.writeBytes(buffer.array(), buffer.array().length);
@@ -142,17 +143,18 @@ LongValues buildAndWritePostingsLists(
142143
idsWriter.writeDocIds(i -> docDeltas[i], size, postingsOutput);
143144
// write vectors
144145
bulkWriter.writeVectors(onHeapQuantizedVectors);
146+
lengths.add(postingsOutput.getFilePointer() - fileOffset - offset);
145147
}
146148

147149
if (logger.isDebugEnabled()) {
148150
printClusterQualityStatistics(assignmentsByCluster);
149151
}
150152

151-
return offsets.build();
153+
return new CentroidOffsetAndLength(offsets.build(), lengths.build());
152154
}
153155

154156
@Override
155-
LongValues buildAndWritePostingsLists(
157+
CentroidOffsetAndLength buildAndWritePostingsLists(
156158
FieldInfo fieldInfo,
157159
CentroidSupplier centroidSupplier,
158160
FloatVectorValues floatVectorValues,
@@ -243,6 +245,7 @@ LongValues buildAndWritePostingsLists(
243245
// now we can read the quantized vectors from the temporary file
244246
try (IndexInput quantizedVectorsInput = mergeState.segmentInfo.dir.openInput(quantizedVectorsTempName, IOContext.DEFAULT)) {
245247
final PackedLongValues.Builder offsets = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
248+
final PackedLongValues.Builder lengths = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
246249
OffHeapQuantizedVectors offHeapQuantizedVectors = new OffHeapQuantizedVectors(
247250
quantizedVectorsInput,
248251
fieldInfo.getVectorDimension()
@@ -260,7 +263,8 @@ LongValues buildAndWritePostingsLists(
260263
float[] centroid = centroidSupplier.centroid(c);
261264
int[] cluster = assignmentsByCluster[c];
262265
boolean[] isOverspill = isOverspillByCluster[c];
263-
offsets.add(postingsOutput.alignFilePointer(Float.BYTES) - fileOffset);
266+
long offset = postingsOutput.alignFilePointer(Float.BYTES) - fileOffset;
267+
offsets.add(offset);
264268
// write raw centroid for quantizing the query vectors
265269
buffer.asFloatBuffer().put(centroid);
266270
postingsOutput.writeBytes(buffer.array(), buffer.array().length);
@@ -286,12 +290,14 @@ LongValues buildAndWritePostingsLists(
286290
idsWriter.writeDocIds(i -> docDeltas[i], size, postingsOutput);
287291
// write vectors
288292
bulkWriter.writeVectors(offHeapQuantizedVectors);
293+
lengths.add(postingsOutput.getFilePointer() - fileOffset - offset);
294+
// lengths.add(1);
289295
}
290296

291297
if (logger.isDebugEnabled()) {
292298
printClusterQualityStatistics(assignmentsByCluster);
293299
}
294-
return offsets.build();
300+
return new CentroidOffsetAndLength(offsets.build(), lengths.build());
295301
}
296302
}
297303

@@ -335,24 +341,24 @@ void writeCentroids(
335341
FieldInfo fieldInfo,
336342
CentroidSupplier centroidSupplier,
337343
float[] globalCentroid,
338-
LongValues offsets,
344+
CentroidOffsetAndLength centroidOffsetAndLength,
339345
IndexOutput centroidOutput
340346
) throws IOException {
341347
// TODO do we want to store these distances as well for future use?
342348
// TODO: sort centroids by global centroid (was doing so previously here)
343349
// TODO: sorting tanks recall possibly because centroids ordinals no longer are aligned
344350
if (centroidSupplier.size() > centroidsPerParentCluster * centroidsPerParentCluster) {
345-
writeCentroidsWithParents(fieldInfo, centroidSupplier, globalCentroid, offsets, centroidOutput);
351+
writeCentroidsWithParents(fieldInfo, centroidSupplier, globalCentroid, centroidOffsetAndLength, centroidOutput);
346352
} else {
347-
writeCentroidsWithoutParents(fieldInfo, centroidSupplier, globalCentroid, offsets, centroidOutput);
353+
writeCentroidsWithoutParents(fieldInfo, centroidSupplier, globalCentroid, centroidOffsetAndLength, centroidOutput);
348354
}
349355
}
350356

351357
private void writeCentroidsWithParents(
352358
FieldInfo fieldInfo,
353359
CentroidSupplier centroidSupplier,
354360
float[] globalCentroid,
355-
LongValues offsets,
361+
CentroidOffsetAndLength centroidOffsetAndLength,
356362
IndexOutput centroidOutput
357363
) throws IOException {
358364
DiskBBQBulkWriter.SevenBitDiskBBQBulkWriter bulkWriter = new DiskBBQBulkWriter.SevenBitDiskBBQBulkWriter(
@@ -392,7 +398,8 @@ private void writeCentroidsWithParents(
392398
for (int i = 0; i < centroidGroups.centroids().length; i++) {
393399
final int[] centroidAssignments = centroidGroups.vectors()[i];
394400
for (int assignment : centroidAssignments) {
395-
centroidOutput.writeLong(offsets.get(assignment));
401+
centroidOutput.writeLong(centroidOffsetAndLength.offsets().get(assignment));
402+
centroidOutput.writeLong(centroidOffsetAndLength.lengths().get(assignment));
396403
}
397404
}
398405
}
@@ -401,7 +408,7 @@ private void writeCentroidsWithoutParents(
401408
FieldInfo fieldInfo,
402409
CentroidSupplier centroidSupplier,
403410
float[] globalCentroid,
404-
LongValues offsets,
411+
CentroidOffsetAndLength centroidOffsetAndLength,
405412
IndexOutput centroidOutput
406413
) throws IOException {
407414
centroidOutput.writeVInt(0);
@@ -419,7 +426,8 @@ private void writeCentroidsWithoutParents(
419426
bulkWriter.writeVectors(quantizedCentroids);
420427
// write the centroid offsets at the end of the file
421428
for (int i = 0; i < centroidSupplier.size(); i++) {
422-
centroidOutput.writeLong(offsets.get(i));
429+
centroidOutput.writeLong(centroidOffsetAndLength.offsets().get(i));
430+
centroidOutput.writeLong(centroidOffsetAndLength.lengths().get(i));
423431
}
424432
}
425433

0 commit comments

Comments
 (0)