Skip to content

Commit 9f5bb1c

Browse files
authored
Merge branch 'main' into lucene_10_2_1
2 parents 9c40ddc + 45d321d commit 9f5bb1c

File tree

13 files changed

+963
-19
lines changed

13 files changed

+963
-19
lines changed

docs/changelog/127225.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 127225
2+
summary: Fix count optimization with pushable union types
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 127200

muted-tests.yml

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -408,9 +408,6 @@ tests:
408408
- class: org.elasticsearch.xpack.esql.qa.multi_node.EsqlSpecIT
409409
method: test {rerank.Reranker before a limit ASYNC}
410410
issue: https://github.com/elastic/elasticsearch/issues/127051
411-
- class: org.elasticsearch.xpack.esql.qa.single_node.EsqlSpecIT
412-
method: test {rrf.SimpleRrf ASYNC}
413-
issue: https://github.com/elastic/elasticsearch/issues/127063
414411
- class: org.elasticsearch.packaging.test.DockerTests
415412
method: test026InstallBundledRepositoryPlugins
416413
issue: https://github.com/elastic/elasticsearch/issues/127081
@@ -426,9 +423,6 @@ tests:
426423
- class: org.elasticsearch.xpack.esql.qa.single_node.GenerativeIT
427424
method: test
428425
issue: https://github.com/elastic/elasticsearch/issues/127157
429-
- class: org.elasticsearch.xpack.esql.qa.single_node.EsqlSpecIT
430-
method: test {fork.ForkWithWhereSortDescAndLimit SYNC}
431-
issue: https://github.com/elastic/elasticsearch/issues/127326
432426
- class: org.elasticsearch.geometry.utils.SpatialEnvelopeVisitorTests
433427
method: testVisitGeoPointsWrapping
434428
issue: https://github.com/elastic/elasticsearch/issues/123425
@@ -450,6 +444,9 @@ tests:
450444
- class: org.elasticsearch.action.admin.cluster.state.TransportClusterStateActionDisruptionIT
451445
method: testLocalRequestAlwaysSucceeds
452446
issue: https://github.com/elastic/elasticsearch/issues/127423
447+
- class: org.elasticsearch.action.admin.cluster.state.TransportClusterStateActionDisruptionIT
448+
method: testLocalRequestWaitsForMetadata
449+
issue: https://github.com/elastic/elasticsearch/issues/127466
453450

454451
# Examples:
455452
#

server/src/main/java/org/elasticsearch/index/codec/vectors/KMeans.java

Lines changed: 494 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/*
2+
* @notice
3+
* Licensed to the Apache Software Foundation (ASF) under one or more
4+
* contributor license agreements. See the NOTICE file distributed with
5+
* this work for additional information regarding copyright ownership.
6+
* The ASF licenses this file to You under the Apache License, Version 2.0
7+
* (the "License"); you may not use this file except in compliance with
8+
* the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
* Modifications copyright (C) 2025 Elasticsearch B.V.
19+
*/
20+
package org.elasticsearch.index.codec.vectors;
21+
22+
import org.apache.lucene.codecs.lucene95.HasIndexSlice;
23+
import org.apache.lucene.index.FloatVectorValues;
24+
import org.apache.lucene.store.IndexInput;
25+
import org.apache.lucene.util.Bits;
26+
27+
import java.io.IOException;
28+
import java.util.Random;
29+
import java.util.function.IntUnaryOperator;
30+
31+
class SampleReader extends FloatVectorValues implements HasIndexSlice {
32+
private final FloatVectorValues origin;
33+
private final int sampleSize;
34+
private final IntUnaryOperator sampleFunction;
35+
36+
SampleReader(FloatVectorValues origin, int sampleSize, IntUnaryOperator sampleFunction) {
37+
this.origin = origin;
38+
this.sampleSize = sampleSize;
39+
this.sampleFunction = sampleFunction;
40+
}
41+
42+
@Override
43+
public int size() {
44+
return sampleSize;
45+
}
46+
47+
@Override
48+
public int dimension() {
49+
return origin.dimension();
50+
}
51+
52+
@Override
53+
public FloatVectorValues copy() throws IOException {
54+
throw new IllegalStateException("Not supported");
55+
}
56+
57+
@Override
58+
public IndexInput getSlice() {
59+
return ((HasIndexSlice) origin).getSlice();
60+
}
61+
62+
@Override
63+
public float[] vectorValue(int targetOrd) throws IOException {
64+
return origin.vectorValue(sampleFunction.applyAsInt(targetOrd));
65+
}
66+
67+
@Override
68+
public int getVectorByteLength() {
69+
return origin.getVectorByteLength();
70+
}
71+
72+
@Override
73+
public int ordToDoc(int ord) {
74+
throw new IllegalStateException("Not supported");
75+
}
76+
77+
@Override
78+
public Bits getAcceptOrds(Bits acceptDocs) {
79+
throw new IllegalStateException("Not supported");
80+
}
81+
82+
static SampleReader createSampleReader(FloatVectorValues origin, int k, long seed) {
83+
// TODO can we do something algorithmically that aligns an ordinal with a unique integer between 0 and numVectors?
84+
if (k >= origin.size()) {
85+
new SampleReader(origin, origin.size(), i -> i);
86+
}
87+
// TODO maybe use bigArrays?
88+
int[] samples = reservoirSample(origin.size(), k, seed);
89+
return new SampleReader(origin, samples.length, i -> samples[i]);
90+
}
91+
92+
/**
93+
* Sample k elements from n elements according to reservoir sampling algorithm.
94+
*
95+
* @param n number of elements
96+
* @param k number of samples
97+
* @param seed random seed
98+
* @return array of k samples
99+
*/
100+
public static int[] reservoirSample(int n, int k, long seed) {
101+
Random rnd = new Random(seed);
102+
int[] reservoir = new int[k];
103+
for (int i = 0; i < k; i++) {
104+
reservoir[i] = i;
105+
}
106+
for (int i = k; i < n; i++) {
107+
int j = rnd.nextInt(i + 1);
108+
if (j < k) {
109+
reservoir[j] = i;
110+
}
111+
}
112+
return reservoir;
113+
}
114+
115+
}

server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.apache.lucene.codecs.hnsw.FlatVectorsReader;
2424
import org.apache.lucene.codecs.hnsw.FlatVectorsScorer;
2525
import org.apache.lucene.codecs.hnsw.FlatVectorsWriter;
26+
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsReader;
2627
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsWriter;
2728
import org.apache.lucene.index.SegmentReadState;
2829
import org.apache.lucene.index.SegmentWriteState;
@@ -62,7 +63,16 @@ public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOExceptio
6263

6364
@Override
6465
public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException {
65-
return new DirectIOLucene99FlatVectorsReader(state, vectorsScorer);
66+
if (DirectIOLucene99FlatVectorsReader.shouldUseDirectIO(state)) {
67+
// Use mmap for merges and direct I/O for searches.
68+
// TODO: Open the mmap file with sequential access instead of random (current behavior).
69+
return new MergeReaderWrapper(
70+
new DirectIOLucene99FlatVectorsReader(state, vectorsScorer),
71+
new Lucene99FlatVectorsReader(state, vectorsScorer)
72+
);
73+
} else {
74+
return new Lucene99FlatVectorsReader(state, vectorsScorer);
75+
}
6676
}
6777

6878
@Override

server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsReader.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ public DirectIOLucene99FlatVectorsReader(SegmentReadState state, FlatVectorsScor
8585
}
8686
}
8787

88+
public static boolean shouldUseDirectIO(SegmentReadState state) {
89+
return USE_DIRECT_IO && state.directory instanceof DirectIOIndexInputSupplier;
90+
}
91+
8892
private int readMetadata(SegmentReadState state) throws IOException {
8993
String metaFileName = IndexFileNames.segmentFileName(
9094
state.segmentInfo.name,
@@ -122,11 +126,9 @@ private static IndexInput openDataInput(
122126
) throws IOException {
123127
String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension);
124128
// use direct IO for accessing raw vector data for searches
125-
IndexInput in = USE_DIRECT_IO
126-
&& context.context() == IOContext.Context.DEFAULT
127-
&& state.directory instanceof DirectIOIndexInputSupplier did
128-
? did.openInputDirect(fileName, context)
129-
: state.directory.openInput(fileName, context);
129+
IndexInput in = USE_DIRECT_IO && state.directory instanceof DirectIOIndexInputSupplier did
130+
? did.openInputDirect(fileName, context)
131+
: state.directory.openInput(fileName, context);
130132
boolean success = false;
131133
try {
132134
int versionVectorData = CodecUtil.checkIndexHeader(
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.codec.vectors.es818;
11+
12+
import org.apache.lucene.codecs.hnsw.FlatVectorsReader;
13+
import org.apache.lucene.index.ByteVectorValues;
14+
import org.apache.lucene.index.FloatVectorValues;
15+
import org.apache.lucene.search.KnnCollector;
16+
import org.apache.lucene.util.Accountable;
17+
import org.apache.lucene.util.Bits;
18+
import org.apache.lucene.util.hnsw.RandomVectorScorer;
19+
import org.elasticsearch.core.IOUtils;
20+
21+
import java.io.IOException;
22+
import java.util.Collection;
23+
24+
class MergeReaderWrapper extends FlatVectorsReader {
25+
26+
private final FlatVectorsReader mainReader;
27+
private final FlatVectorsReader mergeReader;
28+
29+
protected MergeReaderWrapper(FlatVectorsReader mainReader, FlatVectorsReader mergeReader) {
30+
super(mainReader.getFlatVectorScorer());
31+
this.mainReader = mainReader;
32+
this.mergeReader = mergeReader;
33+
}
34+
35+
@Override
36+
public RandomVectorScorer getRandomVectorScorer(String field, float[] target) throws IOException {
37+
return mainReader.getRandomVectorScorer(field, target);
38+
}
39+
40+
@Override
41+
public RandomVectorScorer getRandomVectorScorer(String field, byte[] target) throws IOException {
42+
return mainReader.getRandomVectorScorer(field, target);
43+
}
44+
45+
@Override
46+
public void checkIntegrity() throws IOException {
47+
mainReader.checkIntegrity();
48+
}
49+
50+
@Override
51+
public FloatVectorValues getFloatVectorValues(String field) throws IOException {
52+
return mainReader.getFloatVectorValues(field);
53+
}
54+
55+
@Override
56+
public ByteVectorValues getByteVectorValues(String field) throws IOException {
57+
return mainReader.getByteVectorValues(field);
58+
}
59+
60+
@Override
61+
public FlatVectorsReader getMergeInstance() {
62+
return mergeReader;
63+
}
64+
65+
@Override
66+
public long ramBytesUsed() {
67+
return mainReader.ramBytesUsed();
68+
}
69+
70+
@Override
71+
public Collection<Accountable> getChildResources() {
72+
return mainReader.getChildResources();
73+
}
74+
75+
@Override
76+
public void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException {
77+
mainReader.search(field, target, knnCollector, acceptDocs);
78+
}
79+
80+
@Override
81+
public void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException {
82+
mainReader.search(field, target, knnCollector, acceptDocs);
83+
}
84+
85+
@Override
86+
public void close() throws IOException {
87+
IOUtils.close(mainReader, mergeReader);
88+
}
89+
}

0 commit comments

Comments
 (0)