Skip to content

Commit 747150e

Browse files
authored
Merge branch 'main' into ivf_hkmeans_struc2
2 parents 234a0a8 + b1b3379 commit 747150e

File tree

740 files changed

+18106
-9012
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

740 files changed

+18106
-9012
lines changed

README.asciidoc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ For the complete Elasticsearch documentation visit
275275
https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html[elastic.co].
276276

277277
For information about our documentation processes, see the
278-
xref:docs/README.asciidoc[docs README].
278+
https://github.com/elastic/elasticsearch/blob/main/docs/README.md[docs README].
279279

280280
[[examples]]
281281
== Examples and guides

benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/QueryPlanningBenchmark.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,11 @@ public class QueryPlanningBenchmark {
7070
private EsqlParser defaultParser;
7171
private Analyzer manyFieldsAnalyzer;
7272
private LogicalPlanOptimizer defaultOptimizer;
73+
private Configuration config;
7374

7475
@Setup
7576
public void setup() {
76-
77-
var config = new Configuration(
77+
this.config = new Configuration(
7878
DateUtils.UTC,
7979
Locale.US,
8080
null,
@@ -116,7 +116,7 @@ public void setup() {
116116
}
117117

118118
private LogicalPlan plan(EsqlParser parser, Analyzer analyzer, LogicalPlanOptimizer optimizer, String query) {
119-
var parsed = parser.createStatement(query, new QueryParams(), telemetry);
119+
var parsed = parser.createStatement(query, new QueryParams(), telemetry, config);
120120
var analyzed = analyzer.analyze(parsed);
121121
var optimized = optimizer.optimize(analyzed);
122122
return optimized;
Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* License v3.0 only", or the "Server Side Public License, v 1".
88
*/
99

10-
package org.elasticsearch.benchmark.compute.operator;
10+
package org.elasticsearch.benchmark._nightly.esql;
1111

1212
import org.apache.lucene.document.FieldType;
1313
import org.apache.lucene.document.NumericDocValuesField;
@@ -41,7 +41,8 @@
4141
import org.elasticsearch.compute.data.Page;
4242
import org.elasticsearch.compute.lucene.LuceneSourceOperator;
4343
import org.elasticsearch.compute.lucene.ShardRefCounted;
44-
import org.elasticsearch.compute.lucene.ValuesSourceReaderOperator;
44+
import org.elasticsearch.compute.lucene.read.ValuesSourceReaderOperator;
45+
import org.elasticsearch.compute.lucene.read.ValuesSourceReaderOperatorStatus;
4546
import org.elasticsearch.compute.operator.topn.TopNOperator;
4647
import org.elasticsearch.core.IOUtils;
4748
import org.elasticsearch.index.IndexSettings;
@@ -84,10 +85,18 @@
8485
@State(Scope.Thread)
8586
@Fork(1)
8687
public class ValuesSourceReaderBenchmark {
88+
private static final String[] SUPPORTED_LAYOUTS = new String[] { "in_order", "shuffled", "shuffled_singles" };
89+
private static final String[] SUPPORTED_NAMES = new String[] {
90+
"long",
91+
"int",
92+
"double",
93+
"keyword",
94+
"stored_keyword",
95+
"3_stored_keywords" };
96+
8797
private static final int BLOCK_LENGTH = 16 * 1024;
8898
private static final int INDEX_SIZE = 10 * BLOCK_LENGTH;
8999
private static final int COMMIT_INTERVAL = 500;
90-
private static final BigArrays BIG_ARRAYS = BigArrays.NON_RECYCLING_INSTANCE;
91100
private static final BlockFactory blockFactory = BlockFactory.getInstance(
92101
new NoopCircuitBreaker("noop"),
93102
BigArrays.NON_RECYCLING_INSTANCE
@@ -103,8 +112,8 @@ static void selfTest() {
103112
ValuesSourceReaderBenchmark benchmark = new ValuesSourceReaderBenchmark();
104113
benchmark.setupIndex();
105114
try {
106-
for (String layout : ValuesSourceReaderBenchmark.class.getField("layout").getAnnotationsByType(Param.class)[0].value()) {
107-
for (String name : ValuesSourceReaderBenchmark.class.getField("name").getAnnotationsByType(Param.class)[0].value()) {
115+
for (String layout : ValuesSourceReaderBenchmark.SUPPORTED_LAYOUTS) {
116+
for (String name : ValuesSourceReaderBenchmark.SUPPORTED_NAMES) {
108117
benchmark.layout = layout;
109118
benchmark.name = name;
110119
try {
@@ -118,7 +127,7 @@ static void selfTest() {
118127
} finally {
119128
benchmark.teardownIndex();
120129
}
121-
} catch (IOException | NoSuchFieldException e) {
130+
} catch (IOException e) {
122131
throw new AssertionError(e);
123132
}
124133
}
@@ -320,10 +329,10 @@ public FieldNamesFieldMapper.FieldNamesFieldType fieldNames() {
320329
* each page has a single document rather than {@code BLOCK_SIZE} docs.</li>
321330
* </ul>
322331
*/
323-
@Param({ "in_order", "shuffled", "shuffled_singles" })
332+
@Param({ "in_order", "shuffled" })
324333
public String layout;
325334

326-
@Param({ "long", "int", "double", "keyword", "stored_keyword", "3_stored_keywords" })
335+
@Param({ "long", "keyword", "stored_keyword" })
327336
public String name;
328337

329338
private Directory directory;
@@ -343,7 +352,7 @@ public void benchmark() {
343352
);
344353
long sum = 0;
345354
for (Page page : pages) {
346-
op.addInput(page);
355+
op.addInput(page.shallowCopy());
347356
switch (name) {
348357
case "long" -> {
349358
LongVector values = op.getOutput().<LongBlock>getBlock(1).asVector();
@@ -411,7 +420,7 @@ public void benchmark() {
411420
throw new AssertionError("[" + layout + "][" + name + "] expected [" + expected + "] but was [" + sum + "]");
412421
}
413422
boolean foundStoredFieldLoader = false;
414-
ValuesSourceReaderOperator.Status status = (ValuesSourceReaderOperator.Status) op.status();
423+
ValuesSourceReaderOperatorStatus status = (ValuesSourceReaderOperatorStatus) op.status();
415424
for (Map.Entry<String, Integer> e : status.readersBuilt().entrySet()) {
416425
if (e.getKey().indexOf("stored_fields") >= 0) {
417426
foundStoredFieldLoader = true;

benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesAggregatorBenchmark.java

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,8 @@ static void selfTest() {
9595
try {
9696
for (String groups : ValuesAggregatorBenchmark.class.getField("groups").getAnnotationsByType(Param.class)[0].value()) {
9797
for (String dataType : ValuesAggregatorBenchmark.class.getField("dataType").getAnnotationsByType(Param.class)[0].value()) {
98-
run(Integer.parseInt(groups), dataType, 10);
98+
run(Integer.parseInt(groups), dataType, 10, 0);
99+
run(Integer.parseInt(groups), dataType, 10, 1);
99100
}
100101
}
101102
} catch (NoSuchFieldException e) {
@@ -113,7 +114,10 @@ static void selfTest() {
113114
@Param({ BYTES_REF, INT, LONG })
114115
public String dataType;
115116

116-
private static Operator operator(DriverContext driverContext, int groups, String dataType) {
117+
@Param({ "0", "1" })
118+
public int numOrdinalMerges;
119+
120+
private static Operator operator(DriverContext driverContext, int groups, String dataType, int numOrdinalMerges) {
117121
if (groups == 1) {
118122
return new AggregationOperator(
119123
List.of(supplier(dataType).aggregatorFactory(AggregatorMode.SINGLE, List.of(0)).apply(driverContext)),
@@ -125,7 +129,24 @@ private static Operator operator(DriverContext driverContext, int groups, String
125129
List.of(supplier(dataType).groupingAggregatorFactory(AggregatorMode.SINGLE, List.of(1))),
126130
() -> BlockHash.build(groupSpec, driverContext.blockFactory(), 16 * 1024, false),
127131
driverContext
128-
);
132+
) {
133+
@Override
134+
public Page getOutput() {
135+
mergeOrdinal();
136+
return super.getOutput();
137+
}
138+
139+
// simulate OrdinalsGroupingOperator
140+
void mergeOrdinal() {
141+
var merged = supplier(dataType).groupingAggregatorFactory(AggregatorMode.SINGLE, List.of(1)).apply(driverContext);
142+
for (int i = 0; i < numOrdinalMerges; i++) {
143+
for (int p = 0; p < groups; p++) {
144+
merged.addIntermediateRow(p, aggregators.getFirst(), p);
145+
}
146+
}
147+
aggregators.set(0, merged);
148+
}
149+
};
129150
}
130151

131152
private static AggregatorFunctionSupplier supplier(String dataType) {
@@ -331,12 +352,12 @@ private static Block groupingBlock(int groups) {
331352

332353
@Benchmark
333354
public void run() {
334-
run(groups, dataType, OP_COUNT);
355+
run(groups, dataType, OP_COUNT, numOrdinalMerges);
335356
}
336357

337-
private static void run(int groups, String dataType, int opCount) {
358+
private static void run(int groups, String dataType, int opCount, int numOrdinalMerges) {
338359
DriverContext driverContext = driverContext();
339-
try (Operator operator = operator(driverContext, groups, dataType)) {
360+
try (Operator operator = operator(driverContext, groups, dataType, numOrdinalMerges)) {
340361
Page page = page(groups, dataType);
341362
for (int i = 0; i < opCount; i++) {
342363
operator.addInput(page.shallowCopy());

benchmarks/src/main/java/org/elasticsearch/benchmark/vector/Int7uScorerBenchmark.java

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
import org.apache.lucene.util.quantization.ScalarQuantizer;
2525
import org.elasticsearch.common.logging.LogConfigurator;
2626
import org.elasticsearch.core.IOUtils;
27+
import org.elasticsearch.logging.LogManager;
28+
import org.elasticsearch.logging.Logger;
2729
import org.elasticsearch.simdvec.VectorScorerFactory;
2830
import org.openjdk.jmh.annotations.Benchmark;
2931
import org.openjdk.jmh.annotations.BenchmarkMode;
@@ -61,6 +63,10 @@ public class Int7uScorerBenchmark {
6163

6264
static {
6365
LogConfigurator.configureESLogging(); // native access requires logging to be initialized
66+
if (supportsHeapSegments() == false) {
67+
final Logger LOG = LogManager.getLogger(Int7uScorerBenchmark.class);
68+
LOG.warn("*Query targets cannot run on " + "JDK " + Runtime.version());
69+
}
6470
}
6571

6672
@Param({ "96", "768", "1024" })
@@ -129,15 +135,17 @@ public void setup() throws IOException {
129135
nativeSqrScorer = factory.getInt7SQVectorScorerSupplier(EUCLIDEAN, in, values, scoreCorrectionConstant).get().scorer();
130136
nativeSqrScorer.setScoringOrdinal(0);
131137

132-
// setup for getInt7SQVectorScorer / query vector scoring
133-
float[] queryVec = new float[dims];
134-
for (int i = 0; i < dims; i++) {
135-
queryVec[i] = ThreadLocalRandom.current().nextFloat();
138+
if (supportsHeapSegments()) {
139+
// setup for getInt7SQVectorScorer / query vector scoring
140+
float[] queryVec = new float[dims];
141+
for (int i = 0; i < dims; i++) {
142+
queryVec[i] = ThreadLocalRandom.current().nextFloat();
143+
}
144+
luceneDotScorerQuery = luceneScorer(values, VectorSimilarityFunction.DOT_PRODUCT, queryVec);
145+
nativeDotScorerQuery = factory.getInt7SQVectorScorer(VectorSimilarityFunction.DOT_PRODUCT, values, queryVec).get();
146+
luceneSqrScorerQuery = luceneScorer(values, VectorSimilarityFunction.EUCLIDEAN, queryVec);
147+
nativeSqrScorerQuery = factory.getInt7SQVectorScorer(VectorSimilarityFunction.EUCLIDEAN, values, queryVec).get();
136148
}
137-
luceneDotScorerQuery = luceneScorer(values, VectorSimilarityFunction.DOT_PRODUCT, queryVec);
138-
nativeDotScorerQuery = factory.getInt7SQVectorScorer(VectorSimilarityFunction.DOT_PRODUCT, values, queryVec).get();
139-
luceneSqrScorerQuery = luceneScorer(values, VectorSimilarityFunction.EUCLIDEAN, queryVec);
140-
nativeSqrScorerQuery = factory.getInt7SQVectorScorer(VectorSimilarityFunction.EUCLIDEAN, values, queryVec).get();
141149
}
142150

143151
@TearDown
@@ -208,6 +216,10 @@ public float squareDistanceNativeQuery() throws IOException {
208216
return nativeSqrScorerQuery.score(1);
209217
}
210218

219+
static boolean supportsHeapSegments() {
220+
return Runtime.version().feature() >= 22;
221+
}
222+
211223
QuantizedByteVectorValues vectorValues(int dims, int size, IndexInput in, VectorSimilarityFunction sim) throws IOException {
212224
var sq = new ScalarQuantizer(0.1f, 0.9f, (byte) 7);
213225
var slice = in.slice("values", 0, in.length());
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* License v3.0 only", or the "Server Side Public License, v 1".
88
*/
99

10-
package org.elasticsearch.benchmark.compute.operator;
10+
package org.elasticsearch.benchmark._nightly.esql;
1111

1212
import org.elasticsearch.test.ESTestCase;
1313

benchmarks/src/test/java/org/elasticsearch/benchmark/vector/Int7uScorerBenchmarkTests.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,10 @@ public void testDotProduct() throws Exception {
4242
assertEquals(expected, bench.dotProductLucene(), delta);
4343
assertEquals(expected, bench.dotProductNative(), delta);
4444

45-
expected = bench.dotProductLuceneQuery();
46-
assertEquals(expected, bench.dotProductNativeQuery(), delta);
45+
if (Int7uScorerBenchmark.supportsHeapSegments()) {
46+
expected = bench.dotProductLuceneQuery();
47+
assertEquals(expected, bench.dotProductNativeQuery(), delta);
48+
}
4749
} finally {
4850
bench.teardown();
4951
}
@@ -60,8 +62,10 @@ public void testSquareDistance() throws Exception {
6062
assertEquals(expected, bench.squareDistanceLucene(), delta);
6163
assertEquals(expected, bench.squareDistanceNative(), delta);
6264

63-
expected = bench.squareDistanceLuceneQuery();
64-
assertEquals(expected, bench.squareDistanceNativeQuery(), delta);
65+
if (Int7uScorerBenchmark.supportsHeapSegments()) {
66+
expected = bench.squareDistanceLuceneQuery();
67+
assertEquals(expected, bench.squareDistanceNativeQuery(), delta);
68+
}
6569
} finally {
6670
bench.teardown();
6771
}

build-tools-internal/src/main/resources/forbidden/es-all-signatures.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ java.nio.channels.SocketChannel#connect(java.net.SocketAddress)
3535
# org.elasticsearch.core.Booleans#parseBoolean(java.lang.String) directly on the string.
3636
@defaultMessage use org.elasticsearch.core.Booleans#parseBoolean(java.lang.String)
3737
java.lang.Boolean#getBoolean(java.lang.String)
38+
java.lang.Boolean#parseBoolean(java.lang.String)
39+
java.lang.Boolean#valueOf(java.lang.String)
3840

3941
org.apache.lucene.util.IOUtils @ use @org.elasticsearch.core.internal.io instead
4042

docs/changelog/128917.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 128917
2+
summary: Adopt a "LogicalPlan" approach to running multiple sub-queries (with INLINESTATS
3+
so far)
4+
area: ES|QL
5+
type: enhancement
6+
issues: []

docs/changelog/129013.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
pr: 129013
2+
summary: "Add remote index support to LOOKUP JOIN"
3+
area: ES|QL
4+
type: feature
5+
issues: [ ]
6+
highlight:
7+
title: Add remote index support to LOOKUP JOIN
8+
body: |-
9+
Queries containing LOOKUP JOIN can now be performed on cross-cluster indices, for example:
10+
[source,yaml]
11+
----------------------------
12+
FROM logs-*, remote:logs-* | LOOKUP JOIN clients on ip | SORT timestamp | LIMIT 100
13+
----------------------------

0 commit comments

Comments
 (0)