Skip to content

Commit bd3cd62

Browse files
Merge branch 'main' into fix/127466
2 parents 2e1f047 + ba95390 commit bd3cd62

File tree

36 files changed

+2051
-1398
lines changed

36 files changed

+2051
-1398
lines changed

benchmarks/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ dependencies {
4242
api(project(':libs:h3'))
4343
api(project(':modules:aggregations'))
4444
api(project(':x-pack:plugin:esql-core'))
45+
api(project(':x-pack:plugin:core'))
4546
api(project(':x-pack:plugin:esql'))
4647
api(project(':x-pack:plugin:esql:compute'))
4748
implementation project(path: ':libs:simdvec')
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.benchmark.esql;
11+
12+
import org.elasticsearch.common.logging.LogConfigurator;
13+
import org.elasticsearch.common.settings.Settings;
14+
import org.elasticsearch.index.IndexMode;
15+
import org.elasticsearch.license.XPackLicenseState;
16+
import org.elasticsearch.xpack.esql.analysis.Analyzer;
17+
import org.elasticsearch.xpack.esql.analysis.AnalyzerContext;
18+
import org.elasticsearch.xpack.esql.analysis.EnrichResolution;
19+
import org.elasticsearch.xpack.esql.analysis.Verifier;
20+
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
21+
import org.elasticsearch.xpack.esql.core.type.EsField;
22+
import org.elasticsearch.xpack.esql.core.util.DateUtils;
23+
import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry;
24+
import org.elasticsearch.xpack.esql.index.EsIndex;
25+
import org.elasticsearch.xpack.esql.index.IndexResolution;
26+
import org.elasticsearch.xpack.esql.inference.InferenceResolution;
27+
import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext;
28+
import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer;
29+
import org.elasticsearch.xpack.esql.parser.EsqlParser;
30+
import org.elasticsearch.xpack.esql.parser.QueryParams;
31+
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
32+
import org.elasticsearch.xpack.esql.plugin.EsqlPlugin;
33+
import org.elasticsearch.xpack.esql.plugin.QueryPragmas;
34+
import org.elasticsearch.xpack.esql.session.Configuration;
35+
import org.elasticsearch.xpack.esql.telemetry.Metrics;
36+
import org.elasticsearch.xpack.esql.telemetry.PlanTelemetry;
37+
import org.openjdk.jmh.annotations.Benchmark;
38+
import org.openjdk.jmh.annotations.BenchmarkMode;
39+
import org.openjdk.jmh.annotations.Fork;
40+
import org.openjdk.jmh.annotations.Measurement;
41+
import org.openjdk.jmh.annotations.Mode;
42+
import org.openjdk.jmh.annotations.OutputTimeUnit;
43+
import org.openjdk.jmh.annotations.Scope;
44+
import org.openjdk.jmh.annotations.Setup;
45+
import org.openjdk.jmh.annotations.State;
46+
import org.openjdk.jmh.annotations.Warmup;
47+
import org.openjdk.jmh.infra.Blackhole;
48+
49+
import java.util.LinkedHashMap;
50+
import java.util.Locale;
51+
import java.util.Map;
52+
import java.util.concurrent.TimeUnit;
53+
54+
import static java.util.Collections.emptyMap;
55+
import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT;
56+
57+
@Fork(1)
58+
@Warmup(iterations = 5)
59+
@Measurement(iterations = 10)
60+
@BenchmarkMode(Mode.AverageTime)
61+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
62+
@State(Scope.Benchmark)
63+
public class QueryPlanningBenchmark {
64+
65+
static {
66+
LogConfigurator.configureESLogging();
67+
}
68+
69+
private PlanTelemetry telemetry;
70+
private EsqlParser parser;
71+
private Analyzer analyzer;
72+
private LogicalPlanOptimizer optimizer;
73+
74+
@Setup
75+
public void setup() {
76+
77+
var config = new Configuration(
78+
DateUtils.UTC,
79+
Locale.US,
80+
null,
81+
null,
82+
new QueryPragmas(Settings.EMPTY),
83+
EsqlPlugin.QUERY_RESULT_TRUNCATION_MAX_SIZE.getDefault(Settings.EMPTY),
84+
EsqlPlugin.QUERY_RESULT_TRUNCATION_DEFAULT_SIZE.getDefault(Settings.EMPTY),
85+
"",
86+
false,
87+
Map.of(),
88+
System.nanoTime(),
89+
false
90+
);
91+
92+
var fields = 10_000;
93+
var mapping = LinkedHashMap.<String, EsField>newLinkedHashMap(fields);
94+
for (int i = 0; i < fields; i++) {
95+
mapping.put("field" + i, new EsField("field-" + i, TEXT, emptyMap(), true));
96+
}
97+
98+
var esIndex = new EsIndex("test", mapping, Map.of("test", IndexMode.STANDARD));
99+
100+
var functionRegistry = new EsqlFunctionRegistry();
101+
102+
telemetry = new PlanTelemetry(functionRegistry);
103+
parser = new EsqlParser();
104+
analyzer = new Analyzer(
105+
new AnalyzerContext(
106+
config,
107+
functionRegistry,
108+
IndexResolution.valid(esIndex),
109+
Map.of(),
110+
new EnrichResolution(),
111+
InferenceResolution.EMPTY
112+
),
113+
new Verifier(new Metrics(functionRegistry), new XPackLicenseState(() -> 0L))
114+
);
115+
optimizer = new LogicalPlanOptimizer(new LogicalOptimizerContext(config, FoldContext.small()));
116+
}
117+
118+
private LogicalPlan plan(String query) {
119+
var parsed = parser.createStatement(query, new QueryParams(), telemetry);
120+
var analyzed = analyzer.analyze(parsed);
121+
var optimized = optimizer.optimize(analyzed);
122+
return optimized;
123+
}
124+
125+
@Benchmark
126+
public void run(Blackhole blackhole) {
127+
blackhole.consume(plan("FROM test | LIMIT 10"));
128+
}
129+
}

docs/reference/enrich-processor/date-processor.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ The `timezone` and `locale` processor parameters are templated. This means that
7979

8080
### Example dealing with short timezone abbreviations safely [date-processor-short-timezone-example]
8181

82-
In the example below, the `message` field in the input is expected to be a string formed of a local date-time in `yyyyMMddHHmmss` format, a timezone abbreviated to one of `PST`, `CET`, or `JST` representing Pacific, Central European, or Japan time, and a payload. This field is split up using a `grok` processor, then the timezones are converted into full names using a `script` processor, then the date-time is parsed using a `date` processor, and finally the unwanted fields are discarded using a `drop` processor.
82+
In the example below, the `message` field in the input is expected to be a string formed of a local date-time in `yyyyMMddHHmmss` format, a timezone abbreviated to one of `PST`, `CET`, or `JST` representing Pacific, Central European, or Japan time, and a payload. This field is split up using a `grok` processor, then the timezones are converted into full names using a `script` processor, then the date-time is parsed using a `date` processor, and finally the unwanted fields are discarded using a `remove` processor.
8383

8484
```js
8585
{

docs/reference/query-languages/esql/_snippets/commands/layout/lookup-join.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,12 @@ added as new columns to that row.
4141
If multiple documents in the lookup index match a single row in your
4242
results, the output will contain one row for each matching combination.
4343

44-
**Examples**
45-
4644
::::{tip}
4745
In case of name collisions, the newly created columns will override existing columns.
4846
::::
4947

48+
**Examples**
49+
5050
**IP Threat correlation**: This query would allow you to see if any source
5151
IPs match known malicious addresses.
5252

muted-tests.yml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,6 @@ tests:
101101
- class: org.elasticsearch.xpack.test.rest.XPackRestIT
102102
method: test {p0=transform/transforms_reset/Test reset running transform}
103103
issue: https://github.com/elastic/elasticsearch/issues/117473
104-
- class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT
105-
method: test {p0=search.highlight/50_synthetic_source/text multi unified from vectors}
106-
issue: https://github.com/elastic/elasticsearch/issues/117815
107104
- class: org.elasticsearch.xpack.ml.integration.RegressionIT
108105
method: testTwoJobsWithSameRandomizeSeedUseSameTrainingSet
109106
issue: https://github.com/elastic/elasticsearch/issues/117805
@@ -435,9 +432,6 @@ tests:
435432
- class: org.elasticsearch.xpack.esql.qa.single_node.PushQueriesIT
436433
method: testPushCaseInsensitiveEqualityOnDefaults
437434
issue: https://github.com/elastic/elasticsearch/issues/127431
438-
- class: org.elasticsearch.xpack.esql.type.EsqlDataTypeConverterTests
439-
method: testSuggestedCast
440-
issue: https://github.com/elastic/elasticsearch/issues/127535
441435
- class: org.elasticsearch.xpack.esql.qa.single_node.GenerativeIT
442436
method: test
443437
issue: https://github.com/elastic/elasticsearch/issues/127536

server/src/main/java/org/elasticsearch/common/blobstore/support/BlobContainerUtils.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ private BlobContainerUtils() {
2222
// no instances
2323
}
2424

25-
public static final int MAX_REGISTER_CONTENT_LENGTH = 2 * Long.BYTES;
25+
public static final int MAX_REGISTER_CONTENT_LENGTH = 3 * Long.BYTES;
2626

2727
public static void ensureValidRegisterContent(BytesReference bytesReference) {
2828
if (bytesReference.length() > MAX_REGISTER_CONTENT_LENGTH) {

server/src/test/java/org/elasticsearch/common/blobstore/fs/FsBlobContainerTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ public void testCompareAndExchange() throws Exception {
237237
expectedValue.set(newValue);
238238
}
239239

240-
container.writeBlob(randomPurpose(), key, new BytesArray(new byte[17]), false);
240+
container.writeBlob(randomPurpose(), key, new BytesArray(new byte[25]), false);
241241
assertThat(
242242
safeAwaitFailure(
243243
OptionalBytesReference.class,

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/TimeSeriesBlockHash.java

Lines changed: 61 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,6 @@
3030
import org.elasticsearch.core.ReleasableIterator;
3131
import org.elasticsearch.core.Releasables;
3232

33-
import java.util.Objects;
34-
3533
/**
3634
* An optimized block hash that receives two blocks: tsid and timestamp, which are sorted.
3735
* Since the incoming data is sorted, this block hash appends the incoming data to the internal arrays without lookup.
@@ -41,7 +39,7 @@ public final class TimeSeriesBlockHash extends BlockHash {
4139
private final int tsHashChannel;
4240
private final int timestampIntervalChannel;
4341

44-
private final BytesRef lastTsid = new BytesRef();
42+
private int lastTsidPosition = 0;
4543
private final BytesRefArrayWithSize tsidArray;
4644

4745
private long lastTimestamp;
@@ -64,44 +62,77 @@ public void close() {
6462
Releasables.close(tsidArray, timestampArray, perTsidCountArray);
6563
}
6664

65+
private OrdinalBytesRefVector getTsidVector(Page page) {
66+
BytesRefBlock block = page.getBlock(tsHashChannel);
67+
var ordinalBlock = block.asOrdinals();
68+
if (ordinalBlock == null) {
69+
throw new IllegalStateException("expected ordinal block for tsid");
70+
}
71+
var ordinalVector = ordinalBlock.asVector();
72+
if (ordinalVector == null) {
73+
throw new IllegalStateException("expected ordinal vector for tsid");
74+
}
75+
return ordinalVector;
76+
}
77+
78+
private LongVector getTimestampVector(Page page) {
79+
final LongBlock timestampsBlock = page.getBlock(timestampIntervalChannel);
80+
LongVector timestampsVector = timestampsBlock.asVector();
81+
if (timestampsVector == null) {
82+
throw new IllegalStateException("expected long vector for timestamp");
83+
}
84+
return timestampsVector;
85+
}
86+
6787
@Override
6888
public void add(Page page, GroupingAggregatorFunction.AddInput addInput) {
69-
final BytesRefBlock tsidBlock = page.getBlock(tsHashChannel);
70-
final BytesRefVector tsidVector = Objects.requireNonNull(tsidBlock.asVector(), "tsid input must be a vector");
71-
final LongBlock timestampBlock = page.getBlock(timestampIntervalChannel);
72-
final LongVector timestampVector = Objects.requireNonNull(timestampBlock.asVector(), "timestamp input must be a vector");
73-
try (var ordsBuilder = blockFactory.newIntVectorBuilder(tsidVector.getPositionCount())) {
89+
final BytesRefVector tsidDict;
90+
final IntVector tsidOrdinals;
91+
{
92+
final var tsidVector = getTsidVector(page);
93+
tsidDict = tsidVector.getDictionaryVector();
94+
tsidOrdinals = tsidVector.getOrdinalsVector();
95+
}
96+
try (var ordsBuilder = blockFactory.newIntVectorBuilder(tsidOrdinals.getPositionCount())) {
7497
final BytesRef spare = new BytesRef();
75-
// TODO: optimize incoming ordinal block
76-
for (int i = 0; i < tsidVector.getPositionCount(); i++) {
77-
final BytesRef tsid = tsidVector.getBytesRef(i, spare);
98+
final BytesRef lastTsid = new BytesRef();
99+
final LongVector timestampVector = getTimestampVector(page);
100+
int lastOrd = -1;
101+
for (int i = 0; i < tsidOrdinals.getPositionCount(); i++) {
102+
final int newOrd = tsidOrdinals.getInt(i);
103+
boolean newGroup = false;
104+
if (lastOrd != newOrd) {
105+
final var newTsid = tsidDict.getBytesRef(newOrd, spare);
106+
if (positionCount() == 0) {
107+
newGroup = true;
108+
} else if (lastOrd == -1) {
109+
tsidArray.get(lastTsidPosition, lastTsid);
110+
newGroup = lastTsid.equals(newTsid) == false;
111+
} else {
112+
newGroup = true;
113+
}
114+
if (newGroup) {
115+
endTsidGroup();
116+
lastTsidPosition = tsidArray.count;
117+
tsidArray.append(newTsid);
118+
}
119+
lastOrd = newOrd;
120+
}
78121
final long timestamp = timestampVector.getLong(i);
79-
ordsBuilder.appendInt(addOnePosition(tsid, timestamp));
122+
if (newGroup || timestamp != lastTimestamp) {
123+
assert newGroup || lastTimestamp >= timestamp : "@timestamp goes backward " + lastTimestamp + " < " + timestamp;
124+
timestampArray.append(timestamp);
125+
lastTimestamp = timestamp;
126+
currentTimestampCount++;
127+
}
128+
ordsBuilder.appendInt(timestampArray.count - 1);
80129
}
81130
try (var ords = ordsBuilder.build()) {
82131
addInput.add(0, ords);
83132
}
84133
}
85134
}
86135

87-
private int addOnePosition(BytesRef tsid, long timestamp) {
88-
boolean newGroup = false;
89-
if (positionCount() == 0 || lastTsid.equals(tsid) == false) {
90-
assert positionCount() == 0 || lastTsid.compareTo(tsid) < 0 : "tsid goes backward ";
91-
endTsidGroup();
92-
tsidArray.append(tsid);
93-
tsidArray.get(tsidArray.count - 1, lastTsid);
94-
newGroup = true;
95-
}
96-
if (newGroup || timestamp != lastTimestamp) {
97-
assert newGroup || lastTimestamp >= timestamp : "@timestamp goes backward " + lastTimestamp + " < " + timestamp;
98-
timestampArray.append(timestamp);
99-
lastTimestamp = timestamp;
100-
currentTimestampCount++;
101-
}
102-
return positionCount() - 1;
103-
}
104-
105136
private void endTsidGroup() {
106137
if (currentTimestampCount > 0) {
107138
perTsidCountArray.append(currentTimestampCount);
@@ -270,7 +301,6 @@ void get(int index, BytesRef dest) {
270301

271302
BytesRefVector toVector() {
272303
BytesRefVector vector = blockFactory.newBytesRefArrayVector(array, count);
273-
blockFactory.adjustBreaker(vector.ramBytesUsed() - array.bigArraysRamBytesUsed());
274304
array = null;
275305
return vector;
276306
}

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefBlock.java

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,11 @@ void writeOrdinalBlock(StreamOutput out) throws IOException {
5454
* Returns true if this ordinal block is dense enough to enable optimizations using its ordinals
5555
*/
5656
public boolean isDense() {
57-
return isDense(bytes.getPositionCount(), ordinals.getTotalValueCount());
57+
return isDense(ordinals.getTotalValueCount(), bytes.getPositionCount());
5858
}
5959

60-
public static boolean isDense(int totalPositions, int numOrdinals) {
61-
return numOrdinals * 2L / 3L >= totalPositions;
60+
public static boolean isDense(long totalPositions, long dictionarySize) {
61+
return totalPositions >= 10 && totalPositions >= dictionarySize * 2L;
6262
}
6363

6464
public IntBlock getOrdinalsBlock() {
@@ -75,7 +75,7 @@ public BytesRef getBytesRef(int valueIndex, BytesRef dest) {
7575
}
7676

7777
@Override
78-
public BytesRefVector asVector() {
78+
public OrdinalBytesRefVector asVector() {
7979
IntVector vector = ordinals.asVector();
8080
if (vector != null) {
8181
return new OrdinalBytesRefVector(vector, bytes);
@@ -251,6 +251,20 @@ public long ramBytesUsed() {
251251
return ordinals.ramBytesUsed() + bytes.ramBytesUsed();
252252
}
253253

254+
@Override
255+
public boolean equals(Object o) {
256+
if (o instanceof BytesRefBlock b) {
257+
return BytesRefBlock.equals(this, b);
258+
} else {
259+
return false;
260+
}
261+
}
262+
263+
@Override
264+
public int hashCode() {
265+
return BytesRefBlock.hash(this);
266+
}
267+
254268
@Override
255269
public String toString() {
256270
return getClass().getSimpleName() + "[ordinals=" + ordinals + ", bytes=" + bytes + "]";

0 commit comments

Comments
 (0)