Skip to content

Commit 8cd4ae7

Browse files
authored
Merge branch 'main' into add-data-stream-lifecycle-retention-to-templates
2 parents e44b2fb + 7d1076e commit 8cd4ae7

File tree

769 files changed

+7530
-6578
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

769 files changed

+7530
-6578
lines changed

benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/AggregatorBenchmark.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,9 @@
3939
import org.elasticsearch.compute.data.LongBlock;
4040
import org.elasticsearch.compute.data.OrdinalBytesRefVector;
4141
import org.elasticsearch.compute.data.Page;
42+
import org.elasticsearch.compute.expression.ExpressionEvaluator;
4243
import org.elasticsearch.compute.operator.AggregationOperator;
4344
import org.elasticsearch.compute.operator.DriverContext;
44-
import org.elasticsearch.compute.operator.EvalOperator;
4545
import org.elasticsearch.compute.operator.HashAggregationOperator;
4646
import org.elasticsearch.compute.operator.Operator;
4747
import org.openjdk.jmh.annotations.Benchmark;
@@ -664,7 +664,7 @@ private static AggregatorFunctionSupplier filtered(AggregatorFunctionSupplier ag
664664
return agg;
665665
}
666666
BooleanBlock mask = mask(filter).asBlock();
667-
return new FilteredAggregatorFunctionSupplier(agg, context -> new EvalOperator.ExpressionEvaluator() {
667+
return new FilteredAggregatorFunctionSupplier(agg, context -> new ExpressionEvaluator() {
668668
@Override
669669
public Block eval(Page page) {
670670
mask.incRef();

benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/EvalBenchmark.java

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.elasticsearch.compute.data.LongVector;
2828
import org.elasticsearch.compute.data.OrdinalBytesRefVector;
2929
import org.elasticsearch.compute.data.Page;
30+
import org.elasticsearch.compute.expression.ExpressionEvaluator;
3031
import org.elasticsearch.compute.operator.DriverContext;
3132
import org.elasticsearch.compute.operator.EvalOperator;
3233
import org.elasticsearch.compute.operator.Operator;
@@ -49,6 +50,7 @@
4950
import org.elasticsearch.xpack.esql.expression.function.scalar.math.RoundTo;
5051
import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvMin;
5152
import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce;
53+
import org.elasticsearch.xpack.esql.expression.function.scalar.string.JsonExtract;
5254
import org.elasticsearch.xpack.esql.expression.function.scalar.string.ToLower;
5355
import org.elasticsearch.xpack.esql.expression.function.scalar.string.ToUpper;
5456
import org.elasticsearch.xpack.esql.expression.function.scalar.string.regex.RLike;
@@ -128,6 +130,8 @@ static void selfTest() {
128130
"coalesce_2_lazy",
129131
"date_trunc",
130132
"equal_to_const",
133+
"json_extract",
134+
"json_extract_object",
131135
"long_equal_to_long",
132136
"long_equal_to_int",
133137
"mv_min",
@@ -148,7 +152,7 @@ private static Operator operator(String operation) {
148152
return new EvalOperator(driverContext, evaluator(operation));
149153
}
150154

151-
private static EvalOperator.ExpressionEvaluator evaluator(String operation) {
155+
private static ExpressionEvaluator evaluator(String operation) {
152156
return switch (operation) {
153157
case "abs" -> {
154158
FieldAttribute longField = longField();
@@ -180,7 +184,7 @@ private static EvalOperator.ExpressionEvaluator evaluator(String operation) {
180184
lhs = new Add(Source.EMPTY, lhs, new Literal(Source.EMPTY, 1L, DataType.LONG), configuration());
181185
rhs = new Add(Source.EMPTY, rhs, new Literal(Source.EMPTY, 1L, DataType.LONG), configuration());
182186
}
183-
EvalOperator.ExpressionEvaluator evaluator = EvalMapper.toEvaluator(
187+
ExpressionEvaluator evaluator = EvalMapper.toEvaluator(
184188
FOLD_CONTEXT,
185189
new Case(Source.EMPTY, condition, List.of(lhs, rhs)),
186190
layout(f1, f2)
@@ -198,7 +202,7 @@ private static EvalOperator.ExpressionEvaluator evaluator(String operation) {
198202
if (operation.endsWith("lazy")) {
199203
lhs = new Add(Source.EMPTY, lhs, new Literal(Source.EMPTY, 1L, DataType.LONG), configuration());
200204
}
201-
EvalOperator.ExpressionEvaluator evaluator = EvalMapper.toEvaluator(
205+
ExpressionEvaluator evaluator = EvalMapper.toEvaluator(
202206
FOLD_CONTEXT,
203207
new Coalesce(Source.EMPTY, lhs, List.of(f2)),
204208
layout(f1, f2)
@@ -234,6 +238,22 @@ private static EvalOperator.ExpressionEvaluator evaluator(String operation) {
234238
layout(longField)
235239
).get(driverContext);
236240
}
241+
case "json_extract" -> {
242+
FieldAttribute keywordField = keywordField();
243+
yield EvalMapper.toEvaluator(
244+
FOLD_CONTEXT,
245+
new JsonExtract(Source.EMPTY, keywordField, new Literal(Source.EMPTY, new BytesRef("user.name"), DataType.KEYWORD)),
246+
layout(keywordField)
247+
).get(driverContext);
248+
}
249+
case "json_extract_object" -> {
250+
FieldAttribute keywordField = keywordField();
251+
yield EvalMapper.toEvaluator(
252+
FOLD_CONTEXT,
253+
new JsonExtract(Source.EMPTY, keywordField, new Literal(Source.EMPTY, new BytesRef("user"), DataType.KEYWORD)),
254+
layout(keywordField)
255+
).get(driverContext);
256+
}
237257
case "long_equal_to_long" -> {
238258
FieldAttribute lhs = longField();
239259
FieldAttribute rhs = longField();
@@ -259,7 +279,7 @@ private static EvalOperator.ExpressionEvaluator evaluator(String operation) {
259279
Expression ltkb = new LessThan(Source.EMPTY, f, kb());
260280
Expression ltmb = new LessThan(Source.EMPTY, f, mb());
261281
Expression ltgb = new LessThan(Source.EMPTY, f, gb());
262-
EvalOperator.ExpressionEvaluator evaluator = EvalMapper.toEvaluator(
282+
ExpressionEvaluator evaluator = EvalMapper.toEvaluator(
263283
FOLD_CONTEXT,
264284
new Case(Source.EMPTY, ltkb, List.of(b(), ltmb, kb(), ltgb, mb(), gb())),
265285
layout(f)
@@ -273,7 +293,7 @@ private static EvalOperator.ExpressionEvaluator evaluator(String operation) {
273293
case "round_to_2" -> {
274294
FieldAttribute f = longField();
275295

276-
EvalOperator.ExpressionEvaluator evaluator = EvalMapper.toEvaluator(
296+
ExpressionEvaluator evaluator = EvalMapper.toEvaluator(
277297
FOLD_CONTEXT,
278298
new RoundTo(Source.EMPTY, f, List.of(b(), kb())),
279299
layout(f)
@@ -287,7 +307,7 @@ private static EvalOperator.ExpressionEvaluator evaluator(String operation) {
287307
case "round_to_3" -> {
288308
FieldAttribute f = longField();
289309

290-
EvalOperator.ExpressionEvaluator evaluator = EvalMapper.toEvaluator(
310+
ExpressionEvaluator evaluator = EvalMapper.toEvaluator(
291311
FOLD_CONTEXT,
292312
new RoundTo(Source.EMPTY, f, List.of(b(), kb(), mb())),
293313
layout(f)
@@ -301,7 +321,7 @@ private static EvalOperator.ExpressionEvaluator evaluator(String operation) {
301321
case "round_to_4" -> {
302322
FieldAttribute f = longField();
303323

304-
EvalOperator.ExpressionEvaluator evaluator = EvalMapper.toEvaluator(
324+
ExpressionEvaluator evaluator = EvalMapper.toEvaluator(
305325
FOLD_CONTEXT,
306326
new RoundTo(Source.EMPTY, f, List.of(b(), kb(), mb(), gb())),
307327
layout(f)
@@ -575,6 +595,14 @@ private static void checkExpected(String operation, Page actual) {
575595
}
576596
}
577597
}
598+
case "json_extract" -> {
599+
BytesRef expected = new BytesRef("John");
600+
checkBytes(operation, actual, false, new BytesRef[] { expected, expected });
601+
}
602+
case "json_extract_object" -> {
603+
BytesRef expected = new BytesRef("{\"name\":\"John\",\"age\":30}");
604+
checkBytes(operation, actual, false, new BytesRef[] { expected, expected });
605+
}
578606
case "to_lower" -> checkBytes(operation, actual, false, new BytesRef[] { new BytesRef("foo"), new BytesRef("bar") });
579607
case "to_lower_ords" -> checkBytes(operation, actual, true, new BytesRef[] { new BytesRef("foo"), new BytesRef("bar") });
580608
case "to_upper" -> checkBytes(operation, actual, false, new BytesRef[] { new BytesRef("FOO"), new BytesRef("BAR") });
@@ -642,6 +670,14 @@ private static Page page(String operation) {
642670
}
643671
yield new Page(f1.build(), f2.build());
644672
}
673+
case "json_extract", "json_extract_object" -> {
674+
var builder = blockFactory.newBytesRefVectorBuilder(BLOCK_LENGTH);
675+
BytesRef json = new BytesRef("{\"user\":{\"name\":\"John\",\"age\":30},\"active\":true}");
676+
for (int i = 0; i < BLOCK_LENGTH; i++) {
677+
builder.appendBytesRef(json);
678+
}
679+
yield new Page(builder.build().asBlock());
680+
}
645681
case "long_equal_to_long" -> {
646682
var lhs = blockFactory.newLongBlockBuilder(BLOCK_LENGTH);
647683
var rhs = blockFactory.newLongBlockBuilder(BLOCK_LENGTH);
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.benchmark.esql;
9+
10+
import org.apache.lucene.util.BytesRef;
11+
import org.elasticsearch.common.breaker.NoopCircuitBreaker;
12+
import org.elasticsearch.common.util.BigArrays;
13+
import org.elasticsearch.compute.data.Block;
14+
import org.elasticsearch.compute.data.BlockFactory;
15+
import org.elasticsearch.compute.data.Page;
16+
import org.elasticsearch.compute.expression.ExpressionEvaluator;
17+
import org.elasticsearch.compute.operator.DriverContext;
18+
import org.elasticsearch.xpack.esql.core.expression.Expression;
19+
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
20+
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
21+
import org.elasticsearch.xpack.esql.core.expression.Literal;
22+
import org.elasticsearch.xpack.esql.core.tree.Source;
23+
import org.elasticsearch.xpack.esql.core.type.DataType;
24+
import org.elasticsearch.xpack.esql.core.type.EsField;
25+
import org.elasticsearch.xpack.esql.evaluator.EvalMapper;
26+
import org.elasticsearch.xpack.esql.expression.function.scalar.string.JsonExtract;
27+
import org.elasticsearch.xpack.esql.planner.Layout;
28+
import org.openjdk.jmh.annotations.Benchmark;
29+
import org.openjdk.jmh.annotations.BenchmarkMode;
30+
import org.openjdk.jmh.annotations.Fork;
31+
import org.openjdk.jmh.annotations.Level;
32+
import org.openjdk.jmh.annotations.Measurement;
33+
import org.openjdk.jmh.annotations.Mode;
34+
import org.openjdk.jmh.annotations.OperationsPerInvocation;
35+
import org.openjdk.jmh.annotations.OutputTimeUnit;
36+
import org.openjdk.jmh.annotations.Param;
37+
import org.openjdk.jmh.annotations.Scope;
38+
import org.openjdk.jmh.annotations.Setup;
39+
import org.openjdk.jmh.annotations.State;
40+
import org.openjdk.jmh.annotations.Warmup;
41+
42+
import java.util.List;
43+
import java.util.Map;
44+
import java.util.concurrent.TimeUnit;
45+
46+
/**
47+
* Benchmarks for JSON_EXTRACT through the full eval pipeline.
48+
* Uses Pages and evaluators to match production execution paths.
49+
* Suitable for before/after comparison of byte-slicing optimization.
50+
*/
51+
@Fork(1)
52+
@Warmup(iterations = 5)
53+
@Measurement(iterations = 10)
54+
@BenchmarkMode(Mode.AverageTime)
55+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
56+
@State(Scope.Thread)
57+
public class JsonExtractBenchmark {
58+
59+
private static final int BLOCK_LENGTH = 1024;
60+
61+
private static final BlockFactory blockFactory = BlockFactory.builder(BigArrays.NON_RECYCLING_INSTANCE)
62+
.breaker(new NoopCircuitBreaker("none"))
63+
.build();
64+
65+
private static final DriverContext driverContext = new DriverContext(BigArrays.NON_RECYCLING_INSTANCE, blockFactory, null);
66+
67+
private static final FoldContext FOLD_CONTEXT = FoldContext.small();
68+
69+
@Param(
70+
{
71+
"small_object",
72+
"medium_object",
73+
"large_object",
74+
"large_nested_extract",
75+
"array_of_objects",
76+
"nested_scalar",
77+
"deep_nesting",
78+
"number",
79+
"boolean",
80+
"string" }
81+
)
82+
public String scenario;
83+
84+
private ExpressionEvaluator evaluator;
85+
private Page page;
86+
87+
@Setup(Level.Trial)
88+
public void setup() {
89+
BytesRef json;
90+
String path;
91+
92+
switch (scenario) {
93+
case "small_object" -> {
94+
json = new BytesRef("{\"user\":{\"name\":\"John\",\"age\":30},\"active\":true}");
95+
path = "user";
96+
}
97+
case "medium_object" -> {
98+
StringBuilder sb = new StringBuilder();
99+
sb.append("{\"header\":\"v1\",\"payload\":{");
100+
for (int i = 0; i < 20; i++) {
101+
if (i > 0) sb.append(",");
102+
sb.append("\"f").append(i).append("\":\"value_").append(i).append("\"");
103+
}
104+
sb.append("},\"footer\":\"end\"}");
105+
json = new BytesRef(sb.toString());
106+
path = "payload";
107+
}
108+
case "large_object" -> {
109+
StringBuilder sb = new StringBuilder();
110+
sb.append("{\"data\":{");
111+
for (int i = 0; i < 100; i++) {
112+
if (i > 0) sb.append(",");
113+
sb.append("\"field_").append(i).append("\":");
114+
sb.append("{\"value\":").append(i);
115+
sb.append(",\"label\":\"item_").append(i).append("\"");
116+
sb.append(",\"tags\":[\"a\",\"b\",\"c\"]");
117+
sb.append("}");
118+
}
119+
sb.append("},\"meta\":\"info\"}");
120+
json = new BytesRef(sb.toString());
121+
path = "data";
122+
}
123+
case "large_nested_extract" -> {
124+
StringBuilder sb = new StringBuilder();
125+
sb.append("{\"level1\":{\"level2\":{\"target\":{");
126+
for (int i = 0; i < 200; i++) {
127+
if (i > 0) sb.append(",");
128+
sb.append("\"k").append(i).append("\":\"").append("x".repeat(20)).append("\"");
129+
}
130+
sb.append("},\"other\":\"skip\"},\"more\":");
131+
sb.append("{");
132+
for (int i = 0; i < 100; i++) {
133+
if (i > 0) sb.append(",");
134+
sb.append("\"pad").append(i).append("\":").append(i);
135+
}
136+
sb.append("}}}");
137+
json = new BytesRef(sb.toString());
138+
path = "level1.level2.target";
139+
}
140+
case "array_of_objects" -> {
141+
StringBuilder sb = new StringBuilder();
142+
sb.append("{\"items\":[");
143+
for (int i = 0; i < 50; i++) {
144+
if (i > 0) sb.append(",");
145+
sb.append("{\"id\":").append(i);
146+
sb.append(",\"name\":\"item_").append(i).append("\"");
147+
sb.append(",\"details\":{\"weight\":").append(i * 1.5);
148+
sb.append(",\"tags\":[\"tag_a\",\"tag_b\"]}}");
149+
}
150+
sb.append("]}");
151+
json = new BytesRef(sb.toString());
152+
path = "items[25]";
153+
}
154+
case "nested_scalar" -> {
155+
json = new BytesRef("{\"a\":{\"b\":{\"c\":{\"d\":{\"e\":\"deeply_nested_value\"}}}}}");
156+
path = "a.b.c.d.e";
157+
}
158+
case "deep_nesting" -> {
159+
StringBuilder sb = new StringBuilder();
160+
String[] keys = { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j" };
161+
for (String key : keys) {
162+
sb.append("{\"").append(key).append("\":");
163+
}
164+
sb.append("{\"result\":42,\"items\":[1,2,3]}");
165+
for (int i = 0; i < keys.length; i++) {
166+
sb.append("}");
167+
}
168+
json = new BytesRef(sb.toString());
169+
path = "a.b.c.d.e.f.g.h.i.j";
170+
}
171+
case "number" -> {
172+
json = new BytesRef("{\"metrics\":{\"cpu\":98.6,\"memory\":1073741824,\"disk\":42},\"host\":\"server1\"}");
173+
path = "metrics.memory";
174+
}
175+
case "boolean" -> {
176+
json = new BytesRef("{\"config\":{\"enabled\":true,\"debug\":false},\"version\":2}");
177+
path = "config.enabled";
178+
}
179+
case "string" -> {
180+
json = new BytesRef("{\"user\":{\"name\":\"John Doe\",\"email\":\"john@example.com\"},\"role\":\"admin\"}");
181+
path = "user.name";
182+
}
183+
default -> throw new UnsupportedOperationException("unknown scenario: " + scenario);
184+
}
185+
186+
// Build expression: JSON_EXTRACT(field, "path")
187+
FieldAttribute field = new FieldAttribute(
188+
Source.EMPTY,
189+
"json",
190+
new EsField("json", DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE)
191+
);
192+
Expression expr = new JsonExtract(Source.EMPTY, field, new Literal(Source.EMPTY, new BytesRef(path), DataType.KEYWORD));
193+
194+
// Build evaluator through the standard eval pipeline
195+
Layout.Builder layoutBuilder = new Layout.Builder();
196+
layoutBuilder.append(List.of(field));
197+
Layout layout = layoutBuilder.build();
198+
evaluator = EvalMapper.toEvaluator(FOLD_CONTEXT, expr, layout).get(driverContext);
199+
200+
// Build page with BLOCK_LENGTH identical JSON rows
201+
var builder = blockFactory.newBytesRefVectorBuilder(BLOCK_LENGTH);
202+
for (int i = 0; i < BLOCK_LENGTH; i++) {
203+
builder.appendBytesRef(json);
204+
}
205+
page = new Page(builder.build().asBlock());
206+
}
207+
208+
@Benchmark
209+
@OperationsPerInvocation(BLOCK_LENGTH)
210+
public Block extract() {
211+
return evaluator.eval(page);
212+
}
213+
}

docs/changelog/143702.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 143702
2+
summary: "JSON_EXTRACT: zero-copy byte slicing for object, array, and number extraction"
3+
area: ES|QL
4+
type: enhancement
5+
issues: []

0 commit comments

Comments
 (0)