Skip to content

Commit 3f9bf9a

Browse files
authored
Allow parameter for ES|QL SAMPLE (#129392)
* Allow parameter for ES|QL SAMPLE * fix test to work around issue #120272 * remove unused postAnalysisVerification
1 parent 72b8ab4 commit 3f9bf9a

File tree

10 files changed

+71
-55
lines changed

10 files changed

+71
-55
lines changed

x-pack/plugin/esql/src/main/antlr/EsqlBaseParser.g4

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -316,5 +316,5 @@ completionCommand
316316
;
317317

318318
sampleCommand
319-
: DEV_SAMPLE probability=decimalValue
319+
: DEV_SAMPLE probability=constant
320320
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1168,7 +1168,7 @@ public enum Cap {
11681168
LUCENE_QUERY_EVALUATOR_QUERY_REWRITE,
11691169

11701170
/**
1171-
* Support parameters for LiMIT command.
1171+
* Support parameters for LIMIT command.
11721172
*/
11731173
PARAMETER_FOR_LIMIT,
11741174

@@ -1202,7 +1202,12 @@ public enum Cap {
12021202
*/
12031203
KNN_FUNCTION(Build.current().isSnapshot()),
12041204

1205-
LIKE_WITH_LIST_OF_PATTERNS;
1205+
LIKE_WITH_LIST_OF_PATTERNS,
1206+
1207+
/**
1208+
* Support parameters for SAMPLE command.
1209+
*/
1210+
PARAMETER_FOR_SAMPLE(Build.current().isSnapshot());
12061211

12071212
private final boolean enabled;
12081213

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlBaseParser.interp

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlBaseParser.java

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888
import java.util.function.Function;
8989

9090
import static java.util.Collections.emptyList;
91+
import static org.elasticsearch.xpack.esql.common.Failure.fail;
9192
import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD;
9293
import static org.elasticsearch.xpack.esql.core.util.StringUtils.WILDCARD;
9394
import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputExpressions;
@@ -783,7 +784,15 @@ public Literal inferenceId(EsqlBaseParser.IdentifierOrParameterContext ctx) {
783784
}
784785

785786
public PlanFactory visitSampleCommand(EsqlBaseParser.SampleCommandContext ctx) {
786-
var probability = visitDecimalValue(ctx.probability);
787-
return plan -> new Sample(source(ctx), probability, plan);
787+
Source source = source(ctx);
788+
Object val = expression(ctx.probability).fold(FoldContext.small() /* TODO remove me */);
789+
if (val instanceof Double probability && probability > 0.0 && probability < 1.0) {
790+
return input -> new Sample(source, new Literal(source, probability, DataType.DOUBLE), input);
791+
} else {
792+
throw new ParsingException(
793+
source(ctx),
794+
"invalid value for SAMPLE probability [" + val + "], expecting a number between 0 and 1, exclusive"
795+
);
796+
}
788797
}
789798
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Sample.java

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,16 @@
99
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
1010
import org.elasticsearch.common.io.stream.StreamInput;
1111
import org.elasticsearch.common.io.stream.StreamOutput;
12-
import org.elasticsearch.search.aggregations.bucket.sampler.random.RandomSamplingQuery;
13-
import org.elasticsearch.xpack.esql.capabilities.PostAnalysisVerificationAware;
1412
import org.elasticsearch.xpack.esql.capabilities.TelemetryAware;
15-
import org.elasticsearch.xpack.esql.common.Failures;
1613
import org.elasticsearch.xpack.esql.core.expression.Expression;
17-
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
18-
import org.elasticsearch.xpack.esql.core.expression.Foldables;
1914
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
2015
import org.elasticsearch.xpack.esql.core.tree.Source;
2116
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
2217

2318
import java.io.IOException;
2419
import java.util.Objects;
2520

26-
import static org.elasticsearch.xpack.esql.common.Failure.fail;
27-
28-
public class Sample extends UnaryPlan implements TelemetryAware, PostAnalysisVerificationAware {
21+
public class Sample extends UnaryPlan implements TelemetryAware {
2922
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(LogicalPlan.class, "Sample", Sample::new);
3023

3124
private final Expression probability;
@@ -92,13 +85,4 @@ public boolean equals(Object obj) {
9285

9386
return Objects.equals(probability, other.probability) && Objects.equals(child(), other.child());
9487
}
95-
96-
@Override
97-
public void postAnalysisVerification(Failures failures) {
98-
try {
99-
RandomSamplingQuery.checkProbabilityRange((double) Foldables.valueOf(FoldContext.small(), probability));
100-
} catch (IllegalArgumentException e) {
101-
failures.add(fail(probability, e.getMessage()));
102-
}
103-
}
10488
}

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,6 @@
118118
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.defaultEnrichResolution;
119119
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.indexWithDateDateNanosUnionType;
120120
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.loadMapping;
121-
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.randomValueOtherThanTest;
122121
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.tsdbIndexResolution;
123122
import static org.elasticsearch.xpack.esql.core.tree.Source.EMPTY;
124123
import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME;
@@ -3465,20 +3464,6 @@ public void testRrfError() {
34653464
assertThat(e.getMessage(), containsString("Unknown column [_id]"));
34663465
}
34673466

3468-
public void testRandomSampleProbability() {
3469-
assumeTrue("requires SAMPLE capability", EsqlCapabilities.Cap.SAMPLE_V3.isEnabled());
3470-
3471-
var e = expectThrows(VerificationException.class, () -> analyze("FROM test | SAMPLE 1."));
3472-
assertThat(e.getMessage(), containsString("RandomSampling probability must be strictly between 0.0 and 1.0, was [1.0]"));
3473-
3474-
e = expectThrows(VerificationException.class, () -> analyze("FROM test | SAMPLE .0"));
3475-
assertThat(e.getMessage(), containsString("RandomSampling probability must be strictly between 0.0 and 1.0, was [0.0]"));
3476-
3477-
double p = randomValueOtherThanTest(d -> 0 < d && d < 1, () -> randomDoubleBetween(0, Double.MAX_VALUE, false));
3478-
e = expectThrows(VerificationException.class, () -> analyze("FROM test | SAMPLE " + p));
3479-
assertThat(e.getMessage(), containsString("RandomSampling probability must be strictly between 0.0 and 1.0, was [" + p + "]"));
3480-
}
3481-
34823467
// TODO There's too much boilerplate involved here! We need a better way of creating FieldCapabilitiesResponses from a mapping or index.
34833468
private static FieldCapabilitiesIndexResponse fieldCapabilitiesIndexResponse(
34843469
String indexName,

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/ParsingTests.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,25 @@ public void testInvalidLimit() {
162162
assertEquals("1:13: Invalid value for LIMIT [-1], expecting a non negative integer", error("row a = 1 | limit -1"));
163163
}
164164

165+
public void testInvalidSample() {
166+
assertEquals(
167+
"1:13: invalid value for SAMPLE probability [foo], expecting a number between 0 and 1, exclusive",
168+
error("row a = 1 | sample \"foo\"")
169+
);
170+
assertEquals(
171+
"1:13: invalid value for SAMPLE probability [-1.0], expecting a number between 0 and 1, exclusive",
172+
error("row a = 1 | sample -1.0")
173+
);
174+
assertEquals(
175+
"1:13: invalid value for SAMPLE probability [0], expecting a number between 0 and 1, exclusive",
176+
error("row a = 1 | sample 0")
177+
);
178+
assertEquals(
179+
"1:13: invalid value for SAMPLE probability [1], expecting a number between 0 and 1, exclusive",
180+
error("row a = 1 | sample 1")
181+
);
182+
}
183+
165184
private String error(String query) {
166185
ParsingException e = expectThrows(ParsingException.class, () -> defaultAnalyzer.analyze(parser.createStatement(query)));
167186
String message = e.getMessage();

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@
104104
import static org.hamcrest.Matchers.hasSize;
105105
import static org.hamcrest.Matchers.instanceOf;
106106
import static org.hamcrest.Matchers.is;
107+
import static org.hamcrest.Matchers.startsWith;
107108

108109
//@TestLogging(value = "org.elasticsearch.xpack.esql:TRACE", reason = "debug")
109110
public class StatementParserTests extends AbstractStatementParserTests {
@@ -3485,8 +3486,15 @@ public void testSample() {
34853486
assumeTrue("SAMPLE requires corresponding capability", EsqlCapabilities.Cap.SAMPLE_V3.isEnabled());
34863487
expectError("FROM test | SAMPLE .1 2", "line 1:23: extraneous input '2' expecting <EOF>");
34873488
expectError("FROM test | SAMPLE .1 \"2\"", "line 1:23: extraneous input '\"2\"' expecting <EOF>");
3488-
expectError("FROM test | SAMPLE 1", "line 1:20: mismatched input '1' expecting {DECIMAL_LITERAL, '+', '-'}");
3489-
expectError("FROM test | SAMPLE", "line 1:19: mismatched input '<EOF>' expecting {DECIMAL_LITERAL, '+', '-'}");
3489+
expectError(
3490+
"FROM test | SAMPLE 1",
3491+
"line 1:13: invalid value for SAMPLE probability [1], expecting a number between 0 and 1, exclusive"
3492+
);
3493+
expectThrows(
3494+
ParsingException.class,
3495+
startsWith("line 1:19: mismatched input '<EOF>' expecting {"),
3496+
() -> statement("FROM test | SAMPLE")
3497+
);
34903498
}
34913499

34923500
static Alias alias(String name, Expression value) {

x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/10_basic.yml

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -413,20 +413,22 @@ FROM EVAL SORT LIMIT with documents_found:
413413
- match: {values.2: ["1",2.0,null,true,123,1674835275193]}
414414

415415
---
416-
"Test Unnamed Input Params Also For Limit":
416+
"Test Unnamed Input Params Also For Limit And Sample":
417417
- requires:
418418
test_runner_features: [ capabilities ]
419419
capabilities:
420420
- method: POST
421421
path: /_query
422422
parameters: [ ]
423-
capabilities: [ parameter_for_limit ]
423+
capabilities: [ parameter_for_limit, parameter_for_sample ]
424424
reason: "named or positional parameters"
425425
- do:
426426
esql.query:
427427
body:
428-
query: 'from test | eval x = ?, y = ?, z = ?, t = ?, u = ?, v = ? | keep x, y, z, t, u, v | limit ?'
429-
params: ["1", 2.0, null, true, 123, 1674835275193, 3]
428+
# The "| sort time" is to work around https://github.com/elastic/elasticsearch/issues/120272
429+
# TODO: remove it when the issue is fixed
430+
query: 'from test | sort time | sample ? | eval x = ?, y = ?, z = ?, t = ?, u = ?, v = ? | keep x, y, z, t, u, v | limit ?'
431+
params: [0.999999999999, "1", 2.0, null, true, 123, 1674835275193, 3]
430432

431433
- length: {columns: 6}
432434
- match: {columns.0.name: "x"}
@@ -455,14 +457,16 @@ FROM EVAL SORT LIMIT with documents_found:
455457
- method: POST
456458
path: /_query
457459
parameters: [ ]
458-
capabilities: [ parameter_for_limit ]
460+
capabilities: [ parameter_for_limit, parameter_for_sample ]
459461
reason: "named or positional parameters"
460462

461463
- do:
462464
esql.query:
463465
body:
464-
query: 'from test | eval x = ?, y = ?, z = ?, t = ?, u = ?, v = ? | keep x, y, z, t, u, v | limit ?'
465-
params: [{"n1" : "1"}, {"n2" : 2.0}, {"n3" : null}, {"n4" : true}, {"n5" : 123}, {"n6": 1674835275193}, {"l": 3}]
466+
# The "| sort time" is to work around https://github.com/elastic/elasticsearch/issues/120272
467+
# TODO: remove it when the issue is fixed
468+
query: 'from test | sort time | sample ? | eval x = ?, y = ?, z = ?, t = ?, u = ?, v = ? | keep x, y, z, t, u, v | limit ?'
469+
params: [{"s": 0.99999999999}, {"n1" : "1"}, {"n2" : 2.0}, {"n3" : null}, {"n4" : true}, {"n5" : 123}, {"n6": 1674835275193}, {"l": 3}]
466470

467471
- length: {columns: 6}
468472
- match: {columns.0.name: "x"}
@@ -519,14 +523,16 @@ FROM EVAL SORT LIMIT with documents_found:
519523
- method: POST
520524
path: /_query
521525
parameters: [ ]
522-
capabilities: [ parameter_for_limit ]
526+
capabilities: [ parameter_for_limit, parameter_for_sample ]
523527
reason: "named or positional parameters for field names"
524528

525529
- do:
526530
esql.query:
527531
body:
528-
query: 'from test | stats x = count(?f1), y = sum(?f2) by ?f3 | sort ?f3 | keep ?f3, x, y | limit ?l'
529-
params: [{"f1" : {"identifier" : "time"}}, {"f2" : { "identifier" : "count" }}, {"f3" : { "identifier" : "color"}}, {"l": 3}]
532+
# The "| sort time" is to work around https://github.com/elastic/elasticsearch/issues/120272
533+
# TODO: remove it when the issue is fixed
534+
query: 'from test | sort time | sample ?s | stats x = count(?f1), y = sum(?f2) by ?f3 | sort ?f3 | keep ?f3, x, y | limit ?l'
535+
params: [{"s": 0.99999999999}, {"f1" : {"identifier" : "time"}}, {"f2" : { "identifier" : "count" }}, {"f3" : { "identifier" : "color"}}, {"l": 3}]
530536

531537
- length: {columns: 3}
532538
- match: {columns.0.name: "color"}

0 commit comments

Comments
 (0)