Skip to content

Commit 53eff0f

Browse files
committed
add SampleBreaking interface
1 parent 1318bbc commit 53eff0f

File tree

5 files changed

+91
-8
lines changed

5 files changed

+91
-8
lines changed

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ApplySampleCorrections.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,9 @@
1212
import org.elasticsearch.xpack.esql.expression.function.aggregate.HasSampleCorrection;
1313
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Mul;
1414
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
15-
import org.elasticsearch.xpack.esql.plan.logical.Limit;
1615
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
17-
import org.elasticsearch.xpack.esql.plan.logical.MvExpand;
1816
import org.elasticsearch.xpack.esql.plan.logical.Sample;
17+
import org.elasticsearch.xpack.esql.plan.logical.SampleBreaking;
1918
import org.elasticsearch.xpack.esql.rule.Rule;
2019

2120
import java.util.ArrayList;
@@ -30,15 +29,14 @@ public LogicalPlan apply(LogicalPlan logicalPlan) {
3029
if (plan instanceof Sample sample) {
3130
sampleProbabilities.add(sample.probability());
3231
}
33-
if (plan instanceof Limit || plan instanceof MvExpand) {
34-
sampleProbabilities.clear();
35-
}
3632
if (plan instanceof Aggregate && sampleProbabilities.isEmpty() == false) {
3733
plan = plan.transformExpressionsOnly(
3834
e -> e instanceof HasSampleCorrection hsc && hsc.isSampleCorrected() == false
3935
? hsc.sampleCorrection(getSampleProbability(sampleProbabilities, e.source()))
4036
: e
4137
);
38+
}
39+
if (plan instanceof SampleBreaking) {
4240
sampleProbabilities.clear();
4341
}
4442
return plan;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Aggregate.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes;
4343
import static org.elasticsearch.xpack.esql.plan.logical.Filter.checkFilterConditionDataType;
4444

45-
public class Aggregate extends UnaryPlan implements PostAnalysisVerificationAware, TelemetryAware, SortAgnostic {
45+
public class Aggregate extends UnaryPlan implements PostAnalysisVerificationAware, TelemetryAware, SortAgnostic, SampleBreaking {
4646
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
4747
LogicalPlan.class,
4848
"Aggregate",

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Limit.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import java.io.IOException;
1919
import java.util.Objects;
2020

21-
public class Limit extends UnaryPlan implements TelemetryAware {
21+
public class Limit extends UnaryPlan implements TelemetryAware, SampleBreaking {
2222
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(LogicalPlan.class, "Limit", Limit::new);
2323

2424
private final Expression limit;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/MvExpand.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import java.util.List;
2424
import java.util.Objects;
2525

26-
public class MvExpand extends UnaryPlan implements TelemetryAware, SortAgnostic {
26+
public class MvExpand extends UnaryPlan implements TelemetryAware, SortAgnostic, SampleBreaking {
2727
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(LogicalPlan.class, "MvExpand", MvExpand::new);
2828

2929
private final NamedExpression target;
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.plan.logical;
9+
10+
/**
11+
* This interface is to check whether a plan breaks the random sampling context.
12+
* <p>
13+
*
14+
* Random sampling aims to correct <code>STATS</code> after them, e.g.
15+
* <p>
16+
* <code>
17+
* | SAMPLE 0.1 | STATS SUM(value)
18+
* </code>
19+
* <p>
20+
* gives an estimate of the sum of the values, and not 10% of the sum.
21+
* <p>
22+
*
23+
* For many commands inbetween this works fine, because they can be swapped
24+
* with <code>SAMPLE</code>. For example,
25+
* <p>
26+
* <code>
27+
* | SAMPLE 0.1 | SORT value
28+
* </code>
29+
* <p>
30+
* <code>
31+
* | SAMPLE 0.1 | WHERE value > 10
32+
* </code>
33+
* <p>
34+
* are equivalent to
35+
* <p>
36+
* <code>
37+
* | SORT value | SAMPLE 0.1
38+
* </code>
39+
* <p>
40+
* <code>
41+
* | WHERE value > 10 | SAMPLE 0.1
42+
* </code>
43+
* <p>
44+
* (statistically equivalent, not necessary identical), and therefore succeeding
45+
* <code>STATS</code> can be adjusted for the sample size.
46+
* <p>
47+
*
48+
* In other cases, commands cannot be swapped with <code>SAMPLE</code>, e.g.
49+
* <p>
50+
* <code>
51+
* | SAMPLE 0.1 | MV_EXPAND value
52+
* </code>
53+
* <p>
54+
* <code>
55+
* | SAMPLE 0.1 | LIMIT 100
56+
* </code>
57+
* <p>
58+
* In those cases, it also makes no sense to correct any succeeding <code>STATS</code>.
59+
* <p>
60+
*
61+
* As a rule of thumb, if an operator can be swapped with random sampling if it maps:
62+
* <ul>
63+
* <li>
64+
* one row to one row (e.g. <code>DISSECT</code>, <code>DROP</code>, <code>ENRICH</code>,
65+
* <code>EVAL</code>, <code>GROK</code>, <code>KEEP</code>, <code>RENAME</code>)
66+
* </li>
67+
* <li>
68+
* one row to zero or one row (<code>WHERE</code>)
69+
* </li>
70+
* <li>
71+
* reorders the rows (<code>SORT</code>)
72+
* </li>
73+
* </ul>
74+
* Contrarily, it is sampling breaking (and should implement this interface) if it maps:
75+
* <ul>
76+
* <li>
77+
* one row to many rows (<code>MV_EXPAND</code>)
78+
* </li>
79+
* <li>
80+
* many rows to many rows (<code>LIMIT</code>, <code>STATS</code>)
81+
* </li>
82+
* </ul>
83+
*
84+
*/
85+
public interface SampleBreaking {}

0 commit comments

Comments
 (0)