Skip to content

Commit 3731cc1

Browse files
committed
different operator categories wrt sampling. Remove SampleBreaking interface
1 parent ca80f64 commit 3731cc1

File tree

6 files changed

+52
-94
lines changed

6 files changed

+52
-94
lines changed

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ApplySampleCorrections.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212
import org.elasticsearch.xpack.esql.expression.function.aggregate.HasSampleCorrection;
1313
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Mul;
1414
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
15+
import org.elasticsearch.xpack.esql.plan.logical.Limit;
1516
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
1617
import org.elasticsearch.xpack.esql.plan.logical.Sample;
17-
import org.elasticsearch.xpack.esql.plan.logical.SampleBreaking;
1818
import org.elasticsearch.xpack.esql.rule.Rule;
1919

2020
import java.util.ArrayList;
@@ -36,7 +36,9 @@ public LogicalPlan apply(LogicalPlan logicalPlan) {
3636
: e
3737
);
3838
}
39-
if (plan instanceof SampleBreaking) {
39+
// Operations that map many to many rows break/reset sampling.
40+
// Therefore, the sample probabilities are cleared.
41+
if (plan instanceof Aggregate || plan instanceof Limit) {
4042
sampleProbabilities.clear();
4143
}
4244
return plan;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineSample.java

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,45 @@
1111
import org.elasticsearch.xpack.esql.core.expression.Foldables;
1212
import org.elasticsearch.xpack.esql.core.expression.Literal;
1313
import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext;
14+
import org.elasticsearch.xpack.esql.plan.logical.Enrich;
15+
import org.elasticsearch.xpack.esql.plan.logical.Eval;
16+
import org.elasticsearch.xpack.esql.plan.logical.Filter;
17+
import org.elasticsearch.xpack.esql.plan.logical.Insist;
1418
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
19+
import org.elasticsearch.xpack.esql.plan.logical.OrderBy;
20+
import org.elasticsearch.xpack.esql.plan.logical.Project;
21+
import org.elasticsearch.xpack.esql.plan.logical.RegexExtract;
1522
import org.elasticsearch.xpack.esql.plan.logical.Sample;
16-
import org.elasticsearch.xpack.esql.plan.logical.SampleBreaking;
1723
import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan;
1824

25+
/**
26+
* Pushes down the SAMPLE operator. SAMPLE can be pushed down through an
27+
* operator if
28+
* <p>
29+
* <code>| SAMPLE p | OPERATOR</code>
30+
* <p>
31+
* is equivalent to
32+
* <p>
33+
* <code>| OPERATOR | SAMPLE p</code>
34+
* <p>
35+
* statistically (i.e. same possible output with same probabilities).
36+
* In that case, we push down sampling to Lucene for efficiency.
37+
* <p>
38+
*
39+
* As a rule of thumb, if an operator can be swapped with sampling if it maps:
40+
* <ul>
41+
* <li>
42+
* one row to one row (e.g. <code>DISSECT</code>, <code>DROP</code>, <code>ENRICH</code>,
43+
* <code>EVAL</code>, <code>GROK</code>, <code>KEEP</code>, <code>RENAME</code>)
44+
* </li>
45+
* <li>
46+
* one row to zero or one row (<code>WHERE</code>)
47+
* </li>
48+
* <li>
49+
* reorders the rows (<code>SORT</code>)
50+
* </li>
51+
* </ul>
52+
*/
1953
public class PushDownAndCombineSample extends OptimizerRules.ParameterizedOptimizerRule<Sample, LogicalOptimizerContext> {
2054

2155
public PushDownAndCombineSample() {
@@ -30,9 +64,16 @@ protected LogicalPlan rule(Sample sample, LogicalOptimizerContext context) {
3064
var probability = combinedProbability(context, sample, sampleChild);
3165
var seed = combinedSeed(context, sample, sampleChild);
3266
plan = new Sample(sample.source(), probability, seed, sampleChild.child());
33-
} else if (child instanceof UnaryPlan unaryChild && child instanceof SampleBreaking == false) {
34-
plan = unaryChild.replaceChild(sample.replaceChild(unaryChild.child()));
35-
}
67+
} else if (child instanceof Enrich
68+
|| child instanceof Eval
69+
|| child instanceof Filter
70+
|| child instanceof Insist
71+
|| child instanceof OrderBy
72+
|| child instanceof Project
73+
|| child instanceof RegexExtract) {
74+
var unaryChild = (UnaryPlan) child;
75+
plan = unaryChild.replaceChild(sample.replaceChild(unaryChild.child()));
76+
}
3677
return plan;
3778
}
3879

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Aggregate.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes;
4343
import static org.elasticsearch.xpack.esql.plan.logical.Filter.checkFilterConditionDataType;
4444

45-
public class Aggregate extends UnaryPlan implements PostAnalysisVerificationAware, TelemetryAware, SortAgnostic, SampleBreaking {
45+
public class Aggregate extends UnaryPlan implements PostAnalysisVerificationAware, TelemetryAware, SortAgnostic {
4646
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
4747
LogicalPlan.class,
4848
"Aggregate",

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Limit.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import java.io.IOException;
1919
import java.util.Objects;
2020

21-
public class Limit extends UnaryPlan implements TelemetryAware, SampleBreaking {
21+
public class Limit extends UnaryPlan implements TelemetryAware {
2222
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(LogicalPlan.class, "Limit", Limit::new);
2323

2424
private final Expression limit;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/MvExpand.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import java.util.List;
2424
import java.util.Objects;
2525

26-
public class MvExpand extends UnaryPlan implements TelemetryAware, SortAgnostic, SampleBreaking {
26+
public class MvExpand extends UnaryPlan implements TelemetryAware, SortAgnostic {
2727
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(LogicalPlan.class, "MvExpand", MvExpand::new);
2828

2929
private final NamedExpression target;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/SampleBreaking.java

Lines changed: 0 additions & 85 deletions
This file was deleted.

0 commit comments

Comments
 (0)