1111import org .elasticsearch .xpack .esql .core .expression .Foldables ;
1212import org .elasticsearch .xpack .esql .core .expression .Literal ;
1313import org .elasticsearch .xpack .esql .optimizer .LogicalOptimizerContext ;
14+ import org .elasticsearch .xpack .esql .plan .logical .Enrich ;
15+ import org .elasticsearch .xpack .esql .plan .logical .Eval ;
16+ import org .elasticsearch .xpack .esql .plan .logical .Filter ;
17+ import org .elasticsearch .xpack .esql .plan .logical .Insist ;
1418import org .elasticsearch .xpack .esql .plan .logical .LogicalPlan ;
19+ import org .elasticsearch .xpack .esql .plan .logical .OrderBy ;
20+ import org .elasticsearch .xpack .esql .plan .logical .Project ;
21+ import org .elasticsearch .xpack .esql .plan .logical .RegexExtract ;
1522import org .elasticsearch .xpack .esql .plan .logical .Sample ;
16- import org .elasticsearch .xpack .esql .plan .logical .SampleBreaking ;
1723import org .elasticsearch .xpack .esql .plan .logical .UnaryPlan ;
1824
25+ /**
26+ * Pushes down the SAMPLE operator. SAMPLE can be pushed down through an
27+ * operator if
28+ * <p>
29+ * <code>| SAMPLE p | OPERATOR</code>
30+ * <p>
31+ * is equivalent to
32+ * <p>
33+ * <code>| OPERATOR | SAMPLE p</code>
34+ * <p>
35+ * statistically (i.e. same possible output with same probabilities).
36+ * In that case, we push down sampling to Lucene for efficiency.
37+ * <p>
38+ *
39+ * As a rule of thumb, an operator can be swapped with sampling if it maps:
40+ * <ul>
41+ * <li>
42+ * one row to one row (e.g. <code>DISSECT</code>, <code>DROP</code>, <code>ENRICH</code>,
43+ * <code>EVAL</code>, <code>GROK</code>, <code>KEEP</code>, <code>RENAME</code>)
44+ * </li>
45+ * <li>
46+ * one row to zero or one row (<code>WHERE</code>)
47+ * </li>
48+ * <li>
49+ * all rows to the same rows in a different order (<code>SORT</code>)
50+ * </li>
51+ * </ul>
52+ */
1953public class PushDownAndCombineSample extends OptimizerRules .ParameterizedOptimizerRule <Sample , LogicalOptimizerContext > {
2054
2155 public PushDownAndCombineSample () {
@@ -30,9 +64,16 @@ protected LogicalPlan rule(Sample sample, LogicalOptimizerContext context) {
3064 var probability = combinedProbability (context , sample , sampleChild );
3165 var seed = combinedSeed (context , sample , sampleChild );
3266 plan = new Sample (sample .source (), probability , seed , sampleChild .child ());
33- } else if (child instanceof UnaryPlan unaryChild && child instanceof SampleBreaking == false ) {
34- plan = unaryChild .replaceChild (sample .replaceChild (unaryChild .child ()));
35- }
67+ } else if (child instanceof Enrich
68+ || child instanceof Eval
69+ || child instanceof Filter
70+ || child instanceof Insist
71+ || child instanceof OrderBy
72+ || child instanceof Project
73+ || child instanceof RegexExtract ) {
74+ var unaryChild = (UnaryPlan ) child ;
75+ plan = unaryChild .replaceChild (sample .replaceChild (unaryChild .child ()));
76+ }
3677 return plan ;
3778 }
3879
0 commit comments