diff --git a/docs/changelog/135051.yaml b/docs/changelog/135051.yaml new file mode 100644 index 0000000000000..f9b22e0f9d570 --- /dev/null +++ b/docs/changelog/135051.yaml @@ -0,0 +1,5 @@ +pr: 135051 +summary: Ban Limit + `MvExpand` before remote Enrich +area: ES|QL +type: bug +issues: [] diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClusterEnrichIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClusterEnrichIT.java index f848c663d3ca6..301273d624614 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClusterEnrichIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClusterEnrichIT.java @@ -466,6 +466,20 @@ public void testEnrichCoordinatorThenEnrichRemote() { assertThat(error.getMessage(), containsString("ENRICH with remote policy can't be executed after [ENRICH _COORDINATOR")); } + public void testEnrichAfterMvExpandLimit() { + String query = String.format(Locale.ROOT, """ + FROM *:events,events + | SORT timestamp + | LIMIT 2 + | eval ip= TO_STR(host) + | MV_EXPAND host + | WHERE ip != "" + | %s + """, enrichHosts(Enrich.Mode.REMOTE)); + var error = expectThrows(VerificationException.class, () -> runQuery(query, randomBoolean()).close()); + assertThat(error.getMessage(), containsString("MV_EXPAND after LIMIT is incompatible with remote ENRICH")); + } + private static void assertCCSExecutionInfoDetails(EsqlExecutionInfo executionInfo) { assertThat(executionInfo.overallTook().millis(), greaterThanOrEqualTo(0L)); assertTrue(executionInfo.isCrossClusterSearch()); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java index dee3ea147fbdd..d2d764f337d14 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java @@ -13,6 +13,7 @@ import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.common.util.Maps; import org.elasticsearch.xpack.core.enrich.EnrichPolicy; +import org.elasticsearch.xpack.esql.capabilities.PostAnalysisVerificationAware; import org.elasticsearch.xpack.esql.capabilities.PostOptimizationVerificationAware; import org.elasticsearch.xpack.esql.capabilities.TelemetryAware; import org.elasticsearch.xpack.esql.common.Failures; @@ -48,6 +49,7 @@ public class Enrich extends UnaryPlan implements GeneratingPlan, PostOptimizationVerificationAware, + PostAnalysisVerificationAware, TelemetryAware, SortAgnostic, ExecutesOn { @@ -284,6 +286,36 @@ private void checkForPlansForbiddenBeforeRemoteEnrich(Failures failures) { fails.forEach(f -> failures.add(fail(this, "ENRICH with remote policy can't be executed after [" + f.text() + "]" + f.source()))); } + /** + * Remote ENRICH (and any remote operation in fact) is not compatible with MV_EXPAND + LIMIT. Consider: + * `FROM *:events | SORT @timestamp | LIMIT 2 | MV_EXPAND ip | ENRICH _remote:clientip_policy ON ip` + * Semantically, this must take two top events and then expand them. However, this can not be executed remotely, + * because this means that we have to take top 2 events on each node, then expand them, then apply Enrich, + * then bring them to the coordinator - but then we can not select top 2 of them - because that would be pre-expand! + * We do not know which expanded rows are coming from the true top rows and which are coming from "false" top rows + * which should have been thrown out. This is only possible to execute if MV_EXPAND executes on the coordinator + * - which contradicts remote Enrich. + * This could be fixed by the optimizer by moving MV_EXPAND past ENRICH, at least in some cases, but currently we do not do that. + */ + private void checkMvExpandAfterLimit(Failures failures) { + this.forEachDown(MvExpand.class, u -> { + u.forEachDown(p -> { + if (p instanceof Limit || p instanceof TopN) { + failures.add(fail(this, "MV_EXPAND after LIMIT is incompatible with remote ENRICH")); + } + }); + }); + + } + + @Override + public void postAnalysisVerification(Failures failures) { + if (this.mode == Mode.REMOTE) { + checkMvExpandAfterLimit(failures); + } + + } + @Override public void postOptimizationVerification(Failures failures) { if (this.mode == Mode.REMOTE) {