-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Improve filter handling for ESQL CCS #126807
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 13 commits
9f05ded
6f348b9
7313daf
fb7fb6a
8610845
a51bf2a
d58d6e6
9f432eb
c730f21
d2df0e1
492daae
9cc3fad
14e70bb
f9d78f2
216bd11
6cbfe19
9098695
e66972a
7b71cf7
4cbf9c3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -132,4 +132,8 @@ public DataType type() { | |
| public List<String> originalTypes() { | ||
| return originalTypes; | ||
| } | ||
|
|
||
| public String toString() { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not required for the fix strictly speaking, but it improves observability of columns and makes it much easier to inspect them when debugging. |
||
| return "ColumnInfoImpl{" + "name='" + name + '\'' + ", type=" + type + ", originalTypes=" + originalTypes + '}'; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -355,15 +355,13 @@ public void analyzedPlan( | |
| }).<PreAnalysisResult>andThen((l, result) -> { | ||
| // TODO in follow-PR (for skip_unavailable handling of missing concrete indexes) add some tests for | ||
| // invalid index resolution to updateExecutionInfo | ||
| if (result.indices.isValid()) { | ||
| // CCS indices and skip_unavailable cluster values can stop the analysis right here | ||
| if (allCCSClustersSkipped(executionInfo, result, logicalPlanListener)) return; | ||
| } | ||
| // If we run out of clusters to search due to unavailability we can stop the analysis right here | ||
| if (result.indices.isValid() && allCCSClustersSkipped(executionInfo, result, logicalPlanListener)) return; | ||
| // whatever tuple we have here (from CCS-special handling or from the original pre-analysis), pass it on to the next step | ||
| l.onResponse(result); | ||
| }).<PreAnalysisResult>andThen((l, result) -> { | ||
| // first attempt (maybe the only one) at analyzing the plan | ||
| analyzeAndMaybeRetry(analyzeAction, requestFilter, result, logicalPlanListener, l); | ||
| analyzeAndMaybeRetry(analyzeAction, requestFilter, result, executionInfo, logicalPlanListener, l); | ||
| }).<PreAnalysisResult>andThen((l, result) -> { | ||
| assert requestFilter != null : "The second pre-analysis shouldn't take place when there is no index filter in the request"; | ||
|
|
||
|
|
@@ -374,6 +372,10 @@ public void analyzedPlan( | |
| LOGGER.debug("Analyzing the plan (second attempt, without filter)"); | ||
| LogicalPlan plan; | ||
| try { | ||
| // the order here is tricky - if the cluster has been filtered and later became unavailable, | ||
| // do we want to declare it successful or skipped? For now, unavailability takes precedence. | ||
smalyshev marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| EsqlCCSUtils.updateExecutionInfoWithUnavailableClusters(executionInfo, result.indices.unavailableClusters()); | ||
| EsqlCCSUtils.updateExecutionInfoWithClustersWithNoMatchingIndices(executionInfo, result.indices, null); | ||
| plan = analyzeAction.apply(result); | ||
| } catch (Exception e) { | ||
| l.onFailure(e); | ||
|
|
@@ -484,12 +486,12 @@ private boolean allCCSClustersSkipped( | |
| ActionListener<LogicalPlan> logicalPlanListener | ||
| ) { | ||
| IndexResolution indexResolution = result.indices; | ||
| EsqlCCSUtils.updateExecutionInfoWithClustersWithNoMatchingIndices(executionInfo, indexResolution); | ||
| EsqlCCSUtils.updateExecutionInfoWithUnavailableClusters(executionInfo, indexResolution.unavailableClusters()); | ||
| if (executionInfo.isCrossClusterSearch() | ||
| && executionInfo.getClusterStates(EsqlExecutionInfo.Cluster.Status.RUNNING).findAny().isEmpty()) { | ||
| // for a CCS, if all clusters have been marked as SKIPPED, nothing to search so send a sentinel Exception | ||
| // to let the LogicalPlanActionListener decide how to proceed | ||
| LOGGER.debug("No more clusters to search, ending analysis stage"); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this logging useful? If we see that in the logs, how does it help? Is it useful for debugging? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it is as it shows what is happening in the debug log - and yes, this is mainly for debugging. |
||
| logicalPlanListener.onFailure(new NoClustersToSearchException()); | ||
| return true; | ||
| } | ||
|
|
@@ -501,6 +503,7 @@ private static void analyzeAndMaybeRetry( | |
| Function<PreAnalysisResult, LogicalPlan> analyzeAction, | ||
| QueryBuilder requestFilter, | ||
| PreAnalysisResult result, | ||
| EsqlExecutionInfo executionInfo, | ||
| ActionListener<LogicalPlan> logicalPlanListener, | ||
| ActionListener<PreAnalysisResult> l | ||
| ) { | ||
|
|
@@ -510,6 +513,10 @@ private static void analyzeAndMaybeRetry( | |
| LOGGER.debug("Analyzing the plan ({} attempt, {} filter)", attemptMessage, filterPresentMessage); | ||
|
|
||
| try { | ||
| if (result.indices.isValid() || requestFilter != null) { | ||
smalyshev marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // Capture filtered out indices | ||
| EsqlCCSUtils.updateExecutionInfoWithClustersWithNoMatchingIndices(executionInfo, result.indices, requestFilter); | ||
| } | ||
| plan = analyzeAction.apply(result); | ||
| } catch (Exception e) { | ||
| if (e instanceof VerificationException ve) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there any valid reason for this change?
Fyi, the line:column information there is relevant for the accuracy of the parsing error messaging system. Those two numbers are linked to the Node itself (which comes from the ANTLR parser) and is used by the Verifier to build a consistent error message, no matter the error message itself and which Node it comes from.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this test is too strict as it doesn't allow changing any details of how the errors are reported, and that led to some failures, but this may be older versions of the patch and I'll re-check why exactly it is needed. In general, I think our tests relying on specific error messages makes some things hard to refactor or fix (i.e. we have some parts of code produce "unknown index" and some "no such index" and we have a lot of tests that rely on that, and that means we essentially are testing implementation details instead of functionality, down to exact wording of the error message). In this particular case, beyond checking that there's an unknown index, it also checks that this is the only problem and that it is reported in a very specific way - which means if we changed how exactly unknown indices are handled in a particular case (e.g., say, moved the check from runtime to planning time, as we may have to do), this test would likely break. It may not be necessary now for this patch - going to check that - but I think this is something we may want to consider.