Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ public void sendResponse(Exception exception) {
} */

try (
// This only calls REMOTE_CLUSTER_1 which is skip_unavailable=true
EsqlQueryResponse resp = runQuery(
"FROM logs-*,c*:logs-* | EVAL lookup_key = v | LOOKUP JOIN values_lookup ON lookup_key",
randomBoolean()
Expand All @@ -112,9 +113,8 @@ public void sendResponse(Exception exception) {
assertThat(remoteCluster.getStatus(), equalTo(EsqlExecutionInfo.Cluster.Status.SKIPPED));
assertThat(remoteCluster.getFailures(), not(empty()));
var failure = remoteCluster.getFailures().get(0);
// FIXME: this produces a wrong message currently
// assertThat(failure.reason(), containsString(simulatedFailure.getMessage()));
assertThat(failure.reason(), containsString("lookup index [values_lookup] is not available in remote cluster [cluster-a]"));
assertThat(failure.reason(), containsString(simulatedFailure.getMessage()));
assertThat(failure.reason(), containsString("lookup failed in remote cluster [cluster-a] for index [values_lookup]"));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Prior message looks more specific to me (eg index is not available in remote cluster or no index vs lookup failed in remote cluster sounds like any failue). Does simulatedFailure message cover for that?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The prior message was wrong - this is the scenario where the index is available, but for some reason (represented by a random error) the mapping can not be fetched from the cluster. So yes, simulatedFailure will represent the real error message - instead of previously misleading message that index is not available where the problem could have been elsewhere.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing index is covered by other tests (in CrossClusterLookupJoinIT.java etc.) this is scenario where there's a general failure that is not identifiable as missing index.

}

try (
Expand All @@ -138,24 +138,26 @@ public void sendResponse(Exception exception) {
assertThat(remoteCluster2.getStatus(), equalTo(EsqlExecutionInfo.Cluster.Status.SUCCESSFUL));
assertThat(remoteCluster.getFailures(), not(empty()));
var failure = remoteCluster.getFailures().get(0);
// FIXME: this produces a wrong message currently
// assertThat(failure.reason(), containsString(simulatedFailure.getMessage()));
assertThat(failure.reason(), containsString("lookup index [values_lookup] is not available in remote cluster [cluster-a]"));
assertThat(failure.reason(), containsString(simulatedFailure.getMessage()));
assertThat(failure.reason(), containsString("lookup failed in remote cluster [cluster-a] for index [values_lookup]"));
}

// now fail
setSkipUnavailable(REMOTE_CLUSTER_1, false);
// c*: only calls REMOTE_CLUSTER_1 which is skip_unavailable=false now
Exception ex = expectThrows(
VerificationException.class,
() -> runQuery("FROM logs-*,c*:logs-* | EVAL lookup_key = v | LOOKUP JOIN values_lookup ON lookup_key", randomBoolean())
);
assertThat(ex.getMessage(), containsString("lookup index [values_lookup] is not available in remote cluster [cluster-a]"));
assertThat(ex.getMessage(), containsString("lookup failed in remote cluster [cluster-a] for index [values_lookup]"));
String message = ex.getCause() == null ? ex.getMessage() : ex.getCause().getMessage();
assertThat(message, containsString(simulatedFailure.getMessage()));

ex = expectThrows(
Exception.class,
() -> runQuery("FROM c*:logs-* | EVAL lookup_key = v | LOOKUP JOIN values_lookup ON lookup_key", randomBoolean())
);
String message = ex.getCause() == null ? ex.getMessage() : ex.getCause().getMessage();
message = ex.getCause() == null ? ex.getMessage() : ex.getCause().getMessage();
assertThat(message, containsString(simulatedFailure.getMessage()));
} finally {
for (TransportService transportService : cluster(REMOTE_CLUSTER_1).getInstances(TransportService.class)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,8 @@ public VerificationException(Failures failures) {
super(failures.toString());
}

public VerificationException(String message, Throwable cause) {
super(message, cause);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,10 @@ private void preAnalyzeLookupIndex(
}

private void skipClusterOrError(String clusterAlias, EsqlExecutionInfo executionInfo, String message) {
VerificationException error = new VerificationException(message);
skipClusterOrError(clusterAlias, executionInfo, new VerificationException(message));
}

private void skipClusterOrError(String clusterAlias, EsqlExecutionInfo executionInfo, ElasticsearchException error) {
// If we can, skip the cluster and mark it as such
if (executionInfo.shouldSkipOnFailure(clusterAlias)) {
EsqlCCSUtils.markClusterWithFinalStateAndNoShards(executionInfo, clusterAlias, EsqlExecutionInfo.Cluster.Status.SKIPPED, error);
Expand Down Expand Up @@ -528,11 +531,7 @@ private PreAnalysisResult receiveLookupIndexResolution(
String clusterAlias = cluster.getClusterAlias();
if (clustersWithResolvedIndices.containsKey(clusterAlias) == false) {
// Missing cluster resolution
skipClusterOrError(
clusterAlias,
executionInfo,
"lookup index [" + index + "] is not available " + EsqlCCSUtils.inClusterName(clusterAlias)
);
skipClusterOrError(clusterAlias, executionInfo, findFailure(lookupIndexResolution.failures(), index, clusterAlias));
}
});

Expand All @@ -542,6 +541,19 @@ private PreAnalysisResult receiveLookupIndexResolution(
);
}

private ElasticsearchException findFailure(Map<String, List<FieldCapabilitiesFailure>> failures, String index, String clusterAlias) {
if (failures.containsKey(clusterAlias)) {
var exc = failures.get(clusterAlias).stream().findFirst().map(FieldCapabilitiesFailure::getException);
if (exc.isPresent()) {
return new VerificationException(
"lookup failed " + EsqlCCSUtils.inClusterName(clusterAlias) + " for index [" + index + "]",
ExceptionsHelper.unwrapCause(exc.get())
);
}
}
return new VerificationException("lookup index [" + index + "] is not available " + EsqlCCSUtils.inClusterName(clusterAlias));
}

/**
* Check whether the lookup index resolves to a single concrete index on all clusters or not.
* If it's a single index, we are compatible with old pre-9.2 LOOKUP JOIN code and just need to send the same resolution as we did.
Expand Down