Skip to content

Commit 32bcd49

Browse files
authored
Retry timeout tests for aggs (#122031) (#122076)
The aggs timeout test waits for the agg to return and then double-checks that the agg has stopped by using the tasks API. We're seeing some failures where the tasks API reports that the agg is still running. I can't reproduce them locally (because computers). This adds two things:
1. Logs the hot_threads so we can see whether the query is indeed still running.
2. Retries the _tasks API for a minute. If the task goes away soon after the _search returns, that's *fine*. If it sticks around for more than a few seconds, then the cancel isn't working. We wait for a full minute because CI can't be trusted to do anything quickly.
Closes #121993
1 parent 4fa137d commit 32bcd49

File tree

1 file changed

+28
-7
lines changed
  • x-pack/plugin/analytics/src/javaRestTest/java/org/elasticsearch/multiterms

1 file changed

+28
-7
lines changed

x-pack/plugin/analytics/src/javaRestTest/java/org/elasticsearch/multiterms/AggsTimeoutIT.java

Lines changed: 28 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
package org.elasticsearch.multiterms;
99

1010
import org.apache.http.client.config.RequestConfig;
11+
import org.apache.http.util.EntityUtils;
1112
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
1213
import org.elasticsearch.client.Request;
1314
import org.elasticsearch.common.Strings;
@@ -30,6 +31,7 @@
3031
import java.net.SocketTimeoutException;
3132
import java.util.Locale;
3233
import java.util.Map;
34+
import java.util.concurrent.TimeUnit;
3335

3436
import static org.elasticsearch.test.ListMatcher.matchesList;
3537
import static org.elasticsearch.test.MapMatcher.assertMap;
@@ -287,14 +289,33 @@ private void setTimeout(Request request) {
287289
request.setOptions(request.getOptions().toBuilder().setRequestConfig(config.build()));
288290
}
289291

292+
/**
293+
* Asserts that within a minute the _search has left the _tasks api.
294+
* <p>
295+
* It'd sure be more convenient if, whenever the _search has returned
296+
* back to us the _tasks API doesn't contain the _search. But sometimes
297+
* it still does. So long as it stops <strong>eventually</strong> that's
298+
* still indicative of the interrupt code working.
299+
* </p>
300+
*/
290301
private void assertNoSearchesRunning() throws Exception {
291-
Request tasks = new Request("GET", "/_tasks");
292-
tasks.addParameter("actions", "*search");
293-
tasks.addParameter("detailed", "");
294302
assertBusy(() -> {
295-
Map<?, ?> response = responseAsMap(client().performRequest(tasks));
296-
// If there are running searches the map in `nodes` is non-empty.
297-
assertMap(response, matchesMap().entry("nodes", matchesMap()));
298-
});
303+
Request tasks = new Request("GET", "/_tasks");
304+
tasks.addParameter("actions", "*search");
305+
tasks.addParameter("detailed", "");
306+
assertBusy(() -> {
307+
Map<?, ?> response = responseAsMap(client().performRequest(tasks));
308+
// If there are running searches the map in `nodes` is non-empty.
309+
if (response.isEmpty() == false) {
310+
logger.warn("search still running, hot threads:\n{}", hotThreads());
311+
}
312+
assertMap(response, matchesMap().entry("nodes", matchesMap()));
313+
});
314+
}, 1, TimeUnit.MINUTES);
315+
}
316+
317+
private String hotThreads() throws IOException {
318+
Request tasks = new Request("GET", "/_nodes/hot_threads");
319+
return EntityUtils.toString(client().performRequest(tasks).getEntity());
299320
}
300321
}

0 commit comments

Comments
 (0)