Skip to content

Commit 4c284d8

Browse files
authored
After a HeapAttack failure, skip remaining tests (#142708)
Adds a utility to `ESTestCase` to allow skipping remaining tests after a single failure. Then uses it in the `HeapAttackIT`. This is good because `HeapAttackIT` failures *typically* poison the cluster under test, causing subsequent tests to fail. This poisons our failure reports and confuses our muting robot. And confuses anyone reviewing the failure. Skipping the remaining tests should make them easier to read for the robots and humans.
1 parent b841b50 commit 4c284d8

File tree

3 files changed

+55
-10
lines changed

3 files changed

+55
-10
lines changed

test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackTestCase.java

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,18 @@ public abstract class HeapAttackTestCase extends ESRestTestCase {
5454
@ClassRule
5555
public static ElasticsearchCluster cluster = Clusters.buildCluster();
5656

57-
static volatile boolean SUITE_ABORTED = false;
58-
5957
protected static final int MAX_ATTEMPTS = 5;
6058

59+
@Override
60+
protected boolean shouldFailureSkipRemainingTests() {
61+
/*
62+
* Failures will frequently poison the cluster being tested, causing the next
63+
* test to fail because the cluster has OOMed or exploded in some fun way. So
64+
* we skip them.
65+
*/
66+
return true;
67+
}
68+
6169
protected interface TryCircuitBreaking {
6270
Map<String, Object> attempt(int attempt) throws IOException;
6371
}
@@ -67,11 +75,6 @@ protected String getTestRestCluster() {
6775
return cluster.getHttpAddresses();
6876
}
6977

70-
@Before
71-
public void skipOnAborted() {
72-
assumeFalse("skip on aborted", SUITE_ABORTED);
73-
}
74-
7578
protected void assertCircuitBreaks(TryCircuitBreaking tryBreaking) throws IOException {
7679
assertCircuitBreaks(
7780
tryBreaking,
@@ -125,7 +128,6 @@ public void onFailure(Exception e) {
125128

126129
@Override
127130
protected void doRun() throws Exception {
128-
SUITE_ABORTED = true;
129131
TimeValue elapsed = TimeValue.timeValueNanos(System.nanoTime() - startedTimeInNanos);
130132
logger.info("--> test {} triggering OOM after {}", getTestName(), elapsed);
131133
Request triggerOOM = new Request("POST", "/_trigger_out_of_memory");
@@ -301,7 +303,7 @@ protected static void assertWriteResponse(Response response) throws IOException
301303
@Before
302304
@After
303305
public void assertRequestBreakerEmpty() throws Exception {
304-
if (SUITE_ABORTED) {
306+
if (previousFailureSkipsRemaining()) {
305307
return;
306308
}
307309
assertBusy(() -> {

test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,8 @@
166166
import org.junit.internal.AssumptionViolatedException;
167167
import org.junit.rules.RuleChain;
168168
import org.junit.rules.TestRule;
169+
import org.junit.rules.TestWatcher;
170+
import org.junit.runner.Description;
169171

170172
import java.io.IOException;
171173
import java.io.InputStream;
@@ -233,6 +235,7 @@
233235
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
234236
import static org.hamcrest.Matchers.hasItem;
235237
import static org.hamcrest.Matchers.startsWith;
238+
import static org.junit.Assume.assumeFalse;
236239

237240
/**
238241
* Base testcase for randomized unit testing with Elasticsearch
@@ -3182,4 +3185,44 @@ public static BytesRef embedInRandomBytes(BytesRef bytesRef) {
31823185

31833186
return new BytesRef(newBytesArray, offset, bytesRef.length);
31843187
}
3188+
3189+
private static boolean previousFailureSkipsRemaining;
3190+
@Rule
3191+
public final TestWatcher previousFailureSkipsRemainingRule = new TestWatcher() {
3192+
@Override
3193+
protected void failed(Throwable e, Description description) {
3194+
previousFailureSkipsRemaining = shouldFailureSkipRemainingTests();
3195+
}
3196+
};
3197+
3198+
@Before
3199+
public final void checkPreviousFailureSkipsRemaining() {
3200+
assumeFalse("previous failures broke system under test", previousFailureSkipsRemaining);
3201+
}
3202+
3203+
/**
3204+
* Should a failure cause subsequent tests to be skipped?
3205+
*/
3206+
protected boolean shouldFailureSkipRemainingTests() {
3207+
return false;
3208+
}
3209+
3210+
/**
3211+
* Have previous failures forced us to skip the test?
3212+
* <p>
3213+
* This should only be used in rare cases where the system being tested
3214+
* is typically poisoned by test failures. ESQL's HeapAttack tests are
3215+
* like this. As are packaging tests.
3216+
* </p>
3217+
* <p>
3218+
* If you find yourself reaching for this, ask yourself if it's the right
3219+
* tool three times before actually picking it up. If you are writing a
3220+
* unit test without dependencies this is almost certainly not the right
3221+
* tool.
3222+
* </p>
3223+
*/
3224+
protected boolean previousFailureSkipsRemaining() {
3225+
return previousFailureSkipsRemaining;
3226+
}
3227+
31853228
}

test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestCase.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -641,7 +641,7 @@ protected HttpHost buildHttpHost(String host, int port) {
641641
*/
642642
@After
643643
public final void cleanUpCluster() throws Exception {
644-
if (preserveClusterUponCompletion() == false) {
644+
if (previousFailureSkipsRemaining() == false && preserveClusterUponCompletion() == false) {
645645
ensureNoInitializingShards();
646646
wipeCluster();
647647
waitForClusterStateUpdatesToFinish();

0 commit comments

Comments
 (0)