Skip to content

Commit 6c24d44

Browse files
committed
Fix flaky test: TestBrutalShutdown.testQueryRetryOnShutdown
When debugging this tests' failures, I found that some queries were failing with an error that is not marked as retriable, causing this test to fail. By changing the respective error code to one that is retriable this test no longer seems to fail. This may have occurred due to changes in the way the task event loops are scheduled, but I am not 100% sure. Either way, I think this error code is capable of query retries, so the solution seemed suitable.
1 parent 4f3da06 commit 6c24d44

File tree

2 files changed

+14
-1
lines changed

2 files changed

+14
-1
lines changed

presto-spi/src/main/java/com/facebook/presto/spi/StandardErrorCode.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ public enum StandardErrorCode
8787
REMOTE_TASK_MISMATCH(0x0001_0008, INTERNAL_ERROR),
8888
SERVER_SHUTTING_DOWN(0x0001_0009, INTERNAL_ERROR),
8989
FUNCTION_IMPLEMENTATION_MISSING(0x0001_000A, INTERNAL_ERROR),
90-
REMOTE_BUFFER_CLOSE_FAILED(0x0001_000B, INTERNAL_ERROR),
90+
REMOTE_BUFFER_CLOSE_FAILED(0x0001_000B, INTERNAL_ERROR, true),
9191
SERVER_STARTING_UP(0x0001_000C, INTERNAL_ERROR),
9292
FUNCTION_IMPLEMENTATION_ERROR(0x0001_000D, INTERNAL_ERROR),
9393
INVALID_PROCEDURE_DEFINITION(0x0001_000E, INTERNAL_ERROR),

presto-tests/src/test/java/com/facebook/presto/tests/TestBrutalShutdown.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,15 @@
1313
*/
1414
package com.facebook.presto.tests;
1515

16+
import com.facebook.airlift.log.Logger;
1617
import com.facebook.presto.Session;
18+
import com.facebook.presto.common.ErrorCode;
1719
import com.facebook.presto.execution.TaskManager;
1820
import com.facebook.presto.server.BasicQueryInfo;
1921
import com.facebook.presto.server.testing.TestingPrestoServer;
2022
import com.facebook.presto.tpch.TpchPlugin;
2123
import com.facebook.presto.transaction.TransactionInfo;
24+
import com.google.common.base.Joiner;
2225
import com.google.common.collect.ImmutableMap;
2326
import com.google.common.util.concurrent.ListenableFuture;
2427
import com.google.common.util.concurrent.ListeningExecutorService;
@@ -30,10 +33,12 @@
3033
import java.util.ArrayList;
3134
import java.util.List;
3235
import java.util.Map;
36+
import java.util.Optional;
3337

3438
import static com.facebook.airlift.testing.Assertions.assertLessThanOrEqual;
3539
import static com.facebook.presto.execution.QueryState.FINISHED;
3640
import static com.facebook.presto.testing.TestingSession.testSessionBuilder;
41+
import static java.lang.String.format;
3742
import static java.util.concurrent.Executors.newCachedThreadPool;
3843
import static java.util.concurrent.TimeUnit.MILLISECONDS;
3944
import static org.testng.Assert.assertEquals;
@@ -42,6 +47,7 @@
4247
@Test(singleThreaded = true)
4348
public class TestBrutalShutdown
4449
{
50+
private static final Logger LOG = Logger.get(TestBrutalShutdown.class);
4551
private static final long SHUTDOWN_TIMEOUT_MILLIS = 600_000;
4652
private static final Session TINY_SESSION = testSessionBuilder()
4753
.setCatalog("tpch")
@@ -80,6 +86,13 @@ public void testQueryRetryOnShutdown()
8086
totalSuccessfulQueries++;
8187
}
8288
}
89+
if (totalQueries != totalSuccessfulQueries) {
90+
LOG.error(Joiner.on("\n").join(queryInfos.stream()
91+
.filter(queryInfo -> queryInfo.getState() != FINISHED)
92+
.map(queryInfo -> format("query %s should have been successful but is in state: %s. Error: %s, retriable: %s",
93+
queryInfo.getQueryId(), queryInfo.getState(), queryInfo.getErrorCode(), Optional.ofNullable(queryInfo.getErrorCode()).map(ErrorCode::isRetriable).orElse(null)))
94+
.iterator()));
95+
}
8396
assertEquals(totalSuccessfulQueries, totalQueries);
8497
}
8598
}

0 commit comments

Comments
 (0)