Skip to content

Commit 0adc9b1

Browse files
committed
JVMCBC-1645 Test: Stabilize QueryDelayDisconnectIntegrationTest
Motivation
----------
This test was failing on CI because it was not letting the base class configure the cluster environment via `createCluster()`.

Modifications
-------------
Create the cluster using `createCluster()` so the base class can configure the cluster environment.

Rework the test so it more reliably triggers the `EndpointDisconnectDelayedEvent`. Unfortunately, there doesn't seem to be a great way to cancel an in-flight query, so the test relies on a long-running query that is expected to time out.

Change-Id: Ief262d95be64fc40e1d35b837a1a64e0dd6e102f
Reviewed-on: https://review.couchbase.org/c/couchbase-jvm-clients/+/228014
Tested-by: Build Bot <[email protected]>
Reviewed-by: Will Broadbelt <[email protected]>
1 parent 8406f42 commit 0adc9b1

File tree

2 files changed

+109
-102
lines changed

2 files changed

+109
-102
lines changed

java-client/src/integrationTest/java/com/couchbase/client/java/QueryDelayDisconnectIntegrationTest.java

Lines changed: 97 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -16,132 +16,134 @@
1616

1717
package com.couchbase.client.java;
1818

19+
import com.couchbase.client.core.cnc.Event;
1920
import com.couchbase.client.core.cnc.SimpleEventBus;
2021
import com.couchbase.client.core.cnc.events.endpoint.EndpointDisconnectDelayedEvent;
2122
import com.couchbase.client.core.cnc.events.endpoint.EndpointDisconnectResumedEvent;
22-
import com.couchbase.client.core.cnc.events.endpoint.EndpointStateChangedEvent;
2323
import com.couchbase.client.core.config.ProposedGlobalConfigContext;
24-
import com.couchbase.client.core.service.ServiceType;
25-
import com.couchbase.client.java.env.ClusterEnvironment;
26-
import com.couchbase.client.java.query.QueryScanConsistency;
24+
import com.couchbase.client.core.error.TimeoutException;
25+
import com.couchbase.client.java.json.JsonArray;
2726
import com.couchbase.client.java.util.JavaIntegrationTest;
2827
import com.couchbase.client.test.Capabilities;
2928
import com.couchbase.client.test.IgnoreWhen;
30-
import org.junit.jupiter.api.AfterAll;
31-
import org.junit.jupiter.api.AfterEach;
32-
import org.junit.jupiter.api.BeforeAll;
33-
import org.junit.jupiter.api.BeforeEach;
29+
import org.jspecify.annotations.Nullable;
3430
import org.junit.jupiter.api.Test;
31+
import org.opentest4j.AssertionFailedError;
32+
import org.slf4j.Logger;
33+
import org.slf4j.LoggerFactory;
3534

3635
import java.time.Duration;
37-
import java.util.Collections;
38-
import java.util.LinkedList;
3936
import java.util.List;
40-
import java.util.concurrent.ExecutionException;
41-
import java.util.concurrent.atomic.AtomicBoolean;
37+
import java.util.UUID;
38+
import java.util.concurrent.CompletableFuture;
4239

40+
import static com.couchbase.client.core.util.CbCollections.listOf;
41+
import static com.couchbase.client.java.QueryIntegrationTest.verySlowQueryStatement;
4342
import static com.couchbase.client.java.manager.query.QueryIndexManagerIntegrationTest.DISABLE_QUERY_TESTS_FOR_CLUSTER;
4443
import static com.couchbase.client.java.query.QueryOptions.queryOptions;
44+
import static com.couchbase.client.test.Util.waitUntilCondition;
45+
import static java.util.concurrent.TimeUnit.MILLISECONDS;
46+
import static java.util.stream.Collectors.toList;
4547
import static org.junit.jupiter.api.Assertions.assertEquals;
48+
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
4649

47-
/**
48-
* Verifies the delayed disconnect of an in-progress query Must be in package com.couchbase.client.core to override
49-
* Core.createConfigurationProvider()
50-
* <p>
51-
* Disabling against 5.5. See comment on QueryIndexManagerIntegrationTest for details.
52-
*/
5350
@IgnoreWhen(missesCapabilities = {Capabilities.QUERY, Capabilities.CLUSTER_LEVEL_QUERY},
5451
clusterVersionEquals = DISABLE_QUERY_TESTS_FOR_CLUSTER)
5552
class QueryDelayDisconnectIntegrationTest extends JavaIntegrationTest {
53+
private static final Logger log = LoggerFactory.getLogger(QueryDelayDisconnectIntegrationTest.class);
5654

57-
private static ClusterEnvironment initEnv;
58-
private static Cluster initCluster;
59-
60-
private static ClusterEnvironment environment;
61-
private static Cluster cluster;
62-
63-
private static SimpleEventBus eventBus = new SimpleEventBus(true, Collections.singletonList(EndpointStateChangedEvent.class));
64-
65-
@BeforeAll
66-
static void setup() throws ExecutionException, InterruptedException {
67-
68-
// create a cluster for initialization.
69-
// do not use the same cluster for testing as the initialization cluster is polluted by accessing the bucket.
70-
71-
initEnv = ClusterEnvironment.builder().build();
72-
ClusterOptions initOpts = ClusterOptions.clusterOptions(authenticator()).environment(initEnv);
73-
initCluster = Cluster.connect(connectionString(), initOpts);//createCluster(env -> ClusterEnvironment.builder().eventBus(eventBus).build());
74-
Bucket bucket = initCluster.bucket(config().bucketname());
75-
76-
bucket.waitUntilReady(WAIT_UNTIL_READY_DEFAULT);
77-
waitForService(bucket, ServiceType.QUERY);
78-
waitForQueryIndexerToHaveKeyspace(initCluster, config().bucketname());
79-
80-
createPrimaryIndex(initCluster, config().bucketname());
81-
82-
for (int i = 0; i < 100; i++) {
83-
initCluster.bucket(config().bucketname()).defaultCollection().upsert("" + i, "{}");
55+
@Test
56+
void inflightQueryIsAllowedToCompleteIfNodeLeavesCluster() throws Throwable {
57+
// This test relies on a query timeout expiring. Minimize typical execution time by
58+
// starting with a short timeout and trying again with a longer timeout if necessary.
59+
try {
60+
testWithQueryTimeout(Duration.ofSeconds(4)); // long enough for laptop or GHA
61+
} catch (Throwable t) {
62+
log.warn("Test failed with short query timeout. Trying again with longer timeout.", t);
63+
testWithQueryTimeout(Duration.ofSeconds(40)); // long enough for a glacial CI environment
8464
}
85-
86-
// create a cluster for testing
87-
88-
environment = ClusterEnvironment.builder().eventBus(eventBus).ioConfig(io -> io.configPollInterval(Duration.ofHours(24))).build();
89-
ClusterOptions opts = ClusterOptions.clusterOptions(authenticator()).environment(environment);
90-
cluster = Cluster.connect(connectionString(), opts);
91-
9265
}
9366

94-
@BeforeEach
95-
void beforeEach() {
67+
static void testWithQueryTimeout(Duration queryTimeout) throws Exception {
68+
Duration gracePeriod = queryTimeout.dividedBy(3);
69+
log.info("Testing with queryTimeout={} gracePeriod={}", queryTimeout, gracePeriod);
70+
71+
SimpleEventBus eventBus = new SimpleEventBus(true);
72+
73+
try (Cluster cluster = createCluster(env -> env
74+
.eventBus(eventBus)
75+
.ioConfig(io -> io.configPollInterval(Duration.ofHours(24)))
76+
)) {
77+
String clientContextId = UUID.randomUUID().toString(); // so we can monitor query state
78+
CompletableFuture<Throwable> queryErrorFuture = new CompletableFuture<>();
79+
80+
log.info("Scheduling a query we expect to time out.");
81+
cluster.reactive()
82+
.query(
83+
verySlowQueryStatement(),
84+
queryOptions()
85+
.clientContextId(clientContextId)
86+
.timeout(queryTimeout)
87+
)
88+
.subscribe(
89+
result -> queryErrorFuture.complete(new AssertionFailedError("Expected query to time out.")),
90+
queryErrorFuture::complete
91+
);
92+
93+
log.info("Waiting for query execution to start.");
94+
waitUntilCondition(
95+
() -> assertEquals("running", getQueryState(cluster, clientContextId), "query state"),
96+
gracePeriod
97+
);
98+
99+
log.info("Tricking the SDK into thinking all query nodes went away.");
100+
cluster.core().configurationProvider().proposeGlobalConfig(
101+
new ProposedGlobalConfigContext(dummyConfigWithNoQueryNodes, "127.0.0.1", true)
102+
);
103+
104+
log.info("Verifying network channel closure was deferred.");
105+
waitUntilEvents(eventBus, gracePeriod, listOf(
106+
EndpointDisconnectDelayedEvent.class
107+
));
108+
109+
log.info("Waiting for query timeout.");
110+
Duration pollTimeout = queryTimeout.plus(gracePeriod);
111+
Throwable t = queryErrorFuture.get(pollTimeout.toMillis(), MILLISECONDS);
112+
assertInstanceOf(TimeoutException.class, t);
113+
114+
log.info("Verifying network channel was closed.");
115+
waitUntilEvents(eventBus, gracePeriod, listOf(
116+
EndpointDisconnectDelayedEvent.class,
117+
EndpointDisconnectResumedEvent.class
118+
));
119+
}
96120
}
97121

98-
@AfterEach
99-
void afterEach() {
100-
eventBus.clear();
122+
private static void waitUntilEvents(SimpleEventBus eventBus, Duration gracePeriod, List<Class<?>> expectedEventClasses) {
123+
waitUntilCondition(
124+
() -> assertEquals(expectedEventClasses, getEventClasses(eventBus)),
125+
gracePeriod
126+
);
101127
}
102128

103-
@AfterAll
104-
static void tearDown() {
105-
cluster.disconnect();
106-
environment.shutdown();
107-
108-
for (int i = 0; i < 100; i++) {
109-
initCluster.bucket(config().bucketname()).defaultCollection().remove("" + i);
110-
}
111-
initCluster.disconnect();
112-
initEnv.shutdown();
129+
private static List<Class<?>> getEventClasses(SimpleEventBus eventBus) {
130+
return eventBus.publishedEvents().stream()
131+
.filter(e -> e instanceof EndpointDisconnectDelayedEvent || e instanceof EndpointDisconnectResumedEvent)
132+
.map(Event::getClass)
133+
.collect(toList());
113134
}
114135

115-
@Test
116-
void simpleQueryClose() throws InterruptedException, ExecutionException {
117-
118-
// Start a query.
119-
// When the first row is retrieved, modify the configuration by removing the n1ql nodes
120-
// The query endpoint will not be closed until the query completes
121-
122-
AtomicBoolean first = new AtomicBoolean(true);
123-
cluster.reactive().query(
124-
"select * from `" + config().bucketname() + "` a UNNEST(SELECT b.* FROM `" + config().bucketname()
125-
+ "` b limit 100) AS c limit 10000",
126-
queryOptions().metrics(true).scanConsistency(QueryScanConsistency.REQUEST_PLUS)
127-
).block()
128-
.rowsAs(byte[].class).doOnNext(it -> {
129-
if (first.compareAndSet(true, false)) {
130-
cluster.reactive().core().configurationProvider().proposeGlobalConfig(
131-
new ProposedGlobalConfigContext(dummyConfig, "localhost", true)
132-
);
133-
}
134-
}).blockLast();
135-
cluster.reactive().core().shutdown().block(); // flush out events
136-
List<Class> events = new LinkedList<>();
137-
eventBus.publishedEvents().stream().filter(e -> e instanceof EndpointDisconnectDelayedEvent || e instanceof EndpointDisconnectResumedEvent).forEach(e -> events.add(e.getClass()));
138-
assertEquals(2, events.size());
139-
assertEquals(events.get(0), EndpointDisconnectDelayedEvent.class);
140-
assertEquals(events.get(1), EndpointDisconnectResumedEvent.class);
141-
136+
private static @Nullable String getQueryState(Cluster cluster, String clientContextId) {
137+
return cluster.query(
138+
"SELECT RAW state FROM system:active_requests WHERE clientContextID = ?",
139+
queryOptions()
140+
.parameters(JsonArray.from(clientContextId))
141+
)
142+
.rowsAs(String.class)
143+
.stream().findFirst().orElse(null);
142144
}
143145

144-
String dummyConfig = "{\n" +
146+
private static final String dummyConfigWithNoQueryNodes = "{\n" +
145147
" \"revEpoch\": 9999999999,\n" +
146148
" \"rev\": 13205,\n" +
147149
" \"nodesExt\": [\n" +

java-client/src/integrationTest/java/com/couchbase/client/java/QueryIntegrationTest.java

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -252,23 +252,28 @@ void blockingStreamingRowActionRunsInCallerThread() {
252252

253253
@Test
254254
void blockingStreamingCanTimeOut() {
255-
// Thanks vsr1! https://www.couchbase.com/forums/t/how-to-force-a-sql-query-to-take-a-long-time/39658
256-
String verySlowStatement = "SELECT COUNT (1) AS c FROM" +
257-
" ARRAY_RANGE(0,10000) AS d1," +
258-
" ARRAY_RANGE(0,10000) AS d2," +
259-
" ARRAY_RANGE(0,10000) AS d3";
260-
261255
assertThrows(
262256
TimeoutException.class,
263257
() -> cluster.queryStreaming(
264-
verySlowStatement,
258+
verySlowQueryStatement(),
265259
queryOptions()
266260
.timeout(Duration.ofMillis(1)),
267261
row -> fail("Did not expect to receive result row")
268262
)
269263
);
270264
}
271265

266+
/**
267+
* Returns a query statement that is not expected to complete in a reasonable amount of time.
268+
*/
269+
static String verySlowQueryStatement() {
270+
// Thanks vsr1! https://www.couchbase.com/forums/t/how-to-force-a-sql-query-to-take-a-long-time/39658
271+
return "SELECT COUNT (1) AS c FROM" +
272+
" ARRAY_RANGE(0,10000) AS d1," +
273+
" ARRAY_RANGE(0,10000) AS d2," +
274+
" ARRAY_RANGE(0,10000) AS d3";
275+
}
276+
272277
@Test
273278
void blockingStreamingThrowsCancellationWhenThreadAlreadyInterrupted() {
274279
Thread.currentThread().interrupt();

0 commit comments

Comments
 (0)