Skip to content

Commit d619544

Browse files
authored
Fix flapping warming cluster integration tests (#145)
* Try to fix flapping warming cluster integration tests Signed-off-by: slonka <[email protected]> * Remove delay, add assertion on 503 Signed-off-by: slonka <[email protected]>
1 parent a75b971 commit d619544

File tree

2 files changed

+74
-108
lines changed

2 files changed

+74
-108
lines changed

server/src/test/java/io/envoyproxy/controlplane/server/V2DiscoveryServerAdsWarmingClusterIT.java

Lines changed: 37 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,13 @@
11
package io.envoyproxy.controlplane.server;
22

3-
import static io.envoyproxy.controlplane.server.V2TestSnapshots.createSnapshot;
43
import static io.envoyproxy.envoy.api.v2.core.ApiVersion.V2;
54
import static io.restassured.RestAssured.given;
65
import static org.assertj.core.api.Assertions.assertThat;
76
import static org.awaitility.Awaitility.await;
87
import static org.hamcrest.Matchers.containsString;
98

109
import com.google.protobuf.util.Durations;
11-
import io.envoyproxy.controlplane.cache.CacheStatusInfo;
1210
import io.envoyproxy.controlplane.cache.NodeGroup;
13-
import io.envoyproxy.controlplane.cache.Resources;
1411
import io.envoyproxy.controlplane.cache.TestResources;
1512
import io.envoyproxy.controlplane.cache.v2.SimpleCache;
1613
import io.envoyproxy.controlplane.cache.v2.Snapshot;
@@ -26,10 +23,7 @@
2623
import io.envoyproxy.envoy.api.v2.core.Node;
2724
import io.grpc.netty.NettyServerBuilder;
2825
import io.restassured.http.ContentType;
29-
import java.util.concurrent.ConcurrentMap;
3026
import java.util.concurrent.CountDownLatch;
31-
import java.util.concurrent.ExecutorService;
32-
import java.util.concurrent.Executors;
3327
import java.util.concurrent.TimeUnit;
3428
import org.junit.ClassRule;
3529
import org.junit.Test;
@@ -43,7 +37,7 @@ public class V2DiscoveryServerAdsWarmingClusterIT {
4337
private static final String CONFIG = "envoy/ads.v2.config.yaml";
4438
private static final String GROUP = "key";
4539
private static final Integer LISTENER_PORT = 10000;
46-
private static final CustomCache<String> cache = new CustomCache<>(new NodeGroup<String>() {
40+
private static final SimpleCache<String> cache = new SimpleCache<>(new NodeGroup<String>() {
4741
@Override public String hash(Node node) {
4842
return GROUP;
4943
}
@@ -60,7 +54,6 @@ public class V2DiscoveryServerAdsWarmingClusterIT {
6054
private static final NettyGrpcServerRule ADS = new NettyGrpcServerRule() {
6155
@Override
6256
protected void configureServerBuilder(NettyServerBuilder builder) {
63-
ExecutorService executorService = Executors.newSingleThreadExecutor();
6457
final DiscoveryServerCallbacks callbacks = new DiscoveryServerCallbacks() {
6558
@Override
6659
public void onStreamOpen(long streamId, String typeUrl) {
@@ -82,7 +75,6 @@ public void onV3StreamRequest(long streamId,
8275
public void onStreamResponse(long streamId, DiscoveryRequest request, DiscoveryResponse response) {
8376
// Here we update a Snapshot with working cluster, but we change only CDS version, not EDS version.
8477
// This change allows to test if EDS will be sent anyway after CDS was sent.
85-
createSnapshotWithWorkingClusterWithTheSameEdsVersion(request, executorService);
8678
onStreamResponseLatch.countDown();
8779
}
8880
};
@@ -131,30 +123,35 @@ public void validateTestRequestToEchoServerViaEnvoy() throws InterruptedExceptio
131123

132124
String baseUri = String.format("http://%s:%d", ENVOY.getContainerIpAddress(), ENVOY.getMappedPort(LISTENER_PORT));
133125

126+
await().atMost(5, TimeUnit.SECONDS).ignoreExceptions().untilAsserted(
127+
() -> given().baseUri(baseUri).contentType(ContentType.TEXT)
128+
.when().get("/")
129+
.then().statusCode(503));
130+
131+
// Here we update a Snapshot with working cluster, but we change only CDS version, not EDS version.
132+
// This change allows to test if EDS will be sent anyway after CDS was sent.
133+
createSnapshotWithWorkingClusterWithTheSameEdsVersion();
134+
134135
await().atMost(5, TimeUnit.SECONDS).ignoreExceptions().untilAsserted(
135136
() -> given().baseUri(baseUri).contentType(ContentType.TEXT)
136137
.when().get("/")
137138
.then().statusCode(200)
138139
.and().body(containsString(UPSTREAM.response)));
139140
}
140141

141-
private static void createSnapshotWithWorkingClusterWithTheSameEdsVersion(DiscoveryRequest request,
142-
ExecutorService executorService) {
143-
if (request.getTypeUrl().equals(Resources.V2.CLUSTER_TYPE_URL)) {
144-
executorService.submit(() -> cache.setSnapshot(
145-
GROUP,
146-
createSnapshot(true,
147-
"upstream",
148-
UPSTREAM.ipAddress(),
149-
EchoContainer.PORT,
150-
"listener0",
151-
LISTENER_PORT,
152-
"route0",
153-
"2"))
154-
);
155-
}
142+
private static void createSnapshotWithWorkingClusterWithTheSameEdsVersion() {
143+
cache.setSnapshot(GROUP,
144+
V2TestSnapshots.createSnapshot(true,
145+
"upstream",
146+
UPSTREAM.ipAddress(),
147+
EchoContainer.PORT,
148+
"listener0",
149+
LISTENER_PORT,
150+
"route0",
151+
"2"));
156152
}
157153

154+
158155
private static Snapshot createSnapshotWithNotWorkingCluster(boolean ads,
159156
String clusterName,
160157
String endpointAddress,
@@ -197,34 +194,21 @@ private static Snapshot createSnapshotWithNotWorkingCluster(boolean ads,
197194
}
198195

199196

200-
/**
201-
* Code has been copied from io.envoyproxy.controlplane.cache.SimpleCache to show specific case when
202-
* Envoy might stuck with warming cluster. Class has changed lines from method respondWithSpecificOrder which are
203-
* responsible for responding for watches. Because to reproduce this problem we need a lot of connected Envoy's and
204-
* changes to snapshot it is easier to reproduce this way.
197+
/*
198+
* In the previous versions of this tests we had a copied SimpleCache with respondWithSpecificOrder removed.
199+
* With new versions of Envoy hitting this edge-case became highly improbable.
200+
* Now this test checks only if a CDS change will also send EDS.
201+
* 1. Envoy connects to control-plane
202+
* 2. Snapshot already exists in control-plane <- other instance share same group
203+
* 3. Control-plane respond with CDS in createWatch method
204+
* 4. There is snapshot update which change CDS and EDS versions
205+
* 5. Envoy sends EDS request
206+
* 6. Control-plane respond with EDS in createWatch method
207+
* 7. Envoy resume CDS and EDS requests.
208+
* 8. Envoy sends request CDS
209+
* 9. Control plane respond with CDS in createWatch method
210+
* 10. Envoy sends EDS requests
211+
* 11. Control plane doesn't respond because version hasn't changed
212+
* 12. Cluster of service stays in warming phase
205213
*/
206-
static class CustomCache<T> extends SimpleCache<T> {
207-
208-
public CustomCache(NodeGroup<T> groups) {
209-
super(groups);
210-
}
211-
212-
@Override
213-
protected void respondWithSpecificOrder(T group, Snapshot snapshot,
214-
ConcurrentMap<Resources.ResourceType, CacheStatusInfo<T>> status) {
215-
// This code has been removed to show specific case which is hard to reproduce in integration test:
216-
// 1. Envoy connects to control-plane
217-
// 2. Snapshot already exists in control-plane <- other instance share same group
218-
// 3. Control-plane respond with CDS in createWatch method
219-
// 4. There is snapshot update which change CDS and EDS versions
220-
// 5. Envoy sends EDS request
221-
// 6. Control-plane respond with EDS in createWatch method
222-
// 7. Envoy resume CDS and EDS requests.
223-
// 8. Envoy sends request CDS
224-
// 9. Control plane respond with CDS in createWatch method
225-
// 10. Envoy sends EDS requests
226-
// 11. Control plane doesn't respond because version hasn't changed
227-
// 12. Cluster of service stays in warming phase
228-
}
229-
}
230214
}

server/src/test/java/io/envoyproxy/controlplane/server/V3DiscoveryServerAdsWarmingClusterIT.java

Lines changed: 37 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,7 @@
88
import static org.hamcrest.Matchers.containsString;
99

1010
import com.google.protobuf.util.Durations;
11-
import io.envoyproxy.controlplane.cache.CacheStatusInfo;
1211
import io.envoyproxy.controlplane.cache.NodeGroup;
13-
import io.envoyproxy.controlplane.cache.Resources;
1412
import io.envoyproxy.controlplane.cache.TestResources;
1513
import io.envoyproxy.controlplane.cache.v3.SimpleCache;
1614
import io.envoyproxy.controlplane.cache.v3.Snapshot;
@@ -26,10 +24,7 @@
2624
import io.envoyproxy.envoy.service.discovery.v3.DiscoveryResponse;
2725
import io.grpc.netty.NettyServerBuilder;
2826
import io.restassured.http.ContentType;
29-
import java.util.concurrent.ConcurrentMap;
3027
import java.util.concurrent.CountDownLatch;
31-
import java.util.concurrent.ExecutorService;
32-
import java.util.concurrent.Executors;
3328
import java.util.concurrent.TimeUnit;
3429
import org.junit.ClassRule;
3530
import org.junit.Test;
@@ -43,7 +38,7 @@ public class V3DiscoveryServerAdsWarmingClusterIT {
4338
private static final String CONFIG = "envoy/ads.v3.config.yaml";
4439
private static final String GROUP = "key";
4540
private static final Integer LISTENER_PORT = 10000;
46-
private static final CustomCache<String> cache = new CustomCache<>(new NodeGroup<String>() {
41+
private static final SimpleCache<String> cache = new SimpleCache<>(new NodeGroup<String>() {
4742
@Override public String hash(Node node) {
4843
throw new IllegalStateException("Unexpected v2 request in v3 test");
4944
}
@@ -60,7 +55,6 @@ public class V3DiscoveryServerAdsWarmingClusterIT {
6055
private static final NettyGrpcServerRule ADS = new NettyGrpcServerRule() {
6156
@Override
6257
protected void configureServerBuilder(NettyServerBuilder builder) {
63-
ExecutorService executorService = Executors.newSingleThreadExecutor();
6458
final DiscoveryServerCallbacks callbacks = new DiscoveryServerCallbacks() {
6559
@Override
6660
public void onStreamOpen(long streamId, String typeUrl) {
@@ -87,9 +81,6 @@ public void onStreamResponse(long streamId, io.envoyproxy.envoy.api.v2.Discovery
8781
@Override
8882
public void onV3StreamResponse(long streamId, DiscoveryRequest request,
8983
DiscoveryResponse response) {
90-
// Here we update a Snapshot with working cluster, but we change only CDS version, not EDS version.
91-
// This change allows to test if EDS will be sent anyway after CDS was sent.
92-
createSnapshotWithWorkingClusterWithTheSameEdsVersion(request, executorService);
9384
onStreamResponseLatch.countDown();
9485
}
9586
};
@@ -138,28 +129,33 @@ public void validateTestRequestToEchoServerViaEnvoy() throws InterruptedExceptio
138129

139130
String baseUri = String.format("http://%s:%d", ENVOY.getContainerIpAddress(), ENVOY.getMappedPort(LISTENER_PORT));
140131

132+
await().atMost(5, TimeUnit.SECONDS).ignoreExceptions().untilAsserted(
133+
() -> given().baseUri(baseUri).contentType(ContentType.TEXT)
134+
.when().get("/")
135+
.then().statusCode(503));
136+
137+
// Here we update a Snapshot with working cluster, but we change only CDS version, not EDS version.
138+
// This change allows to test if EDS will be sent anyway after CDS was sent.
139+
createSnapshotWithWorkingClusterWithTheSameEdsVersion();
140+
141141
await().atMost(5, TimeUnit.SECONDS).ignoreExceptions().untilAsserted(
142142
() -> given().baseUri(baseUri).contentType(ContentType.TEXT)
143143
.when().get("/")
144144
.then().statusCode(200)
145145
.and().body(containsString(UPSTREAM.response)));
146146
}
147147

148-
private static void createSnapshotWithWorkingClusterWithTheSameEdsVersion(DiscoveryRequest request,
149-
ExecutorService executorService) {
150-
if (request.getTypeUrl().equals(Resources.V3.CLUSTER_TYPE_URL)) {
151-
executorService.submit(() -> cache.setSnapshot(
152-
GROUP,
153-
createSnapshot(true,
154-
"upstream",
155-
UPSTREAM.ipAddress(),
156-
EchoContainer.PORT,
157-
"listener0",
158-
LISTENER_PORT,
159-
"route0",
160-
"2"))
161-
);
162-
}
148+
private static void createSnapshotWithWorkingClusterWithTheSameEdsVersion() {
149+
cache.setSnapshot(
150+
GROUP,
151+
createSnapshot(true,
152+
"upstream",
153+
UPSTREAM.ipAddress(),
154+
EchoContainer.PORT,
155+
"listener0",
156+
LISTENER_PORT,
157+
"route0",
158+
"2"));
163159
}
164160

165161
private static Snapshot createSnapshotWithNotWorkingCluster(boolean ads,
@@ -204,35 +200,21 @@ private static Snapshot createSnapshotWithNotWorkingCluster(boolean ads,
204200
"2");
205201
}
206202

207-
208-
/**
209-
* Code has been copied from io.envoyproxy.controlplane.cache.SimpleCache to show specific case when
210-
* Envoy might stuck with warming cluster. Class has changed lines from method respondWithSpecificOrder which are
211-
* responsible for responding for watches. Because to reproduce this problem we need a lot of connected Envoy's and
212-
* changes to snapshot it is easier to reproduce this way.
203+
/*
204+
* In the previous versions of this tests we had a copied SimpleCache with respondWithSpecificOrder removed.
205+
* With new versions of Envoy hitting this edge-case became highly improbable.
206+
* Now this test checks only if a CDS change will also send EDS.
207+
* 1. Envoy connects to control-plane
208+
* 2. Snapshot already exists in control-plane <- other instance share same group
209+
* 3. Control-plane respond with CDS in createWatch method
210+
* 4. There is snapshot update which change CDS and EDS versions
211+
* 5. Envoy sends EDS request
212+
* 6. Control-plane respond with EDS in createWatch method
213+
* 7. Envoy resume CDS and EDS requests.
214+
* 8. Envoy sends request CDS
215+
* 9. Control plane respond with CDS in createWatch method
216+
* 10. Envoy sends EDS requests
217+
* 11. Control plane doesn't respond because version hasn't changed
218+
* 12. Cluster of service stays in warming phase
213219
*/
214-
static class CustomCache<T> extends SimpleCache<T> {
215-
216-
public CustomCache(NodeGroup<T> groups) {
217-
super(groups);
218-
}
219-
220-
@Override
221-
protected void respondWithSpecificOrder(T group, Snapshot snapshot,
222-
ConcurrentMap<Resources.ResourceType, CacheStatusInfo<T>> status) {
223-
// This code has been removed to show specific case which is hard to reproduce in integration test:
224-
// 1. Envoy connects to control-plane
225-
// 2. Snapshot already exists in control-plane <- other instance share same group
226-
// 3. Control-plane respond with CDS in createWatch method
227-
// 4. There is snapshot update which change CDS and EDS versions
228-
// 5. Envoy sends EDS request
229-
// 6. Control-plane respond with EDS in createWatch method
230-
// 7. Envoy resume CDS and EDS requests.
231-
// 8. Envoy sends request CDS
232-
// 9. Control plane respond with CDS in createWatch method
233-
// 10. Envoy sends EDS requests
234-
// 11. Control plane doesn't respond because version hasn't changed
235-
// 12. Cluster of service stays in warming phase
236-
}
237-
}
238220
}

0 commit comments

Comments
 (0)