Skip to content

Commit 297ab05

Browse files
authored
xds: Convert CdsLb to XdsDepManager
I noticed we deviated from gRFC A37 in some ways. It turned out those were added to the gRFC later in grpc/proposal#344: - NACKing empty aggregate clusters - Failing aggregate cluster when children could not be loaded - Recusion limit of 16. We had this behavior already, but it was ascribed to matching C++ There's disagreement on whether we should actually fail the aggregate cluster for bad children, so I'm preserving the pre-existing behavior for now. The code is now doing a depth-first leaf traversal, not breadth-first. This was odd to see, but the code was also pretty old, so the reasoning seems lost to history. Since we haven't seen more than a single level of aggregate clusters in practice, this wouldn't have been noticed by users. XdsDependencyManager.start() was created to guarantee that the callback could not be called before returning from the constructor. Otherwise XDS_CLUSTER_SUBSCRIPT_REGISTRY could potentially be null.
1 parent a16d655 commit 297ab05

12 files changed

+731
-1036
lines changed

xds/src/main/java/io/grpc/xds/CdsLoadBalancer2.java

Lines changed: 129 additions & 334 deletions
Large diffs are not rendered by default.

xds/src/main/java/io/grpc/xds/CdsLoadBalancerProvider.java

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,6 @@
3636
@Internal
3737
public class CdsLoadBalancerProvider extends LoadBalancerProvider {
3838

39-
private static final String CLUSTER_KEY = "cluster";
40-
4139
@Override
4240
public boolean isAvailable() {
4341
return true;
@@ -70,9 +68,12 @@ public ConfigOrError parseLoadBalancingPolicyConfig(
7068
*/
7169
static ConfigOrError parseLoadBalancingConfigPolicy(Map<String, ?> rawLoadBalancingPolicyConfig) {
7270
try {
73-
String cluster =
74-
JsonUtil.getString(rawLoadBalancingPolicyConfig, CLUSTER_KEY);
75-
return ConfigOrError.fromConfig(new CdsConfig(cluster));
71+
String cluster = JsonUtil.getString(rawLoadBalancingPolicyConfig, "cluster");
72+
Boolean isDynamic = JsonUtil.getBoolean(rawLoadBalancingPolicyConfig, "is_dynamic");
73+
if (isDynamic == null) {
74+
isDynamic = Boolean.FALSE;
75+
}
76+
return ConfigOrError.fromConfig(new CdsConfig(cluster, isDynamic));
7677
} catch (RuntimeException e) {
7778
return ConfigOrError.fromError(
7879
Status.UNAVAILABLE.withCause(e).withDescription(
@@ -89,15 +90,28 @@ static final class CdsConfig {
8990
* Name of cluster to query CDS for.
9091
*/
9192
final String name;
93+
/**
94+
* Whether this cluster was dynamically chosen, so the XdsDependencyManager may be unaware of
95+
* it without an explicit cluster subscription.
96+
*/
97+
final boolean isDynamic;
9298

9399
CdsConfig(String name) {
100+
this(name, false);
101+
}
102+
103+
CdsConfig(String name, boolean isDynamic) {
94104
checkArgument(name != null && !name.isEmpty(), "name is null or empty");
95105
this.name = name;
106+
this.isDynamic = isDynamic;
96107
}
97108

98109
@Override
99110
public String toString() {
100-
return MoreObjects.toStringHelper(this).add("name", name).toString();
111+
return MoreObjects.toStringHelper(this)
112+
.add("name", name)
113+
.add("isDynamic", isDynamic)
114+
.toString();
101115
}
102116
}
103117
}

xds/src/main/java/io/grpc/xds/RingHashLoadBalancerProvider.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ private ConfigOrError parseLoadBalancingPolicyConfigInternal(
104104
}
105105
if (minRingSize <= 0 || maxRingSize <= 0 || minRingSize > maxRingSize) {
106106
return ConfigOrError.fromError(Status.UNAVAILABLE.withDescription(
107-
"Invalid 'mingRingSize'/'maxRingSize'"));
107+
"Invalid 'minRingSize'/'maxRingSize'"));
108108
}
109109
return ConfigOrError.fromConfig(
110110
new RingHashConfig(minRingSize, maxRingSize, requestHashHeader));

xds/src/main/java/io/grpc/xds/XdsClusterResource.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,9 @@ static CdsUpdate processCluster(Cluster cluster,
172172
lbConfig.getPolicyName()).parseLoadBalancingPolicyConfig(
173173
lbConfig.getRawConfigValue());
174174
if (configOrError.getError() != null) {
175-
throw new ResourceInvalidException(structOrError.getErrorDetail());
175+
throw new ResourceInvalidException(
176+
"Failed to parse lb config for cluster '" + cluster.getName() + "': "
177+
+ configOrError.getError());
176178
}
177179

178180
updateBuilder.lbPolicyConfig(lbPolicyConfig);
@@ -209,6 +211,10 @@ private static StructOrError<CdsUpdate.Builder> parseAggregateCluster(Cluster cl
209211
} catch (InvalidProtocolBufferException e) {
210212
return StructOrError.fromError("Cluster " + clusterName + ": malformed ClusterConfig: " + e);
211213
}
214+
if (clusterConfig.getClustersList().isEmpty()) {
215+
return StructOrError.fromError("Cluster " + clusterName
216+
+ ": aggregate ClusterConfig.clusters must not be empty");
217+
}
212218
return StructOrError.fromStruct(CdsUpdate.forAggregate(
213219
clusterName, clusterConfig.getClustersList()));
214220
}

xds/src/main/java/io/grpc/xds/XdsConfig.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,12 @@ XdsConfig build() {
254254
}
255255

256256
public interface XdsClusterSubscriptionRegistry {
257-
Closeable subscribeToCluster(String clusterName);
257+
Subscription subscribeToCluster(String clusterName);
258+
}
259+
260+
public interface Subscription extends Closeable {
261+
/** Release resources without throwing exceptions. */
262+
@Override
263+
void close();
258264
}
259265
}

xds/src/main/java/io/grpc/xds/XdsDependencyManager.java

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import static com.google.common.base.Preconditions.checkArgument;
2020
import static com.google.common.base.Preconditions.checkNotNull;
21+
import static com.google.common.base.Preconditions.checkState;
2122
import static io.grpc.xds.client.XdsClient.ResourceUpdate;
2223

2324
import com.google.common.annotations.VisibleForTesting;
@@ -34,8 +35,6 @@
3435
import io.grpc.xds.client.XdsClient;
3536
import io.grpc.xds.client.XdsClient.ResourceWatcher;
3637
import io.grpc.xds.client.XdsResourceType;
37-
import java.io.Closeable;
38-
import java.io.IOException;
3938
import java.util.Collections;
4039
import java.util.HashMap;
4140
import java.util.HashSet;
@@ -56,39 +55,43 @@
5655
final class XdsDependencyManager implements XdsConfig.XdsClusterSubscriptionRegistry {
5756
public static final XdsClusterResource CLUSTER_RESOURCE = XdsClusterResource.getInstance();
5857
public static final XdsEndpointResource ENDPOINT_RESOURCE = XdsEndpointResource.getInstance();
59-
private static final int MAX_CLUSTER_RECURSION_DEPTH = 16; // Matches C++
58+
private static final int MAX_CLUSTER_RECURSION_DEPTH = 16; // Specified by gRFC A37
6059
private final String listenerName;
6160
private final XdsClient xdsClient;
62-
private final XdsConfigWatcher xdsConfigWatcher;
6361
private final SynchronizationContext syncContext;
6462
private final String dataPlaneAuthority;
63+
private XdsConfigWatcher xdsConfigWatcher;
6564

6665
private StatusOr<XdsConfig> lastUpdate = null;
6766
private final Map<XdsResourceType<?>, TypeWatchers<?>> resourceWatchers = new HashMap<>();
6867
private final Set<ClusterSubscription> subscriptions = new HashSet<>();
6968

70-
XdsDependencyManager(XdsClient xdsClient, XdsConfigWatcher xdsConfigWatcher,
69+
XdsDependencyManager(XdsClient xdsClient,
7170
SynchronizationContext syncContext, String dataPlaneAuthority,
7271
String listenerName, NameResolver.Args nameResolverArgs,
7372
ScheduledExecutorService scheduler) {
7473
this.listenerName = checkNotNull(listenerName, "listenerName");
7574
this.xdsClient = checkNotNull(xdsClient, "xdsClient");
76-
this.xdsConfigWatcher = checkNotNull(xdsConfigWatcher, "xdsConfigWatcher");
7775
this.syncContext = checkNotNull(syncContext, "syncContext");
7876
this.dataPlaneAuthority = checkNotNull(dataPlaneAuthority, "dataPlaneAuthority");
7977
checkNotNull(nameResolverArgs, "nameResolverArgs");
8078
checkNotNull(scheduler, "scheduler");
81-
82-
// start the ball rolling
83-
syncContext.execute(() -> addWatcher(new LdsWatcher(listenerName)));
8479
}
8580

8681
public static String toContextStr(String typeName, String resourceName) {
8782
return typeName + " resource " + resourceName;
8883
}
8984

85+
public void start(XdsConfigWatcher xdsConfigWatcher) {
86+
checkState(this.xdsConfigWatcher == null, "dep manager may not be restarted");
87+
this.xdsConfigWatcher = checkNotNull(xdsConfigWatcher, "xdsConfigWatcher");
88+
// start the ball rolling
89+
syncContext.execute(() -> addWatcher(new LdsWatcher(listenerName)));
90+
}
91+
9092
@Override
91-
public Closeable subscribeToCluster(String clusterName) {
93+
public XdsConfig.Subscription subscribeToCluster(String clusterName) {
94+
checkState(this.xdsConfigWatcher != null, "dep manager must first be started");
9295
checkNotNull(clusterName, "clusterName");
9396
ClusterSubscription subscription = new ClusterSubscription(clusterName);
9497

@@ -291,10 +294,17 @@ private static void addConfigForCluster(
291294
addConfigForCluster(clusters, childCluster, ancestors, tracer);
292295
StatusOr<XdsConfig.XdsClusterConfig> config = clusters.get(childCluster);
293296
if (!config.hasValue()) {
294-
clusters.put(clusterName, StatusOr.fromStatus(Status.INTERNAL.withDescription(
295-
"Unable to get leaves for " + clusterName + ": "
296-
+ config.getStatus().getDescription())));
297-
return;
297+
// gRFC A37 says: If any of a CDS policy's watchers reports that the resource does not
298+
// exist the policy should report that it is in TRANSIENT_FAILURE. If any of the
299+
// watchers reports a transient ADS stream error, the policy should report that it is in
300+
// TRANSIENT_FAILURE if it has never passed a config to its child.
301+
//
302+
// But there's currently disagreement about whether that is actually what we want, and
303+
// that was not originally implemented in gRPC Java. So we're keeping Java's old
304+
// behavior for now and only failing the "leaves" (which is a bit arbitrary for a
305+
// cycle).
306+
leafNames.add(childCluster);
307+
continue;
298308
}
299309
XdsConfig.XdsClusterConfig.ClusterChild children = config.getValue().getChildren();
300310
if (children instanceof AggregateConfig) {
@@ -325,6 +335,11 @@ private static void addConfigForCluster(
325335
default:
326336
throw new IllegalStateException("Unexpected value: " + cdsUpdate.clusterType());
327337
}
338+
if (clusters.containsKey(clusterName)) {
339+
// If a cycle is detected, we'll have detected it while recursing, so now there will be a key
340+
// present. We don't want to overwrite it with a non-error value.
341+
return;
342+
}
328343
clusters.put(clusterName, StatusOr.fromValue(
329344
new XdsConfig.XdsClusterConfig(clusterName, cdsUpdate, child)));
330345
}
@@ -406,7 +421,7 @@ public interface XdsConfigWatcher {
406421
void onUpdate(StatusOr<XdsConfig> config);
407422
}
408423

409-
private final class ClusterSubscription implements Closeable {
424+
private final class ClusterSubscription implements XdsConfig.Subscription {
410425
private final String clusterName;
411426
boolean closed; // Accessed from syncContext
412427

@@ -419,7 +434,7 @@ String getClusterName() {
419434
}
420435

421436
@Override
422-
public void close() throws IOException {
437+
public void close() {
423438
releaseSubscription(this);
424439
}
425440
}

xds/src/main/java/io/grpc/xds/XdsNameResolver.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,8 @@ public void start(Listener2 listener) {
230230
ldsResourceName = XdsClient.canonifyResourceName(ldsResourceName);
231231
callCounterProvider = SharedCallCounterMap.getInstance();
232232

233-
resolveState = new ResolveState(ldsResourceName); // auto starts
233+
resolveState = new ResolveState(ldsResourceName);
234+
resolveState.start();
234235
}
235236

236237
private static String expandPercentS(String template, String replacement) {
@@ -653,10 +654,14 @@ class ResolveState implements XdsDependencyManager.XdsConfigWatcher {
653654
private ResolveState(String ldsResourceName) {
654655
authority = overrideAuthority != null ? overrideAuthority : encodedServiceAuthority;
655656
xdsDependencyManager =
656-
new XdsDependencyManager(xdsClient, this, syncContext, authority, ldsResourceName,
657+
new XdsDependencyManager(xdsClient, syncContext, authority, ldsResourceName,
657658
nameResolverArgs, scheduler);
658659
}
659660

661+
void start() {
662+
xdsDependencyManager.start(this);
663+
}
664+
660665
private void shutdown() {
661666
if (stopped) {
662667
return;

0 commit comments

Comments
 (0)