Skip to content

Commit 5adaa84

Browse files
authored
feat: Load balancing options for BigtableChannelPool (#2667)
Experiments have shown that least-in-flight balancing is particularly effective when some channels or backends have problems. Depends on: https://togithub.com/googleapis/java-bigtable/pull/2651
1 parent 337e432 commit 5adaa84

File tree

3 files changed

+156
-15
lines changed

3 files changed

+156
-15
lines changed

google-cloud-bigtable/clirr-ignored-differences.xml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,4 +456,16 @@
456456
<method>*sendPrimeRequestsAsync*</method>
457457
<to>com.google.api.core.ApiFuture</to>
458458
</difference>
459+
<difference>
460+
<!-- InternalApi was updated -->
461+
<differenceType>7013</differenceType>
462+
<className>com/google/cloud/bigtable/gaxx/grpc/BigtableChannelPoolSettings</className>
463+
<method>com.google.cloud.bigtable.gaxx.grpc.BigtableChannelPoolSettings$LoadBalancingStrategy getLoadBalancingStrategy()</method>
464+
</difference>
465+
<difference>
466+
<!-- InternalApi was updated -->
467+
<differenceType>7013</differenceType>
468+
<className>com/google/cloud/bigtable/gaxx/grpc/BigtableChannelPoolSettings$Builder</className>
469+
<method>com.google.cloud.bigtable.gaxx.grpc.BigtableChannelPoolSettings$Builder setLoadBalancingStrategy(com.google.cloud.bigtable.gaxx.grpc.BigtableChannelPoolSettings$LoadBalancingStrategy)</method>
470+
</difference>
459471
</differences>

google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/BigtableChannelPool.java

Lines changed: 90 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import java.time.Clock;
3535
import java.util.ArrayList;
3636
import java.util.List;
37+
import java.util.Random;
3738
import java.util.concurrent.CancellationException;
3839
import java.util.concurrent.ConcurrentLinkedQueue;
3940
import java.util.concurrent.Executors;
@@ -42,6 +43,7 @@
4243
import java.util.concurrent.atomic.AtomicBoolean;
4344
import java.util.concurrent.atomic.AtomicInteger;
4445
import java.util.concurrent.atomic.AtomicReference;
46+
import java.util.function.Supplier;
4547
import java.util.logging.Level;
4648
import java.util.logging.Logger;
4749
import javax.annotation.Nullable;
@@ -71,6 +73,8 @@ public class BigtableChannelPool extends ManagedChannel {
7173
private final ChannelPoolHealthChecker channelPoolHealthChecker;
7274
private final AtomicInteger indexTicker = new AtomicInteger();
7375
private final String authority;
76+
private final Random rng = new Random();
77+
private final Supplier<Integer> picker;
7478

7579
public static BigtableChannelPool create(
7680
BigtableChannelPoolSettings settings,
@@ -113,6 +117,23 @@ public static BigtableChannelPool create(
113117

114118
entries.set(initialListBuilder.build());
115119
authority = entries.get().get(0).channel.authority();
120+
121+
switch (settings.getLoadBalancingStrategy()) {
122+
case ROUND_ROBIN:
123+
picker = this::pickEntryIndexRoundRobin;
124+
break;
125+
case LEAST_IN_FLIGHT:
126+
picker = this::pickEntryIndexLeastInFlight;
127+
break;
128+
case POWER_OF_TWO_LEAST_IN_FLIGHT:
129+
picker = this::pickEntryIndexPowerOfTwoLeastInFlight;
130+
break;
131+
default:
132+
throw new IllegalStateException(
133+
String.format(
134+
"Unknown load balancing strategy %s", settings.getLoadBalancingStrategy()));
135+
}
136+
116137
this.executor = executor;
117138

118139
if (!settings.isStaticSize()) {
@@ -138,19 +159,74 @@ public String authority() {
138159
}
139160

140161
/**
141-
* Create a {@link ClientCall} on a Channel from the pool chosen in a round-robin fashion to the
142-
* remote operation specified by the given {@link MethodDescriptor}. The returned {@link
143-
* ClientCall} does not trigger any remote behavior until {@link
144-
* ClientCall#start(ClientCall.Listener, io.grpc.Metadata)} is invoked.
162+
* Create a {@link ClientCall} on a Channel from the pool to the remote operation specified by the
163+
* given {@link MethodDescriptor}. The returned {@link ClientCall} does not trigger any remote
164+
* behavior until {@link ClientCall#start(ClientCall.Listener, io.grpc.Metadata)} is invoked.
145165
*/
146166
@Override
147167
public <ReqT, RespT> ClientCall<ReqT, RespT> newCall(
148168
MethodDescriptor<ReqT, RespT> methodDescriptor, CallOptions callOptions) {
149-
return getChannel(indexTicker.getAndIncrement()).newCall(methodDescriptor, callOptions);
169+
return new AffinityChannel(pickEntryIndex()).newCall(methodDescriptor, callOptions);
170+
}
171+
172+
/**
173+
* Pick the index of an entry to use for the next call. The returned value *should* be within
174+
* range, but callers should not assume that this is always the case as race conditions are
175+
* possible.
176+
*/
177+
private int pickEntryIndex() {
178+
return picker.get();
179+
}
180+
181+
/** Pick an entry using the Round Robin algorithm. */
182+
private int pickEntryIndexRoundRobin() {
183+
return Math.abs(indexTicker.getAndIncrement() % entries.get().size());
184+
}
185+
186+
/** Pick an entry at random. */
187+
private int pickEntryIndexRandom() {
188+
return rng.nextInt(entries.get().size());
150189
}
151190

152-
Channel getChannel(int affinity) {
153-
return new AffinityChannel(affinity);
191+
/** Pick an entry using the least-in-flight algorithm. */
192+
private int pickEntryIndexLeastInFlight() {
193+
List<Entry> localEntries = entries.get();
194+
int minRpcs = Integer.MAX_VALUE;
195+
List<Integer> candidates = new ArrayList<>();
196+
197+
for (int i = 0; i < localEntries.size(); i++) {
198+
Entry entry = localEntries.get(i);
199+
int rpcs = entry.outstandingRpcs.get();
200+
if (rpcs < minRpcs) {
201+
minRpcs = rpcs;
202+
candidates.clear();
203+
candidates.add(i);
204+
} else if (rpcs == minRpcs) {
205+
candidates.add(i);
206+
}
207+
}
208+
// If there are multiple matching entries, pick one at random.
209+
return candidates.get(rng.nextInt(candidates.size()));
210+
}
211+
212+
/** Pick an entry using the power-of-two algorithm. */
213+
private int pickEntryIndexPowerOfTwoLeastInFlight() {
214+
List<Entry> localEntries = entries.get();
215+
int choice1 = pickEntryIndexRandom();
216+
int choice2 = pickEntryIndexRandom();
217+
if (choice1 == choice2) {
218+
// Try to pick two different entries. If this picks the same entry again, it's likely that
219+
// there's only one healthy channel in the pool and we should proceed anyway.
220+
choice2 = pickEntryIndexRandom();
221+
}
222+
223+
Entry entry1 = localEntries.get(choice1);
224+
Entry entry2 = localEntries.get(choice2);
225+
return entry1.outstandingRpcs.get() < entry2.outstandingRpcs.get() ? choice1 : choice2;
226+
}
227+
228+
Channel getChannel(int index) {
229+
return new AffinityChannel(index);
154230
}
155231

156232
/** {@inheritDoc} */
@@ -395,7 +471,9 @@ void refresh() {
395471
* Get and retain a Channel Entry. The returned Entry will have its rpc count incremented,
396472
* preventing it from getting recycled.
397473
*/
398-
Entry getRetainedEntry(int affinity) {
474+
private Entry getRetainedEntry(int affinity) {
475+
// If an entry is not retainable, that usually means that it's about to be replaced and if we
476+
// retry we should get a new useable entry.
399477
// The maximum number of concurrent calls to this method for any given time span is at most 2,
400478
// so the loop can actually be 2 times. But going for 5 times for a safety margin for potential
401479
// code evolving
@@ -543,10 +621,10 @@ private void shutdown() {
543621

544622
/** Thin wrapper to ensure that new calls are properly reference counted. */
545623
private class AffinityChannel extends Channel {
546-
private final int affinity;
624+
private final int index;
547625

548-
public AffinityChannel(int affinity) {
549-
this.affinity = affinity;
626+
public AffinityChannel(int index) {
627+
this.index = index;
550628
}
551629

552630
@Override
@@ -557,9 +635,7 @@ public String authority() {
557635
@Override
558636
public <RequestT, ResponseT> ClientCall<RequestT, ResponseT> newCall(
559637
MethodDescriptor<RequestT, ResponseT> methodDescriptor, CallOptions callOptions) {
560-
561-
Entry entry = getRetainedEntry(affinity);
562-
638+
Entry entry = getRetainedEntry(index);
563639
return new ReleasingClientCall<>(entry.channel.newCall(methodDescriptor, callOptions), entry);
564640
}
565641
}

google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/BigtableChannelPoolSettings.java

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,14 @@
1616
package com.google.cloud.bigtable.gaxx.grpc;
1717

1818
import com.google.api.core.BetaApi;
19+
import com.google.api.core.InternalApi;
1920
import com.google.api.gax.grpc.ChannelPoolSettings;
2021
import com.google.auto.value.AutoValue;
22+
import com.google.common.annotations.VisibleForTesting;
2123
import com.google.common.base.Preconditions;
24+
import com.google.common.base.Strings;
2225
import java.time.Duration;
26+
import java.util.logging.Logger;
2327

2428
/**
2529
* Settings to control {@link BigtableChannelPool} behavior.
@@ -41,12 +45,33 @@
4145
@BetaApi("surface for channel pool sizing is not yet stable")
4246
@AutoValue
4347
public abstract class BigtableChannelPoolSettings {
48+
@VisibleForTesting
49+
static final Logger LOG = Logger.getLogger(BigtableChannelPoolSettings.class.getName());
50+
4451
/** How often to check and possibly resize the {@link BigtableChannelPool}. */
4552
static final Duration RESIZE_INTERVAL = Duration.ofMinutes(1);
4653

4754
/** The maximum number of channels that can be added or removed at a time. */
4855
static final int MAX_RESIZE_DELTA = 2;
4956

57+
/** Environment variable used to set load balancing strategy. */
58+
private static final String CBT_LOAD_BALANCING_STRATEGY_ENV_VAR = "CBT_LOAD_BALANCING_STRATEGY";
59+
60+
/** Load balancing strategy to use if the environment variable is unset or empty. */
61+
private static final LoadBalancingStrategy DEFAULT_LOAD_BALANCING_STRATEGY =
62+
LoadBalancingStrategy.ROUND_ROBIN;
63+
64+
/** Supported load-balancing strategies. */
65+
public enum LoadBalancingStrategy {
66+
// Sequentially iterate across all channels.
67+
ROUND_ROBIN,
68+
// Pick the channel with the fewest in-flight requests. If multiple channels match, pick at
69+
// random.
70+
LEAST_IN_FLIGHT,
71+
// Out of two random channels, pick the channel with the fewest in-flight requests.
72+
POWER_OF_TWO_LEAST_IN_FLIGHT,
73+
}
74+
5075
/**
5176
* Threshold to start scaling down the channel pool.
5277
*
@@ -95,6 +120,10 @@ public abstract class BigtableChannelPoolSettings {
95120
*/
96121
public abstract boolean isPreemptiveRefreshEnabled();
97122

123+
/**
 * The load balancing strategy to use for distributing RPCs across channels.
 *
 * <p>The factory methods of this class ({@code builder()}, {@code staticallySized(int)} and
 * {@code copyFrom(ChannelPoolSettings)}) populate this value from the CBT_LOAD_BALANCING_STRATEGY
 * environment variable.
 *
 * @return the configured {@link LoadBalancingStrategy}.
 */
124+
@InternalApi("Use CBT_LOAD_BALANCING_STRATEGY environment variable")
125+
public abstract LoadBalancingStrategy getLoadBalancingStrategy();
126+
98127
/**
99128
* Helper to check if the {@link BigtableChannelPool} implementation can skip dynamic size logic
100129
*/
@@ -111,6 +140,24 @@ boolean isStaticSize() {
111140
return false;
112141
}
113142

143+
/**
144+
* Use environment variable CBT_LOAD_BALANCING_STRATEGY to pick a load-balancing strategy.
145+
*
146+
* @return load-balancing strategy to use.
147+
*/
148+
private static LoadBalancingStrategy loadBalancingStrategyFromEnv() {
149+
String strategyString = System.getenv(CBT_LOAD_BALANCING_STRATEGY_ENV_VAR);
150+
if (Strings.isNullOrEmpty(strategyString)) {
151+
return DEFAULT_LOAD_BALANCING_STRATEGY;
152+
}
153+
try {
154+
return LoadBalancingStrategy.valueOf(strategyString.trim().toUpperCase());
155+
} catch (IllegalArgumentException e) {
156+
throw new IllegalStateException(
157+
String.format("Invalid load-balancing strategy %s", strategyString));
158+
}
159+
}
160+
114161
public abstract Builder toBuilder();
115162

116163
public static BigtableChannelPoolSettings copyFrom(ChannelPoolSettings externalSettings) {
@@ -121,6 +168,7 @@ public static BigtableChannelPoolSettings copyFrom(ChannelPoolSettings externalS
121168
.setMaxChannelCount(externalSettings.getMaxChannelCount())
122169
.setInitialChannelCount(externalSettings.getInitialChannelCount())
123170
.setPreemptiveRefreshEnabled(externalSettings.isPreemptiveRefreshEnabled())
171+
.setLoadBalancingStrategy(loadBalancingStrategyFromEnv())
124172
.build();
125173
}
126174

@@ -131,6 +179,7 @@ public static BigtableChannelPoolSettings staticallySized(int size) {
131179
.setMaxRpcsPerChannel(Integer.MAX_VALUE)
132180
.setMinChannelCount(size)
133181
.setMaxChannelCount(size)
182+
.setLoadBalancingStrategy(loadBalancingStrategyFromEnv())
134183
.build();
135184
}
136185

@@ -141,7 +190,8 @@ public static Builder builder() {
141190
.setMaxChannelCount(200)
142191
.setMinRpcsPerChannel(0)
143192
.setMaxRpcsPerChannel(Integer.MAX_VALUE)
144-
.setPreemptiveRefreshEnabled(false);
193+
.setPreemptiveRefreshEnabled(false)
194+
.setLoadBalancingStrategy(loadBalancingStrategyFromEnv());
145195
}
146196

147197
@AutoValue.Builder
@@ -158,6 +208,9 @@ public abstract static class Builder {
158208

159209
public abstract Builder setPreemptiveRefreshEnabled(boolean enabled);
160210

211+
/**
 * Sets the load balancing strategy used to distribute RPCs across channels.
 *
 * <p>{@code builder()} pre-populates this from the CBT_LOAD_BALANCING_STRATEGY environment
 * variable, so calling this setter overrides the environment-derived value.
 *
 * @param strategy the {@link LoadBalancingStrategy} to use.
 * @return this builder.
 */
@InternalApi("Use CBT_LOAD_BALANCING_STRATEGY environment variable")
212+
public abstract Builder setLoadBalancingStrategy(LoadBalancingStrategy strategy);
213+
161214
abstract BigtableChannelPoolSettings autoBuild();
162215

163216
public BigtableChannelPoolSettings build() {

0 commit comments

Comments
 (0)