-
Notifications
You must be signed in to change notification settings - Fork 58
fix(flagd): improve stream observer, refine retry policy; don't use retry to avoid busy loop #1590
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 19 commits
893cd1c
67b190d
5b47a15
e972a80
7991058
f4a93d6
774bdee
acab1f3
3ad54de
fc963ce
c7c1a3a
0c41114
0c29e37
8baba19
ddffc09
1920779
86de034
9a2ef7b
95914e6
d0286a9
455b362
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -98,10 +98,18 @@ public class FlagdOptions { | |
| @Builder.Default | ||
| private int deadline = fallBackToEnvOrDefault(Config.DEADLINE_MS_ENV_VAR_NAME, Config.DEFAULT_DEADLINE); | ||
|
|
||
| /** | ||
| * Max stream retry backoff in milliseconds. | ||
| */ | ||
| @Builder.Default | ||
| private int retryBackoffMaxMs = | ||
| fallBackToEnvOrDefault(Config.FLAGD_RETRY_BACKOFF_MAX_MS_VAR_NAME, Config.DEFAULT_MAX_RETRY_BACKOFF); | ||
|
||
|
|
||
| /** | ||
| * Streaming connection deadline in milliseconds. | ||
| * Set to 0 to disable the deadline. | ||
| * Defaults to 600000 (10 minutes); recommended to prevent infrastructure from killing idle connections. | ||
| * Defaults to 600000 (10 minutes); recommended to prevent infrastructure from | ||
| * killing idle connections. | ||
| */ | ||
| @Builder.Default | ||
| private int streamDeadlineMs = | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,67 +24,57 @@ | |
| import javax.net.ssl.SSLException; | ||
|
|
||
| /** gRPC channel builder helper. */ | ||
| @SuppressFBWarnings(value = "SE_BAD_FIELD", justification = "we don't care to serialize this") | ||
| public class ChannelBuilder { | ||
|
|
||
| /** | ||
| * Controls retry (not-reconnection) policy for failed RPCs. | ||
| */ | ||
| @SuppressWarnings({"unchecked", "rawtypes"}) | ||
| static final Map<String, ?> SERVICE_CONFIG_WITH_RETRY = new HashMap() { | ||
| { | ||
| put("methodConfig", Arrays.asList(new HashMap() { | ||
| { | ||
| put( | ||
| "name", | ||
| Arrays.asList( | ||
| new HashMap() { | ||
| { | ||
| put("service", "flagd.sync.v1.FlagSyncService"); | ||
| } | ||
| }, | ||
| new HashMap() { | ||
| { | ||
| put("service", "flagd.evaluation.v1.Service"); | ||
| } | ||
| })); | ||
| put("retryPolicy", new HashMap() { | ||
| { | ||
| // 1 + 2 + 4 | ||
| put("maxAttempts", 3.0); // types used here are important, need to be doubles | ||
| put("initialBackoff", "1s"); | ||
| put("maxBackoff", "5s"); | ||
| put("backoffMultiplier", 2.0); | ||
| // status codes to retry on: | ||
| put( | ||
| "retryableStatusCodes", | ||
| Arrays.asList( | ||
| /* | ||
| * All codes are retryable except OK and DEADLINE_EXCEEDED since | ||
| * any others not listed here cause a very tight loop of retries. | ||
| * DEADLINE_EXCEEDED is typically a result of a client specified deadline, | ||
| * and definitionally should not result in a tight loop (it's a timeout). | ||
| */ | ||
| Code.CANCELLED.toString(), | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed - these were only here to slow down our loop, which we do with a sleep now. |
||
| Code.UNKNOWN.toString(), | ||
| Code.INVALID_ARGUMENT.toString(), | ||
| Code.NOT_FOUND.toString(), | ||
| Code.ALREADY_EXISTS.toString(), | ||
| Code.PERMISSION_DENIED.toString(), | ||
| Code.RESOURCE_EXHAUSTED.toString(), | ||
| Code.FAILED_PRECONDITION.toString(), | ||
| Code.ABORTED.toString(), | ||
| Code.OUT_OF_RANGE.toString(), | ||
| Code.UNIMPLEMENTED.toString(), | ||
| Code.INTERNAL.toString(), | ||
| Code.UNAVAILABLE.toString(), | ||
| Code.DATA_LOSS.toString(), | ||
| Code.UNAUTHENTICATED.toString())); | ||
| } | ||
| }); | ||
| } | ||
| })); | ||
| } | ||
| }; | ||
| static Map<String, ?> buildRetryPolicy(final FlagdOptions options) { | ||
| return new HashMap() { | ||
| { | ||
| put("methodConfig", Arrays.asList(new HashMap() { | ||
| { | ||
| put( | ||
| "name", | ||
| Arrays.asList( | ||
| new HashMap() { | ||
| { | ||
| put("service", "flagd.sync.v1.FlagSyncService"); | ||
| } | ||
| }, | ||
| new HashMap() { | ||
| { | ||
| put("service", "flagd.evaluation.v1.Service"); | ||
| } | ||
| })); | ||
| put("retryPolicy", new HashMap() { | ||
| { | ||
| // 1 + 2 + 4 | ||
| put("maxAttempts", 3.0); // types used here are important, need to be doubles | ||
| put("initialBackoff", "1s"); | ||
| put( | ||
| "maxBackoff", | ||
| options.getRetryBackoffMaxMs() >= 1000 | ||
| ? String.format("%ds", options.getRetryBackoffMaxMs() / 1000) | ||
| : "1s"); | ||
| put("backoffMultiplier", 2.0); | ||
| // status codes to retry on: | ||
| put( | ||
| "retryableStatusCodes", | ||
| Arrays.asList( | ||
| /* | ||
| * As per gRPC spec, the following status codes are safe to retry: | ||
| * UNAVAILABLE, UNKNOWN, | ||
| */ | ||
| Code.UNKNOWN.toString(), Code.UNAVAILABLE.toString())); | ||
| } | ||
| }); | ||
| } | ||
| })); | ||
| } | ||
| }; | ||
| } | ||
|
|
||
| private ChannelBuilder() {} | ||
|
|
||
|
|
@@ -109,7 +99,7 @@ public static ManagedChannel nettyChannel(final FlagdOptions options) { | |
| .eventLoopGroup(new MultiThreadIoEventLoopGroup(EpollIoHandler.newFactory())) | ||
| .channelType(EpollDomainSocketChannel.class) | ||
| .usePlaintext() | ||
| .defaultServiceConfig(SERVICE_CONFIG_WITH_RETRY) | ||
| .defaultServiceConfig(buildRetryPolicy(options)) | ||
| .enableRetry() | ||
| .build(); | ||
| } | ||
|
|
@@ -155,7 +145,7 @@ public static ManagedChannel nettyChannel(final FlagdOptions options) { | |
| builder.intercept(new FlagdGrpcInterceptor(options.getOpenTelemetry())); | ||
| } | ||
|
|
||
| return builder.defaultServiceConfig(SERVICE_CONFIG_WITH_RETRY) | ||
| return builder.defaultServiceConfig(buildRetryPolicy(options)) | ||
| .enableRetry() | ||
| .build(); | ||
| } catch (SSLException ssle) { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.