Skip to content

Commit 7239c0e

Browse files
authored
Merge pull request #2064 from murgatroid99/grpc-js-xds_outlier_detection
grpc-js-xds: Add outlier detection configuration handling
2 parents 0e33286 + b5b0703 commit 7239c0e

File tree

6 files changed

+177
-24
lines changed

6 files changed

+177
-24
lines changed

packages/grpc-js-xds/src/environment.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,5 @@
1515
*
1616
*/
1717

18-
export const EXPERIMENTAL_FAULT_INJECTION = (process.env.GRPC_XDS_EXPERIMENTAL_FAULT_INJECTION ?? 'true') === 'true';
18+
export const EXPERIMENTAL_FAULT_INJECTION = (process.env.GRPC_XDS_EXPERIMENTAL_FAULT_INJECTION ?? 'true') === 'true';
19+
export const EXPERIMENTAL_OUTLIER_DETECTION = process.env.GRPC_EXPERIMENTAL_ENABLE_OUTLIER_DETECTION === 'true';

packages/grpc-js-xds/src/load-balancer-cds.ts

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,14 @@ import LoadBalancer = experimental.LoadBalancer;
2525
import ChannelControlHelper = experimental.ChannelControlHelper;
2626
import registerLoadBalancerType = experimental.registerLoadBalancerType;
2727
import LoadBalancingConfig = experimental.LoadBalancingConfig;
28+
import OutlierDetectionLoadBalancingConfig = experimental.OutlierDetectionLoadBalancingConfig;
29+
import SuccessRateEjectionConfig = experimental.SuccessRateEjectionConfig;
30+
import FailurePercentageEjectionConfig = experimental.FailurePercentageEjectionConfig;
2831
import { EdsLoadBalancingConfig } from './load-balancer-eds';
2932
import { Watcher } from './xds-stream-state/xds-stream-state';
33+
import { OutlierDetection__Output } from './generated/envoy/config/cluster/v3/OutlierDetection';
34+
import { Duration__Output } from './generated/google/protobuf/Duration';
35+
import { EXPERIMENTAL_OUTLIER_DETECTION } from './environment';
3036

3137
const TRACER_NAME = 'cds_balancer';
3238

@@ -64,6 +70,52 @@ export class CdsLoadBalancingConfig implements LoadBalancingConfig {
6470
}
6571
}
6672

73+
/**
 * Converts a protobuf Duration message into a whole number of milliseconds.
 *
 * @param duration Duration whose `seconds` field is converted with `Number()`
 *     and whose `nanos` field is a number of nanoseconds.
 * @returns The duration in milliseconds with any fractional millisecond
 *     truncated toward zero, or 0 if the computed value is not finite.
 */
function durationToMs(duration: Duration__Output): number {
  const milliseconds = Number(duration.seconds) * 1_000 + duration.nanos / 1_000_000;
  /* Math.trunc is used instead of `| 0` because the bitwise operator first
   * coerces its operand to a signed 32-bit integer, which wraps around for
   * any duration longer than 2^31 - 1 ms (about 24.8 days). Non-finite
   * results still map to 0, which is what `| 0` produced for them. */
  return Number.isFinite(milliseconds) ? Math.trunc(milliseconds) : 0;
}
76+
77+
/**
 * Translates the `outlier_detection` message of an xDS Cluster resource into
 * an outlier detection load balancing config with an empty child policy list.
 *
 * Fields that are unset in the message are left out of the ejection configs
 * entirely rather than being passed as `undefined`: the config constructor
 * merges them over its defaults with object spread, and a key whose value is
 * `undefined` would overwrite the corresponding default.
 *
 * @param outlierDetection The outlier detection message from the Cluster
 *     resource, or null if the resource did not contain one.
 * @returns undefined when EXPERIMENTAL_OUTLIER_DETECTION is false; otherwise
 *     a config built from the message, or a no-op config when it is null.
 */
function translateOutlierDetectionConfig(outlierDetection: OutlierDetection__Output | null): OutlierDetectionLoadBalancingConfig | undefined {
  if (!EXPERIMENTAL_OUTLIER_DETECTION) {
    return undefined;
  }
  if (!outlierDetection) {
    /* No-op outlier detection config, with max possible interval and no
     * ejection criteria configured. ~(1<<31) is 2^31 - 1. */
    return new OutlierDetectionLoadBalancingConfig(~(1<<31), null, null, null, null, null, []);
  }
  let successRateConfig: Partial<SuccessRateEjectionConfig> | null = null;
  /* Success rate ejection is enabled by default, so we only disable it if
   * enforcing_success_rate is set and it has the value 0 */
  if (!outlierDetection.enforcing_success_rate || outlierDetection.enforcing_success_rate.value > 0) {
    const enforcementPercentage = outlierDetection.enforcing_success_rate?.value;
    const minimumHosts = outlierDetection.success_rate_minimum_hosts?.value;
    const requestVolume = outlierDetection.success_rate_request_volume?.value;
    const stdevFactor = outlierDetection.success_rate_stdev_factor?.value;
    successRateConfig = {
      ...(enforcementPercentage !== undefined ? { enforcement_percentage: enforcementPercentage } : {}),
      ...(minimumHosts !== undefined ? { minimum_hosts: minimumHosts } : {}),
      ...(requestVolume !== undefined ? { request_volume: requestVolume } : {}),
      ...(stdevFactor !== undefined ? { stdev_factor: stdevFactor } : {})
    };
  }
  let failurePercentageConfig: Partial<FailurePercentageEjectionConfig> | null = null;
  /* Failure percentage ejection is disabled by default, so we only enable it
   * if enforcing_failure_percentage is set and it has a value greater than 0 */
  if (outlierDetection.enforcing_failure_percentage && outlierDetection.enforcing_failure_percentage.value > 0) {
    const minimumHosts = outlierDetection.failure_percentage_minimum_hosts?.value;
    const requestVolume = outlierDetection.failure_percentage_request_volume?.value;
    const threshold = outlierDetection.failure_percentage_threshold?.value;
    failurePercentageConfig = {
      enforcement_percentage: outlierDetection.enforcing_failure_percentage.value,
      ...(minimumHosts !== undefined ? { minimum_hosts: minimumHosts } : {}),
      ...(requestVolume !== undefined ? { request_volume: requestVolume } : {}),
      ...(threshold !== undefined ? { threshold: threshold } : {})
    };
  }
  /* Null duration/percentage arguments tell the config constructor to fall
   * back to its own defaults. */
  return new OutlierDetectionLoadBalancingConfig(
    outlierDetection.interval ? durationToMs(outlierDetection.interval) : null,
    outlierDetection.base_ejection_time ? durationToMs(outlierDetection.base_ejection_time) : null,
    outlierDetection.max_ejection_time ? durationToMs(outlierDetection.max_ejection_time) : null,
    outlierDetection.max_ejection_percent?.value ?? null,
    successRateConfig,
    failurePercentageConfig,
    []
  );
}
118+
67119
export class CdsLoadBalancer implements LoadBalancer {
68120
private childBalancer: ChildLoadBalancerHandler;
69121
private watcher: Watcher<Cluster__Output>;
@@ -90,7 +142,15 @@ export class CdsLoadBalancer implements LoadBalancer {
90142
* used for load reporting as for other xDS operations. Setting
91143
* lrsLoadReportingServerName to the empty string sets that behavior.
92144
* Otherwise, if the field is omitted, load reporting is disabled. */
93-
const edsConfig: EdsLoadBalancingConfig = new EdsLoadBalancingConfig(update.name, [], [], update.eds_cluster_config!.service_name === '' ? undefined : update.eds_cluster_config!.service_name, update.lrs_server?.self ? '' : undefined, maxConcurrentRequests);
145+
const edsConfig: EdsLoadBalancingConfig = new EdsLoadBalancingConfig(
146+
/* cluster= */ update.name,
147+
/* localityPickingPolicy= */ [],
148+
/* endpointPickingPolicy= */ [],
149+
/* edsServiceName= */ update.eds_cluster_config!.service_name === '' ? undefined : update.eds_cluster_config!.service_name,
150+
/* lrsLoadReportingServerName= */update.lrs_server?.self ? '' : undefined,
151+
/* maxConcurrentRequests= */ maxConcurrentRequests,
152+
/* outlierDetection= */ translateOutlierDetectionConfig(update.outlier_detection)
153+
);
94154
trace('Child update EDS config: ' + JSON.stringify(edsConfig));
95155
this.childBalancer.updateAddressList(
96156
[],

packages/grpc-js-xds/src/load-balancer-eds.ts

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ import Filter = experimental.Filter;
3838
import BaseFilter = experimental.BaseFilter;
3939
import FilterFactory = experimental.FilterFactory;
4040
import CallStream = experimental.CallStream;
41+
import OutlierDetectionLoadBalancingConfig = experimental.OutlierDetectionLoadBalancingConfig;
42+
import { EXPERIMENTAL_OUTLIER_DETECTION } from './environment';
4143

4244
const TRACER_NAME = 'eds_balancer';
4345

@@ -71,12 +73,15 @@ export class EdsLoadBalancingConfig implements LoadBalancingConfig {
7173
if (this.lrsLoadReportingServerName !== undefined) {
7274
jsonObj.lrs_load_reporting_server_name = this.lrsLoadReportingServerName;
7375
}
76+
if (this.outlierDetection !== undefined) {
77+
jsonObj.outlier_detection = this.outlierDetection.toJsonObject();
78+
}
7479
return {
7580
[TYPE_NAME]: jsonObj
7681
};
7782
}
7883

79-
constructor(private cluster: string, private localityPickingPolicy: LoadBalancingConfig[], private endpointPickingPolicy: LoadBalancingConfig[], private edsServiceName?: string, private lrsLoadReportingServerName?: string, maxConcurrentRequests?: number) {
84+
constructor(private cluster: string, private localityPickingPolicy: LoadBalancingConfig[], private endpointPickingPolicy: LoadBalancingConfig[], private edsServiceName?: string, private lrsLoadReportingServerName?: string, maxConcurrentRequests?: number, private outlierDetection?: OutlierDetectionLoadBalancingConfig) {
8085
this.maxConcurrentRequests = maxConcurrentRequests ?? DEFAULT_MAX_CONCURRENT_REQUESTS;
8186
}
8287

@@ -104,6 +109,10 @@ export class EdsLoadBalancingConfig implements LoadBalancingConfig {
104109
return this.maxConcurrentRequests;
105110
}
106111

112+
/**
 * Returns the outlier detection config that was passed to the constructor,
 * or undefined if none was provided.
 */
getOutlierDetection() {
113+
return this.outlierDetection;
114+
}
115+
107116
static createFromJson(obj: any): EdsLoadBalancingConfig {
108117
if (!('cluster' in obj && typeof obj.cluster === 'string')) {
109118
throw new Error('eds config must have a string field cluster');
@@ -123,7 +132,17 @@ export class EdsLoadBalancingConfig implements LoadBalancingConfig {
123132
/* max_concurrent_requests is optional, but when a value is provided it must
 * be a number. The previous condition was inverted: it threw for valid
 * numbers and accepted values of any other type. */
if ('max_concurrent_requests' in obj && obj.max_concurrent_requests !== undefined && typeof obj.max_concurrent_requests !== 'number') {
  throw new Error('eds config max_concurrent_requests must be a number if provided');
}
126-
return new EdsLoadBalancingConfig(obj.cluster, obj.locality_picking_policy.map(validateLoadBalancingConfig), obj.endpoint_picking_policy.map(validateLoadBalancingConfig), obj.eds_service_name, obj.lrs_load_reporting_server_name, obj.max_concurrent_requests);
135+
let validatedOutlierDetectionConfig: OutlierDetectionLoadBalancingConfig | undefined = undefined;
136+
if (EXPERIMENTAL_OUTLIER_DETECTION) {
137+
if ('outlier_detection' in obj) {
138+
const outlierDetectionConfig = validateLoadBalancingConfig(obj.outlier_detection);
139+
if (!(outlierDetectionConfig instanceof OutlierDetectionLoadBalancingConfig)) {
140+
throw new Error('eds config outlier_detection must be a valid outlier detection config if provided');
141+
}
142+
validatedOutlierDetectionConfig = outlierDetectionConfig;
143+
}
144+
}
145+
return new EdsLoadBalancingConfig(obj.cluster, obj.locality_picking_policy.map(validateLoadBalancingConfig), obj.endpoint_picking_policy.map(validateLoadBalancingConfig), obj.eds_service_name, obj.lrs_load_reporting_server_name, obj.max_concurrent_requests, validatedOutlierDetectionConfig);
127146
}
128147
}
129148

@@ -449,10 +468,15 @@ export class EdsLoadBalancer implements LoadBalancer {
449468
}
450469
}
451470

471+
const weightedTargetConfig = new WeightedTargetLoadBalancingConfig(childTargets);
472+
let outlierDetectionConfig: OutlierDetectionLoadBalancingConfig | undefined;
473+
if (EXPERIMENTAL_OUTLIER_DETECTION) {
474+
outlierDetectionConfig = this.lastestConfig.getOutlierDetection()?.copyWithChildPolicy([weightedTargetConfig]);
475+
}
476+
const priorityChildConfig = outlierDetectionConfig ?? weightedTargetConfig;
477+
452478
priorityChildren.set(newPriorityName, {
453-
config: [
454-
new WeightedTargetLoadBalancingConfig(childTargets),
455-
],
479+
config: [priorityChildConfig],
456480
});
457481
}
458482
/* Contract the priority names array if it is sparse. This config only

packages/grpc-js-xds/src/xds-stream-state/cds-state.ts

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,11 @@
1616
*/
1717

1818
import { experimental, logVerbosity, StatusObject } from "@grpc/grpc-js";
19+
import { EXPERIMENTAL_OUTLIER_DETECTION } from "../environment";
1920
import { Cluster__Output } from "../generated/envoy/config/cluster/v3/Cluster";
2021
import { Any__Output } from "../generated/google/protobuf/Any";
22+
import { Duration__Output } from "../generated/google/protobuf/Duration";
23+
import { UInt32Value__Output } from "../generated/google/protobuf/UInt32Value";
2124
import { EdsState } from "./eds-state";
2225
import { HandleResponseResult, RejectedResourceEntry, ResourcePair, Watcher, XdsStreamState } from "./xds-stream-state";
2326

@@ -102,6 +105,26 @@ export class CdsState implements XdsStreamState<Cluster__Output> {
102105
return Array.from(this.watchers.keys());
103106
}
104107

108+
/**
 * Checks that an optional Duration is non-negative and within the range
 * allowed for protobuf Durations.
 *
 * @param duration The duration to check, or null if the field is unset.
 * @returns true if `duration` is unset, or if its seconds are in
 *     [0, 315,576,000,000] and its nanos are in [0, 999,999,999].
 */
private validateNonnegativeDuration(duration: Duration__Output | null): boolean {
  if (duration) {
    /* The maximum values here come from the official Protobuf documentation:
     * https://developers.google.com/protocol-buffers/docs/reference/google.protobuf#google.protobuf.Duration
     */
    const seconds = Number(duration.seconds);
    const { nanos } = duration;
    const secondsInRange = 0 <= seconds && seconds <= 315_576_000_000;
    const nanosInRange = 0 <= nanos && nanos <= 999_999_999;
    return secondsInRange && nanosInRange;
  }
  return true;
}
120+
121+
/**
 * Checks that an optional wrapped integer is a valid percentage.
 *
 * @param percentage The wrapped value to check, or null if the field is unset.
 * @returns true if `percentage` is unset or its value is between 0 and 100
 *     inclusive.
 */
private validatePercentage(percentage: UInt32Value__Output | null): boolean {
  if (percentage) {
    const { value } = percentage;
    return 0 <= value && value <= 100;
  }
  return true;
}
127+
105128
private validateResponse(message: Cluster__Output): boolean {
106129
if (message.type !== 'EDS') {
107130
return false;
@@ -117,6 +140,31 @@ export class CdsState implements XdsStreamState<Cluster__Output> {
117140
return false;
118141
}
119142
}
143+
if (EXPERIMENTAL_OUTLIER_DETECTION) {
144+
if (message.outlier_detection) {
145+
if (!this.validateNonnegativeDuration(message.outlier_detection.interval)) {
146+
return false;
147+
}
148+
if (!this.validateNonnegativeDuration(message.outlier_detection.base_ejection_time)) {
149+
return false;
150+
}
151+
if (!this.validateNonnegativeDuration(message.outlier_detection.max_ejection_time)) {
152+
return false;
153+
}
154+
if (!this.validatePercentage(message.outlier_detection.max_ejection_percent)) {
155+
return false;
156+
}
157+
if (!this.validatePercentage(message.outlier_detection.enforcing_success_rate)) {
158+
return false;
159+
}
160+
if (!this.validatePercentage(message.outlier_detection.failure_percentage_threshold)) {
161+
return false;
162+
}
163+
if (!this.validatePercentage(message.outlier_detection.enforcing_failure_percentage)) {
164+
return false;
165+
}
166+
}
167+
}
120168
return true;
121169
}
122170

packages/grpc-js/src/experimental.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ export {
66
ConfigSelector,
77
} from './resolver';
88
export { GrpcUri, uriToString } from './uri-parser';
9-
export { Duration } from './duration';
9+
export { Duration, durationToMs } from './duration';
1010
export { ServiceConfig } from './service-config';
1111
export { BackoffTimeout } from './backoff-timeout';
1212
export {
@@ -35,4 +35,5 @@ export { Call as CallStream } from './call-stream';
3535
export { Filter, BaseFilter, FilterFactory } from './filter';
3636
export { FilterStackFactory } from './filter-stack';
3737
export { registerAdminService } from './admin';
38-
export { SubchannelInterface, BaseSubchannelWrapper, ConnectivityStateListener } from './subchannel-interface'
38+
export { SubchannelInterface, BaseSubchannelWrapper, ConnectivityStateListener } from './subchannel-interface';
39+
export { OutlierDetectionLoadBalancingConfig, SuccessRateEjectionConfig, FailurePercentageEjectionConfig } from './load-balancer-outlier-detection';

packages/grpc-js/src/load-balancer-outlier-detection.ts

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,14 @@ const TYPE_NAME = 'outlier_detection';
3434

3535
const OUTLIER_DETECTION_ENABLED = process.env.GRPC_EXPERIMENTAL_ENABLE_OUTLIER_DETECTION === 'true';
3636

37-
interface SuccessRateEjectionConfig {
37+
export interface SuccessRateEjectionConfig {
3838
readonly stdev_factor: number;
3939
readonly enforcement_percentage: number;
4040
readonly minimum_hosts: number;
4141
readonly request_volume: number;
4242
}
4343

44-
interface FailurePercentageEjectionConfig {
44+
export interface FailurePercentageEjectionConfig {
4545
readonly threshold: number;
4646
readonly enforcement_percentage: number;
4747
readonly minimum_hosts: number;
@@ -92,15 +92,29 @@ function validatePercentage(obj: any, fieldName: string, objectName?: string) {
9292
}
9393

9494
export class OutlierDetectionLoadBalancingConfig implements LoadBalancingConfig {
95+
private readonly intervalMs: number;
96+
private readonly baseEjectionTimeMs: number;
97+
private readonly maxEjectionTimeMs: number;
98+
private readonly maxEjectionPercent: number;
99+
private readonly successRateEjection: SuccessRateEjectionConfig | null;
100+
private readonly failurePercentageEjection: FailurePercentageEjectionConfig | null;
101+
95102
constructor(
96-
private readonly intervalMs: number,
97-
private readonly baseEjectionTimeMs: number,
98-
private readonly maxEjectionTimeMs: number,
99-
private readonly maxEjectionPercent: number,
100-
private readonly successRateEjection: SuccessRateEjectionConfig | null,
101-
private readonly failurePercentageEjection: FailurePercentageEjectionConfig | null,
103+
intervalMs: number | null,
104+
baseEjectionTimeMs: number | null,
105+
maxEjectionTimeMs: number | null,
106+
maxEjectionPercent: number | null,
107+
successRateEjection: Partial<SuccessRateEjectionConfig> | null,
108+
failurePercentageEjection: Partial<FailurePercentageEjectionConfig> | null,
102109
private readonly childPolicy: LoadBalancingConfig[]
103-
) {}
110+
) {
111+
this.intervalMs = intervalMs ?? 10_000;
112+
this.baseEjectionTimeMs = baseEjectionTimeMs ?? 30_000;
113+
this.maxEjectionTimeMs = maxEjectionTimeMs ?? 300_000;
114+
this.maxEjectionPercent = maxEjectionPercent ?? 10;
115+
this.successRateEjection = successRateEjection ? {...defaultSuccessRateEjectionConfig, ...successRateEjection} : null;
116+
this.failurePercentageEjection = failurePercentageEjection ? {...defaultFailurePercentageEjectionConfig, ...failurePercentageEjection}: null;
117+
}
104118
getLoadBalancerName(): string {
105119
return TYPE_NAME;
106120
}
@@ -137,6 +151,11 @@ export class OutlierDetectionLoadBalancingConfig implements LoadBalancingConfig
137151
getChildPolicy(): LoadBalancingConfig[] {
138152
return this.childPolicy;
139153
}
154+
155+
/**
 * Builds a new config that carries over this config's interval, ejection
 * times, max ejection percent and ejection criteria, but uses the given
 * child policy list. This config is left unchanged.
 *
 * @param childPolicy The child policy list for the new config.
 * @returns The newly constructed config.
 */
copyWithChildPolicy(childPolicy: LoadBalancingConfig[]): OutlierDetectionLoadBalancingConfig {
  const {
    intervalMs,
    baseEjectionTimeMs,
    maxEjectionTimeMs,
    maxEjectionPercent,
    successRateEjection,
    failurePercentageEjection
  } = this;
  return new OutlierDetectionLoadBalancingConfig(
    intervalMs,
    baseEjectionTimeMs,
    maxEjectionTimeMs,
    maxEjectionPercent,
    successRateEjection,
    failurePercentageEjection,
    childPolicy
  );
}
158+
140159
static createFromJson(obj: any): OutlierDetectionLoadBalancingConfig {
141160
validatePositiveDuration(obj, 'interval');
142161
validatePositiveDuration(obj, 'base_ejection_time');
@@ -162,12 +181,12 @@ export class OutlierDetectionLoadBalancingConfig implements LoadBalancingConfig
162181
}
163182

164183
return new OutlierDetectionLoadBalancingConfig(
165-
obj.interval ? durationToMs(obj.interval) : 10_000,
166-
obj.base_ejection_time ? durationToMs(obj.base_ejection_time) : 30_000,
167-
obj.max_ejection_time ? durationToMs(obj.max_ejection_time) : 300_000,
168-
obj.max_ejection_percent ?? 10,
169-
obj.success_rate_ejection ? {...defaultSuccessRateEjectionConfig, ...obj.success_rate_ejection} : null,
170-
obj.failure_percentage_ejection ? {...defaultFailurePercentageEjectionConfig, ...obj.failure_percentage_ejection} : null,
184+
obj.interval ? durationToMs(obj.interval) : null,
185+
obj.base_ejection_time ? durationToMs(obj.base_ejection_time) : null,
186+
obj.max_ejection_time ? durationToMs(obj.max_ejection_time) : null,
187+
obj.max_ejection_percent ?? null,
188+
obj.success_rate_ejection,
189+
obj.failure_percentage_ejection,
171190
obj.child_policy.map(validateLoadBalancingConfig)
172191
);
173192
}

0 commit comments

Comments
 (0)