Skip to content

Commit 845abca

Browse files
authored
Merge pull request #2602 from murgatroid99/grpc-js_pick_first_sticky_tf_reresolve
grpc-js: pick_first: fix happy eyeballs and reresolution in sticky TF mode
2 parents ebc2c3e + d465f83 commit 845abca

File tree

3 files changed

+115
-7
lines changed

3 files changed

+115
-7
lines changed

packages/grpc-js/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@grpc/grpc-js",
3-
"version": "1.9.6",
3+
"version": "1.9.7",
44
"description": "gRPC Library for Node - pure JS implementation",
55
"homepage": "https://grpc.io/",
66
"repository": "https://github.com/grpc/grpc-node/tree/master/packages/grpc-js",

packages/grpc-js/src/load-balancer-pick-first.ts

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,12 @@ export class PickFirstLoadBalancer implements LoadBalancer {
174174
*/
175175
private stickyTransientFailureMode = false;
176176

177+
/**
178+
* Indicates whether we called channelControlHelper.requestReresolution since
179+
* the last call to updateAddressList
180+
*/
181+
private requestedResolutionSinceLastUpdate = false;
182+
177183
/**
178184
* The most recent error reported by any subchannel as it transitioned to
179185
* TRANSIENT_FAILURE.
@@ -216,15 +222,28 @@ export class PickFirstLoadBalancer implements LoadBalancer {
216222
}
217223
}
218224

225+
private requestReresolution() {
226+
this.requestedResolutionSinceLastUpdate = true;
227+
this.channelControlHelper.requestReresolution();
228+
}
229+
219230
private maybeEnterStickyTransientFailureMode() {
220-
if (this.stickyTransientFailureMode) {
231+
if (!this.allChildrenHaveReportedTF()) {
221232
return;
222233
}
223-
if (!this.allChildrenHaveReportedTF()) {
234+
if (!this.requestedResolutionSinceLastUpdate) {
235+
/* Each time we get an update we reset each subchannel's
236+
* hasReportedTransientFailure flag, so the next time we get to this
237+
* point after that, each subchannel has reported TRANSIENT_FAILURE
238+
* at least once since then. That is the trigger for requesting
239+
* reresolution, whether or not the LB policy is already in sticky TF
240+
* mode. */
241+
this.requestReresolution();
242+
}
243+
if (this.stickyTransientFailureMode) {
224244
return;
225245
}
226246
this.stickyTransientFailureMode = true;
227-
this.channelControlHelper.requestReresolution();
228247
for (const { subchannel } of this.children) {
229248
subchannel.startConnecting();
230249
}
@@ -256,7 +275,7 @@ export class PickFirstLoadBalancer implements LoadBalancer {
256275
if (newState !== ConnectivityState.READY) {
257276
this.removeCurrentPick();
258277
this.calculateAndReportNewState();
259-
this.channelControlHelper.requestReresolution();
278+
this.requestReresolution();
260279
}
261280
return;
262281
}
@@ -283,7 +302,7 @@ export class PickFirstLoadBalancer implements LoadBalancer {
283302

284303
private startNextSubchannelConnecting(startIndex: number) {
285304
clearTimeout(this.connectionDelayTimeout);
286-
if (this.triedAllSubchannels || this.stickyTransientFailureMode) {
305+
if (this.triedAllSubchannels) {
287306
return;
288307
}
289308
for (const [index, child] of this.children.entries()) {
@@ -382,6 +401,7 @@ export class PickFirstLoadBalancer implements LoadBalancer {
382401
this.currentSubchannelIndex = 0;
383402
this.children = [];
384403
this.triedAllSubchannels = false;
404+
this.requestedResolutionSinceLastUpdate = false;
385405
}
386406

387407
updateAddressList(

packages/grpc-js/test/test-pick-first.ts

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,11 @@ function updateStateCallBackForExpectedStateSequence(
4141
) {
4242
const actualStateSequence: ConnectivityState[] = [];
4343
let lastPicker: Picker | null = null;
44+
let finished = false;
4445
return (connectivityState: ConnectivityState, picker: Picker) => {
46+
if (finished) {
47+
return;
48+
}
4549
// Ignore duplicate state transitions
4650
if (
4751
connectivityState === actualStateSequence[actualStateSequence.length - 1]
@@ -60,6 +64,7 @@ function updateStateCallBackForExpectedStateSequence(
6064
if (
6165
expectedStateSequence[actualStateSequence.length] !== connectivityState
6266
) {
67+
finished = true;
6368
done(
6469
new Error(
6570
`Unexpected state ${
@@ -69,10 +74,12 @@ function updateStateCallBackForExpectedStateSequence(
6974
)}]`
7075
)
7176
);
77+
return;
7278
}
7379
actualStateSequence.push(connectivityState);
7480
lastPicker = picker;
7581
if (actualStateSequence.length === expectedStateSequence.length) {
82+
finished = true;
7683
done();
7784
}
7885
};
@@ -90,7 +97,7 @@ describe('Shuffler', () => {
9097
});
9198
});
9299

93-
describe('pick_first load balancing policy', () => {
100+
describe.only('pick_first load balancing policy', () => {
94101
const config = new PickFirstLoadBalancingConfig(false);
95102
let subchannels: MockSubchannel[] = [];
96103
const baseChannelControlHelper: ChannelControlHelper = {
@@ -462,6 +469,87 @@ describe('pick_first load balancing policy', () => {
462469
});
463470
});
464471
});
472+
it('Should request reresolution every time each child reports TF', done => {
473+
let reresolutionRequestCount = 0;
474+
const targetReresolutionRequestCount = 3;
475+
const currentStartState = ConnectivityState.IDLE;
476+
const channelControlHelper = createChildChannelControlHelper(
477+
baseChannelControlHelper,
478+
{
479+
createSubchannel: (subchannelAddress, subchannelArgs) => {
480+
const subchannel = new MockSubchannel(
481+
subchannelAddressToString(subchannelAddress),
482+
currentStartState
483+
);
484+
subchannels.push(subchannel);
485+
return subchannel;
486+
},
487+
updateState: updateStateCallBackForExpectedStateSequence(
488+
[ConnectivityState.CONNECTING, ConnectivityState.TRANSIENT_FAILURE],
489+
err => setImmediate(() => {
490+
assert.strictEqual(reresolutionRequestCount, targetReresolutionRequestCount);
491+
done(err);
492+
})
493+
),
494+
requestReresolution: () => {
495+
reresolutionRequestCount += 1;
496+
}
497+
}
498+
);
499+
const pickFirst = new PickFirstLoadBalancer(channelControlHelper);
500+
pickFirst.updateAddressList([{ host: 'localhost', port: 1 }], config);
501+
process.nextTick(() => {
502+
subchannels[0].transitionToState(ConnectivityState.TRANSIENT_FAILURE);
503+
process.nextTick(() => {
504+
pickFirst.updateAddressList([{ host: 'localhost', port: 2 }], config);
505+
process.nextTick(() => {
506+
subchannels[1].transitionToState(ConnectivityState.TRANSIENT_FAILURE);
507+
process.nextTick(() => {
508+
pickFirst.updateAddressList([{ host: 'localhost', port: 3 }], config);
509+
process.nextTick(() => {
510+
subchannels[2].transitionToState(ConnectivityState.TRANSIENT_FAILURE);
511+
});
512+
});
513+
});
514+
});
515+
});
516+
});
517+
it('Should request reresolution if the new subchannels are already in TF', done => {
518+
let reresolutionRequestCount = 0;
519+
const targetReresolutionRequestCount = 3;
520+
const currentStartState = ConnectivityState.TRANSIENT_FAILURE;
521+
const channelControlHelper = createChildChannelControlHelper(
522+
baseChannelControlHelper,
523+
{
524+
createSubchannel: (subchannelAddress, subchannelArgs) => {
525+
const subchannel = new MockSubchannel(
526+
subchannelAddressToString(subchannelAddress),
527+
currentStartState
528+
);
529+
subchannels.push(subchannel);
530+
return subchannel;
531+
},
532+
updateState: updateStateCallBackForExpectedStateSequence(
533+
[ConnectivityState.TRANSIENT_FAILURE],
534+
err => setImmediate(() => {
535+
assert.strictEqual(reresolutionRequestCount, targetReresolutionRequestCount);
536+
done(err);
537+
})
538+
),
539+
requestReresolution: () => {
540+
reresolutionRequestCount += 1;
541+
}
542+
}
543+
);
544+
const pickFirst = new PickFirstLoadBalancer(channelControlHelper);
545+
pickFirst.updateAddressList([{ host: 'localhost', port: 1 }], config);
546+
process.nextTick(() => {
547+
pickFirst.updateAddressList([{ host: 'localhost', port: 2 }], config);
548+
process.nextTick(() => {
549+
pickFirst.updateAddressList([{ host: 'localhost', port: 2 }], config);
550+
});
551+
});
552+
});
465553
describe('Address list randomization', () => {
466554
const shuffleConfig = new PickFirstLoadBalancingConfig(true);
467555
it('Should pick different subchannels after multiple updates', done => {

0 commit comments

Comments
 (0)