Skip to content

Commit 2135554

Browse files
committed
rpc: fix various test bugs
TLDR: Prior to this patch, multiple unit tests in the `rpc` package were misconfiguring their heartbeat interval and timeout. This patch fixes this. The issue was that there were two separate locations for these tunable parameters: - `RPCHeartbeat{Interval,Timeout}` fields in `ContextOptions` (previously via `*base.Config`, recently as direct fields in `ContextOptions`) - also, two private fields `heartbeat{Interval,Timeout}` The former was copied into the latter during `NewContext()`. Only the latter (the private fields) matter for the actual behavior. Prior to this commit, the tests would call `NewContext()` and then afterwards would override the fields in `ContextOptions`. These overrides were thus ineffective because the value was already copied by that time. In addition to this design bug, there were a couple of additional bugs lurking: the overrides defined by the tests were problematic in some cases. If these overrides had been effective, the tests would break. The problem was discovered after 8d2d2bf merged, because in that commit some of the overrides from broken tests were taken on board and became effective. This started causing flakes. The fix is to remove the ambiguity in configuration and remove the problematic overrides from offending tests. Release note: None
1 parent a462321 commit 2135554

File tree

4 files changed

+8
-20
lines changed

4 files changed

+8
-20
lines changed

pkg/rpc/context.go

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -213,9 +213,6 @@ type Context struct {
213213
RemoteClocks *RemoteClockMonitor
214214
MasterCtx context.Context // cancel on stopper quiesce
215215

216-
heartbeatInterval time.Duration
217-
heartbeatTimeout time.Duration
218-
219216
rpcCompression bool
220217

221218
localInternalClient RestrictedInternalClient
@@ -531,13 +528,11 @@ func NewContext(ctx context.Context, opts ContextOptions) *Context {
531528
secCtx.useNodeAuth = opts.UseNodeAuth
532529

533530
rpcCtx := &Context{
534-
ContextOptions: opts,
535-
SecurityContext: secCtx,
536-
rpcCompression: enableRPCCompression,
537-
MasterCtx: masterCtx,
538-
metrics: makeMetrics(),
539-
heartbeatInterval: opts.RPCHeartbeatInterval,
540-
heartbeatTimeout: opts.RPCHeartbeatTimeout,
531+
ContextOptions: opts,
532+
SecurityContext: secCtx,
533+
rpcCompression: enableRPCCompression,
534+
MasterCtx: masterCtx,
535+
metrics: makeMetrics(),
541536
}
542537

543538
rpcCtx.dialbackMu.Lock()

pkg/rpc/context_test.go

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -876,9 +876,6 @@ func TestOffsetMeasurement(t *testing.T) {
876876
clientClock := &AdvancingClock{time: timeutil.Unix(0, 10)}
877877
clientMaxOffset := time.Duration(0)
878878
clientCtx := newTestContext(clusterID, clientClock, clientMaxOffset, stopper)
879-
// Make the interval shorter to speed up the test.
880-
clientCtx.RPCHeartbeatInterval = 1 * time.Millisecond
881-
clientCtx.RPCHeartbeatTimeout = 1 * time.Millisecond
882879
clientCtx.RemoteClocks.offsetTTL = 5 * clientClock.getAdvancementInterval()
883880
if _, err := clientCtx.GRPCDialNode(remoteAddr, serverNodeID, DefaultClass).Connect(ctx); err != nil {
884881
t.Fatal(err)
@@ -952,7 +949,7 @@ func TestFailedOffsetMeasurement(t *testing.T) {
952949
clientCtx := newTestContext(clusterID, clock, maxOffset, stopper)
953950
// Remove the timeout so that failure arises from exceeding the maximum
954951
// clock reading delay, not the timeout.
955-
clientCtx.heartbeatTimeout = 0
952+
clientCtx.RPCHeartbeatTimeout = 0
956953
// Allow two heartbeat for initialization. The first ping doesn't report an offset,
957954
// the second one thus doesn't have an offset to work with, so it's only on the
958955
// third one that's fully configured.
@@ -1022,9 +1019,6 @@ func TestLatencyInfoCleanupOnClosedConnection(t *testing.T) {
10221019
clientClock := &AdvancingClock{time: timeutil.Unix(0, 10)}
10231020
clientMaxOffset := time.Duration(0)
10241021
clientCtx := newTestContext(clusterID, clientClock, clientMaxOffset, stopper)
1025-
// Make the interval shorter to speed up the test.
1026-
clientCtx.RPCHeartbeatInterval = 1 * time.Millisecond
1027-
clientCtx.RPCHeartbeatTimeout = 1 * time.Millisecond
10281022

10291023
var hbDecommission atomic.Value
10301024
hbDecommission.Store(false)

pkg/rpc/datadriven_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,6 @@ func setupEnv(t *testing.T) *ddEnv {
148148
maxOffset := time.Duration(250)
149149

150150
clientCtx := newTestContext(clusterID, clock, maxOffset, stopper)
151-
clientCtx.heartbeatInterval = 10 * time.Millisecond
152151

153152
env := &ddEnv{
154153
clusterID: clusterID,

pkg/rpc/peer.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,8 @@ func (rpcCtx *Context) newPeer(k peerKey) *peer {
184184
dial: func(ctx context.Context, target string, class ConnectionClass) (*grpc.ClientConn, error) {
185185
return rpcCtx.grpcDialRaw(ctx, target, class, rpcCtx.testingDialOpts...)
186186
},
187-
heartbeatInterval: rpcCtx.heartbeatInterval,
188-
heartbeatTimeout: rpcCtx.heartbeatTimeout,
187+
heartbeatInterval: rpcCtx.RPCHeartbeatInterval,
188+
heartbeatTimeout: rpcCtx.RPCHeartbeatTimeout,
189189
}
190190
var b *circuit.Breaker
191191

0 commit comments

Comments
 (0)