@@ -14,18 +14,15 @@ import (
14
14
"github.com/VividCortex/ewma"
15
15
"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
16
16
"github.com/cockroachdb/cockroach/pkg/roachpb"
17
- "github.com/cockroachdb/cockroach/pkg/rpc/rpcbase"
18
17
"github.com/cockroachdb/cockroach/pkg/util/circuit"
19
18
"github.com/cockroachdb/cockroach/pkg/util/grpcutil"
20
19
"github.com/cockroachdb/cockroach/pkg/util/log"
21
20
"github.com/cockroachdb/cockroach/pkg/util/netutil"
22
- "github.com/cockroachdb/cockroach/pkg/util/stop"
23
21
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
24
22
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
25
23
"github.com/cockroachdb/errors"
26
24
"github.com/cockroachdb/logtags"
27
25
"github.com/cockroachdb/redact"
28
- "google.golang.org/grpc"
29
26
"google.golang.org/grpc/status"
30
27
"storj.io/drpc"
31
28
)
@@ -121,15 +118,13 @@ func (p *peer[Conn]) releaseMetricsLocked() {
121
118
// See (*peer).launch for details on the probe (heartbeat loop) itself.
122
119
type peer [Conn rpcConn ] struct {
123
120
peerMetrics
124
- k peerKey
125
- opts * ContextOptions
126
- heartbeatInterval time.Duration
127
- heartbeatTimeout time.Duration
128
- dial func (ctx context.Context , target string , class rpcbase.ConnectionClass ) (Conn , error )
129
- dialDRPC func (ctx context.Context , target string , class rpcbase.ConnectionClass ) (drpc.Conn , error )
130
- newHeartbeatClient heartbeatClientConstructor [Conn ]
131
- newBatchStreamClient streamConstructor [* kvpb.BatchRequest , * kvpb.BatchResponse , Conn ]
132
- newCloseNotifier closeNotifierConstructor [Conn ]
121
+ k peerKey
122
+ opts * ContextOptions
123
+ newHeartbeatClient heartbeatClientConstructor [Conn ]
124
+ heartbeatInterval time.Duration
125
+ heartbeatTimeout time.Duration
126
+ connOptions * ConnectionOptions [Conn ]
127
+ drpcDial dialFunc [drpc.Conn ]
133
128
// b maintains connection health. This breaker's async probe is always
134
129
// active - it is the heartbeat loop and manages `mu.c.` (including
135
130
// recreating it after the connection fails and has to be redialed).
@@ -210,6 +205,14 @@ func (p *peer[Conn]) snap() PeerSnap[Conn] {
210
205
return p .mu .PeerSnap
211
206
}
212
207
208
// peerOptions bundles the per-peer dependencies handed to newPeer, which
// copies most of them onto the peer it constructs.
+ type peerOptions [Conn rpcConn ] struct {
209
// locality is not read by the portion of newPeer visible here.
// NOTE(review): presumably used by the caller when acquiring metrics - confirm.
+ locality roachpb.Locality
210
// pm becomes the peer's embedded peerMetrics.
+ pm peerMetrics
211
// newHeartbeatClient is stored on the peer as its heartbeat client constructor.
+ newHeartbeatClient heartbeatClientConstructor [Conn ]
212
// connOptions is stored on the peer and passed to newConnectionToNodeID; its
// dial and newCloseNotifier members are invoked from (*peer).runOnce.
+ connOptions * ConnectionOptions [Conn ]
213
// peers is stored on the peer as its peers field.
+ peers * peerMap [Conn ]
214
+ }
215
+
213
216
// newPeer returns a peer whose circuit breaker trips when the connection (associated
214
217
// with the provided peerKey) fails. The breaker's probe *is* the heartbeat loop
215
218
// and is thus running at all times. The exception is a decommissioned node, for
@@ -229,39 +232,26 @@ func (p *peer[Conn]) snap() PeerSnap[Conn] {
229
232
// map, the next attempt to dial the node will start from a blank slate. In
230
233
// other words, even with this theoretical race, the situation will sort itself
231
234
// out quickly.
232
- func (rpcCtx * Context ) newPeer ( k peerKey , locality roachpb. Locality ) * peer [* grpc. ClientConn ] {
235
+ func newPeer [ Conn rpcConn ] (rpcCtx * Context , k peerKey , peerOpts * peerOptions [ Conn ] ) * peer [Conn ] {
233
236
// Initialization here is a bit circular. The peer holds the breaker. The
234
237
// breaker probe references the peer because it needs to replace the one-shot
235
238
// Connection when it makes a new connection in the probe. And (all but the
236
239
// first incarnation of) the Connection also holds on to the breaker since the
237
240
// Connect method needs to do the short-circuiting (if a Connection is created
238
241
// while the breaker is tripped, we want to block in Connect only once we've
239
242
// seen the first heartbeat succeed).
240
- pm , lm := rpcCtx .metrics .acquire (k , locality )
241
- p := & peer [* grpc.ClientConn ]{
242
- peerMetrics : pm ,
243
+ p := & peer [Conn ]{
244
+ peerMetrics : peerOpts .pm ,
243
245
logDisconnectEvery : log .Every (time .Minute ),
244
246
k : k ,
245
247
remoteClocks : rpcCtx .RemoteClocks ,
246
248
opts : & rpcCtx .ContextOptions ,
247
- peers : & rpcCtx .peers ,
248
- dial : func (ctx context.Context , target string , class rpcbase.ConnectionClass ) (* grpc.ClientConn , error ) {
249
- additionalDialOpts := []grpc.DialOption {grpc .WithStatsHandler (& statsTracker {lm })}
250
- additionalDialOpts = append (additionalDialOpts , rpcCtx .testingDialOpts ... )
251
- return rpcCtx .grpcDialRaw (ctx , target , class , additionalDialOpts ... )
252
- },
253
- dialDRPC : dialDRPC (rpcCtx ),
254
- newHeartbeatClient : func (cc * grpc.ClientConn ) RPCHeartbeatClient {
255
- return NewGRPCHeartbeatClientAdapter (cc )
256
- },
257
- newBatchStreamClient : func (ctx context.Context , cc * grpc.ClientConn ) (BatchStreamClient , error ) {
258
- return kvpb .NewInternalClient (cc ).BatchStream (ctx )
259
- },
260
- newCloseNotifier : func (stopper * stop.Stopper , cc * grpc.ClientConn ) closeNotifier {
261
- return & grpcCloseNotifier {stopper : stopper , conn : cc }
262
- },
263
- heartbeatInterval : rpcCtx .RPCHeartbeatInterval ,
264
- heartbeatTimeout : rpcCtx .RPCHeartbeatTimeout ,
249
+ peers : peerOpts .peers ,
250
+ connOptions : peerOpts .connOptions ,
251
+ drpcDial : dialDRPC (rpcCtx ),
252
+ newHeartbeatClient : peerOpts .newHeartbeatClient ,
253
+ heartbeatInterval : rpcCtx .RPCHeartbeatInterval ,
254
+ heartbeatTimeout : rpcCtx .RPCHeartbeatTimeout ,
265
255
}
266
256
var b * circuit.Breaker
267
257
@@ -275,8 +265,8 @@ func (rpcCtx *Context) newPeer(k peerKey, locality roachpb.Locality) *peer[*grpc
275
265
},
276
266
})
277
267
p .b = b
278
- c := newConnectionToNodeID (p .opts , k , b .Signal , p .newBatchStreamClient )
279
- p .mu .PeerSnap = PeerSnap [* grpc. ClientConn ]{c : c }
268
+ c := newConnectionToNodeID (p .opts , k , b .Signal , p .connOptions )
269
+ p .mu .PeerSnap = PeerSnap [Conn ]{c : c }
280
270
281
271
return p
282
272
}
@@ -375,7 +365,7 @@ func (p *peer[Conn]) run(ctx context.Context, report func(error), done func()) {
375
365
func () {
376
366
p .mu .Lock ()
377
367
defer p .mu .Unlock ()
378
- p .mu .c = newConnectionToNodeID (p .opts , p .k , p .mu .c .breakerSignalFn , p .newBatchStreamClient )
368
+ p .mu .c = newConnectionToNodeID (p .opts , p .k , p .mu .c .breakerSignalFn , p .connOptions )
379
369
}()
380
370
381
371
if p .snap ().deleteAfter != 0 {
@@ -388,14 +378,14 @@ func (p *peer[Conn]) run(ctx context.Context, report func(error), done func()) {
388
378
}
389
379
390
380
func (p * peer [Conn ]) runOnce (ctx context.Context , report func (error )) error {
391
- cc , err := p .dial (ctx , p .k .TargetAddr , p .k .Class )
381
+ cc , err := p .connOptions . dial (ctx , p .k .TargetAddr , p .k .Class )
392
382
if err != nil {
393
383
return err
394
384
}
395
385
defer func () {
396
386
_ = cc .Close () // nolint:grpcconnclose
397
387
}()
398
- dc , err := p .dialDRPC (ctx , p .k .TargetAddr , p .k .Class )
388
+ dc , err := p .drpcDial (ctx , p .k .TargetAddr , p .k .Class )
399
389
if err != nil {
400
390
return err
401
391
}
@@ -406,7 +396,7 @@ func (p *peer[Conn]) runOnce(ctx context.Context, report func(error)) error {
406
396
// Set up notifications on a channel when gRPC tears down, so that we
407
397
// can trigger another instant heartbeat for expedited circuit breaker
408
398
// tripping.
409
- connClosedCh := p .newCloseNotifier (p .opts .Stopper , cc ).CloseNotify (ctx )
399
+ connClosedCh := p .connOptions . newCloseNotifier (p .opts .Stopper , cc ).CloseNotify (ctx )
410
400
411
401
if p .remoteClocks != nil {
412
402
p .remoteClocks .OnConnect (ctx , p .k .NodeID )
0 commit comments