9
9
"context"
10
10
"fmt"
11
11
"io"
12
+ "net"
12
13
"sync/atomic"
13
14
"time"
14
15
@@ -35,7 +36,6 @@ import (
35
36
"github.com/cockroachdb/errors"
36
37
"github.com/cockroachdb/redact"
37
38
"golang.org/x/sync/errgroup"
38
- "google.golang.org/grpc"
39
39
)
40
40
41
41
// rangeMetadataScanChunkSize is a fixed chunk size of 100 associated with
// range metadata scans.
// NOTE(review): presumably the number of entries fetched per scan batch; the
// use sites are not visible in this excerpt, so confirm before relying on it.
const rangeMetadataScanChunkSize = 100
@@ -720,6 +720,22 @@ func makeVisitAvailableNodesInParallel(
720
720
return err
721
721
}
722
722
723
+ // Initialize a nodedialer to establish connections with nodes. We'll
724
+ // create a custom resolver that uses the already available node list,
725
+ // which is more efficient than fetching node information again. Node
726
+ // dialer allows us to reuse utility methods to create RPC connections.
727
+ resolver := func (nodeID roachpb.NodeID ) (net.Addr , roachpb.Locality , error ) {
728
+ for _ , node := range nodes {
729
+ if node .NodeID == nodeID {
730
+ addr := node .AddressForLocality (loc )
731
+ return addr , node .Locality , nil
732
+ }
733
+ }
734
+ // This should not happen since the visitor visits the exact same nodes.
735
+ return nil , roachpb.Locality {}, errors .Newf ("node n%d not found in gossip" , nodeID )
736
+ }
737
+ nd := nodedialer .New (rpcCtx , resolver )
738
+
723
739
var g errgroup.Group
724
740
if maxConcurrency == 0 {
725
741
// "A value of 0 disables concurrency."
@@ -729,7 +745,7 @@ func makeVisitAvailableNodesInParallel(
729
745
for _ , node := range nodes {
730
746
node := node // copy for closure
731
747
g .Go (func () error {
732
- return visitNodeWithRetry (ctx , loc , rpcCtx , retryOpts , visitor , node )
748
+ return visitNodeWithRetry (ctx , nd , retryOpts , visitor , node )
733
749
})
734
750
}
735
751
return g .Wait ()
@@ -738,21 +754,19 @@ func makeVisitAvailableNodesInParallel(
738
754
739
755
func visitNodeWithRetry (
740
756
ctx context.Context ,
741
- loc roachpb.Locality ,
742
- rpcCtx * rpc.Context ,
757
+ nd rpcbase.NodeDialerNoBreaker ,
743
758
retryOpts retry.Options ,
744
759
visitor visitNodeAdminFn ,
745
760
node roachpb.NodeDescriptor ,
746
761
) error {
747
762
var err error
763
+ var ac serverpb.AdminClient
748
764
for r := retry .StartWithCtx (ctx , retryOpts ); r .Next (); {
749
765
log .Infof (ctx , "visiting node n%d, attempt %d" , node .NodeID , r .CurrentAttempt ())
750
- addr := node .AddressForLocality (loc )
751
- var conn * grpc.ClientConn
752
766
// Note that we use ConnectNoBreaker here to avoid any race with probe
753
767
// running on current node and target node restarting. Errors from circuit
754
768
// breaker probes could confuse us and present node as unavailable.
755
- conn , _ , err = rpcCtx . GRPCDialNode ( addr . String (), node . NodeID , node .Locality , rpcbase . DefaultClass ). ConnectNoBreaker ( ctx )
769
+ ac , err = serverpb . DialAdminClientNoBreaker ( nd , ctx , node .NodeID )
756
770
// Nodes would contain dead nodes that we don't need to visit. We can skip
757
771
// them and let caller handle incomplete info.
758
772
if err != nil {
@@ -765,8 +779,7 @@ func visitNodeWithRetry(
765
779
// live.
766
780
continue
767
781
}
768
- client := serverpb .NewAdminClient (conn )
769
- err = visitor (node .NodeID , client )
782
+ err = visitor (node .NodeID , ac )
770
783
if err == nil {
771
784
return nil
772
785
}
0 commit comments