Skip to content

Commit 4ba00f1

Browse files
committed
democluster: rely less on firstServer
Prior to this patch, we were over-relying on the "firstServer" in a demo cluster to do decommission/recommission operations. In a later version, we would like to introduce the option to stop/restart the first server. In that case, it would not necessarily be available. This commit takes one step in that direction. Release note: None
1 parent 7380a91 commit 4ba00f1

File tree

1 file changed

+32
-9
lines changed

1 file changed

+32
-9
lines changed

pkg/cli/democluster/demo_cluster.go

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,6 +1025,30 @@ func (c *transientCluster) DrainAndShutdown(ctx context.Context, nodeID int32) e
10251025
return nil
10261026
}
10271027

1028+
// findOtherServer returns an admin RPC client for a server other than
1029+
// the one identified by nodeID, along with the finish function obtained
// from getAdminClient (callers in this file defer it). It scans c.servers
// in order and uses the first entry that has a non-nil TestServer and a
// different node ID. If getAdminClient fails for that entry, the error is
// returned as-is; if no entry qualifies, the returned error mentions op.
1030+
func (c *transientCluster) findOtherServer(
1031+
ctx context.Context, nodeID int32, op string,
1032+
) (serverpb.AdminClient, func(), error) {
1033+
// Find a node to use as the sender of the request.
1034+
var adminClient serverpb.AdminClient
1035+
var finish func()
1036+
for _, s := range c.servers {
1037+
// Skip entries without a TestServer (presumably nodes that are not
// currently running; confirm against the shutdown path) and the
// node that is itself the target of the operation.
if s.TestServer != nil && s.nodeID != roachpb.NodeID(nodeID) {
1038+
var err error
1039+
adminClient, finish, err = c.getAdminClient(ctx, *(s.TestServer.Cfg))
1040+
if err != nil {
1041+
return nil, nil, err
1042+
}
1043+
// One sender is enough: stop at the first candidate.
break
1044+
}
1045+
}
1046+
// adminClient is still nil when the loop found no candidate server.
if adminClient == nil {
1047+
return nil, nil, errors.Newf("no other nodes available to send a %s request", op)
1048+
}
1049+
return adminClient, finish, nil
1050+
}
1051+
10281052
// Recommission recommissions a given node.
10291053
func (c *transientCluster) Recommission(ctx context.Context, nodeID int32) error {
10301054
nodeIndex := int(nodeID - 1)
@@ -1041,14 +1065,14 @@ func (c *transientCluster) Recommission(ctx context.Context, nodeID int32) error
10411065
ctx, cancel := context.WithCancel(ctx)
10421066
defer cancel()
10431067

1044-
adminClient, finish, err := c.getAdminClient(ctx, *(c.firstServer.Cfg))
1068+
// Find a node to use as the sender.
1069+
adminClient, finish, err := c.findOtherServer(ctx, nodeID, "recommission")
10451070
if err != nil {
10461071
return err
10471072
}
1048-
10491073
defer finish()
1050-
_, err = adminClient.Decommission(ctx, req)
1051-
if err != nil {
1074+
1075+
if _, err = adminClient.Decommission(ctx, req); err != nil {
10521076
return errors.Wrap(err, "while trying to mark as decommissioning")
10531077
}
10541078

@@ -1066,7 +1090,8 @@ func (c *transientCluster) Decommission(ctx context.Context, nodeID int32) error
10661090
ctx, cancel := context.WithCancel(ctx)
10671091
defer cancel()
10681092

1069-
adminClient, finish, err := c.getAdminClient(ctx, *(c.firstServer.Cfg))
1093+
// Find a node to use as the sender.
1094+
adminClient, finish, err := c.findOtherServer(ctx, nodeID, "decommission")
10701095
if err != nil {
10711096
return err
10721097
}
@@ -1080,8 +1105,7 @@ func (c *transientCluster) Decommission(ctx context.Context, nodeID int32) error
10801105
NodeIDs: []roachpb.NodeID{roachpb.NodeID(nodeID)},
10811106
TargetMembership: livenesspb.MembershipStatus_DECOMMISSIONING,
10821107
}
1083-
_, err = adminClient.Decommission(ctx, req)
1084-
if err != nil {
1108+
if _, err := adminClient.Decommission(ctx, req); err != nil {
10851109
return errors.Wrap(err, "while trying to mark as decommissioning")
10861110
}
10871111
}
@@ -1091,8 +1115,7 @@ func (c *transientCluster) Decommission(ctx context.Context, nodeID int32) error
10911115
NodeIDs: []roachpb.NodeID{roachpb.NodeID(nodeID)},
10921116
TargetMembership: livenesspb.MembershipStatus_DECOMMISSIONED,
10931117
}
1094-
_, err = adminClient.Decommission(ctx, req)
1095-
if err != nil {
1118+
if _, err := adminClient.Decommission(ctx, req); err != nil {
10961119
return errors.Wrap(err, "while trying to mark as decommissioned")
10971120
}
10981121
}

0 commit comments

Comments
 (0)