@@ -33,6 +33,11 @@ import (
 	"strconv"

 	"github.com/cockroachdb/cockroach/pkg/kv"
+	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
+	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/allocatorimpl"
+	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness/livenesspb"
+	"github.com/cockroachdb/cockroach/pkg/raft/raftpb"
+	"github.com/cockroachdb/cockroach/pkg/roachpb"
 	"github.com/cockroachdb/cockroach/pkg/security/username"
 	"github.com/cockroachdb/cockroach/pkg/server/apiconstants"
 	"github.com/cockroachdb/cockroach/pkg/server/apiutil"
@@ -55,6 +60,7 @@ const (

 type ApiV2System interface {
 	health(w http.ResponseWriter, r *http.Request)
+	restartSafetyCheck(w http.ResponseWriter, r *http.Request)
 	listNodes(w http.ResponseWriter, r *http.Request)
 	listNodeRanges(w http.ResponseWriter, r *http.Request)
 }
@@ -95,7 +101,7 @@ type apiV2SystemServer struct {
 }

 var _ ApiV2System = &apiV2SystemServer{}
-var _ http.Handler = &apiV2Server{}
+var _ http.Handler = &apiV2SystemServer{}

 // newAPIV2Server returns a new apiV2Server.
 func newAPIV2Server(ctx context.Context, opts *apiV2ServerOpts) http.Handler {
@@ -180,6 +186,7 @@ func registerRoutes(
 		{"nodes/{node_id}/ranges/", systemRoutes.listNodeRanges, true, authserver.ViewClusterMetadataRole, false},
 		{"ranges/hot/", a.listHotRanges, true, authserver.ViewClusterMetadataRole, false},
 		{"ranges/{range_id:[0-9]+}/", a.listRange, true, authserver.ViewClusterMetadataRole, false},
+		{"health/restart_safety/", systemRoutes.restartSafetyCheck, false, authserver.RegularRole, false},
 		{"health/", systemRoutes.health, false, authserver.RegularRole, false},
 		{"users/", a.listUsers, true, authserver.RegularRole, false},
 		{"events/", a.listEvents, true, authserver.ViewClusterMetadataRole, false},
@@ -394,6 +401,169 @@ func (a *apiV2Server) health(w http.ResponseWriter, r *http.Request) {
 	healthInternal(w, r, a.admin.checkReadinessForHealthCheck)
 }

+func (a *apiV2Server) restartSafetyCheck(w http.ResponseWriter, r *http.Request) {
+	apiutil.WriteJSONResponse(r.Context(), w, http.StatusNotImplemented, nil)
+}
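+
+// NB: apiV2Server serves requests for secondary tenants, which don't manage
+// KV nodes, so the check above is a stub; the system-tenant implementation
+// follows.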
+
+// # Restart Safety
+//
+// Endpoint to expose restart safety status. A 200 response indicates that
+// terminating the node in question won't cause any ranges to become
+// unavailable at the time the response was prepared. Users may use this check
+// as a precondition for advancing a rolling-restart process. Checks fail with
+// a 503 when the node is unsafe to restart, or with a 500 when there is an
+// error evaluating safety.
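+//
+// A rolling-restart script might gate on this endpoint before stopping the
+// node, for example (illustrative invocation; assumes the default /api/v2/
+// prefix and HTTP address):
+//
+//	curl -f https://localhost:8080/api/v2/health/restart_safety/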
+func (a *apiV2SystemServer) restartSafetyCheck(w http.ResponseWriter, r *http.Request) {
+	ctx := r.Context()
+
+	if r.Method != http.MethodGet {
+		http.Error(w, http.StatusText(http.StatusMethodNotAllowed), http.StatusMethodNotAllowed)
+		return
+	}
+
+	const AllowMinimumQuorumFlag = "allow_minimum_quorum"
+
+	query := r.URL.Query()
+	allowMinimumQuorum := false
+	var err error
+	if query.Has(AllowMinimumQuorumFlag) {
+		allowMinimumQuorum, err = strconv.ParseBool(query.Get(AllowMinimumQuorumFlag))
+		if err != nil {
+			http.Error(w, "invalid allow_minimum_quorum value; should be true or false", http.StatusBadRequest)
+			return
+		}
+	}
+
+	nodeID := a.systemStatus.node.Descriptor.NodeID
+
+	res, err := checkRestartSafe(
+		ctx,
+		nodeID,
+		a.systemStatus.nodeLiveness,
+		a.systemStatus.stores,
+		a.systemStatus.storePool.ClusterNodeCount(),
+		allowMinimumQuorum,
+	)
+	if err != nil {
+		http.Error(w, "Error checking store status", http.StatusInternalServerError)
+		return
+	}
+
+	if !res.IsRestartSafe {
+		// In the style of health check endpoints, we respond with a server error
+		// to indicate unhealthy. This makes it much easier to integrate this
+		// endpoint with relatively unsophisticated clients, such as shell scripts.
+		apiutil.WriteJSONResponse(ctx, w, http.StatusServiceUnavailable, res)
+		return
+	}
+	apiutil.WriteJSONResponse(ctx, w, http.StatusOK, res)
+}
+
+// storeVisitor is the subset of the kvserver.Stores API that checkRestartSafe
+// needs; the narrow interface keeps the check easy to stub out in tests.
+type storeVisitor interface {
+	VisitStores(visitor func(s *kvserver.Store) error) error
+}
+
+// RestartSafetyResponse indicates whether the current node is critical
+// (cannot be restarted safely).
+type RestartSafetyResponse struct {
+	NodeID int32 `json:"node_id,omitempty"`
+	// IsRestartSafe is true if restarting this node is safe, under the
+	// quorum restrictions requested.
+	IsRestartSafe bool `json:"is_restart_safe,omitempty"`
+	// UnavailableRangeCount indicates how many currently unavailable
+	// ranges contribute to restart being unsafe.
+	UnavailableRangeCount int32 `json:"unavailable_range_count,omitempty"`
+	// UnderreplicatedRangeCount indicates how many currently
+	// underreplicated ranges contribute to restart being unsafe.
+	UnderreplicatedRangeCount int32 `json:"underreplicated_range_count,omitempty"`
+	// RaftLeadershipOnNodeCount indicates how many ranges this node leads.
+	RaftLeadershipOnNodeCount int32 `json:"raft_leadership_on_node_count,omitempty"`
+	// StoreNotDrainingCount indicates how many of this node's stores are
+	// not draining.
+	StoreNotDrainingCount int32 `json:"store_not_draining_count,omitempty"`
+}
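+
+// A restart-safe response body looks like the following (illustrative values;
+// zero-valued fields are dropped by omitempty):
+//
+//	{"node_id": 1, "is_restart_safe": true}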
+
+// checkRestartSafe reports whether the given node can be terminated without
+// making any range unavailable, by inspecting every replica on the node's
+// stores. The vitality of other nodes is read from the liveness cache. If
+// allowMinimumQuorum is set, underreplicated ranges that would still retain
+// quorum with this node down are treated as safe.
+func checkRestartSafe(
+	ctx context.Context,
+	nodeID roachpb.NodeID,
+	nodeLiveness livenesspb.NodeVitalityInterface,
+	stores storeVisitor,
+	nodeCount int,
+	allowMinimumQuorum bool,
+) (*RestartSafetyResponse, error) {
+	res := &RestartSafetyResponse{
+		IsRestartSafe: true,
+		NodeID:        int32(nodeID),
+	}
+
+	vitality := nodeLiveness.ScanNodeVitalityFromCache()
+	// For each of the node's stores, check each replica's status.
+	err := stores.VisitStores(func(store *kvserver.Store) error {
+		if int32(store.NodeID()) != res.NodeID {
+			return nil
+		}
+
+		if !store.IsDraining() {
+			res.IsRestartSafe = false
+			res.StoreNotDrainingCount++
+		}
+
+		store.VisitReplicas(func(replica *kvserver.Replica) bool {
+			desc, spanCfg := replica.DescAndSpanConfig()
+
+			neededVoters := allocatorimpl.GetNeededVoters(spanCfg.GetNumVoters(), nodeCount)
+			rangeStatus := desc.Replicas().ReplicationStatus(func(rd roachpb.ReplicaDescriptor) bool {
+				return vitality[rd.NodeID].IsLive(livenesspb.Metrics)
+			}, neededVoters, -1)
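+			// neededVoters is the zone's configured voter count clamped to
+			// the cluster size; passing -1 skips the non-voter replication
+			// check.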
+
+			isLeader := replica.RaftBasicStatus().RaftState == raftpb.StateLeader
+
+			// Reject Unavailable, Underreplicated, and Leader replicas as unsafe
+			// to terminate. Additionally, reject when the store (really the node,
+			// but the status is reported at the store level) is not draining.
+			if !rangeStatus.Available {
+				res.IsRestartSafe = false
+				res.UnavailableRangeCount++
+			}
+			if isLeader {
+				res.IsRestartSafe = false
+				res.RaftLeadershipOnNodeCount++
+			}
+
+			if rangeStatus.UnderReplicated {
+				if neededVoters >= 5 && allowMinimumQuorum {
+					// When neededVoters >= 5, present underreplication doesn't
+					// actually imply unavailability after we terminate this
+					// node. The caller has opted in to allowMinimumQuorum
+					// restarts, so check whether this node being down actually
+					// causes unavailability.
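+					// For example, with neededVoters == 5 and one replica on a
+					// dead node, four voters are live now; taking this node
+					// down leaves three of five, which still forms a quorum.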
+					futureStatus := desc.Replicas().ReplicationStatus(func(rd roachpb.ReplicaDescriptor) bool {
+						if rd.NodeID == nodeID {
+							return false
+						}
+						return vitality[rd.NodeID].IsLive(livenesspb.Metrics)
+					}, neededVoters, -1)
+
+					if !futureStatus.Available {
+						// Even minimum quorum won't be maintained if we down
+						// this node.
+						res.IsRestartSafe = false
+						res.UnderreplicatedRangeCount++
+					}
+				} else {
+					// No allowMinimumQuorum (or the replication factor is too
+					// low for it to matter), so underreplication alone makes
+					// the restart unsafe.
+					res.IsRestartSafe = false
+					res.UnderreplicatedRangeCount++
+				}
+			}
+
+			return ctx.Err() == nil
+		})
+
+		if ctx.Err() != nil {
+			return ctx.Err()
+		}
+		return nil
+	})
+	return res, err
+}
+
 // # Get metric recording and alerting rule templates
 //
 // Endpoint to export recommended metric recording and alerting rules.