Skip to content

Commit 11c3665

Browse files
server: add information filtering to hot ranges endpoint
This change introduces two enhancements to the hot ranges endpoint. The first lets callers omit table descriptors from the response; the second lets callers specify a per-node limit on the number of ranges requested. Specifying `StatsOnly` on the hot ranges call causes the call to skip collecting table descriptors for the response, which means the call is not required to read from the keyspace. `PerNodeLimit` specifies a local limit for a hot ranges call, so that each node-local call returns at most that many replicas (this is different from the global limit enforced today). Fixes: #142595 Epic: CRDB-43150 Release note (general change): Allows API callers to request statistics only and to specify a per-node limit for the hot ranges response.
1 parent e06eaf6 commit 11c3665

File tree

4 files changed

+135
-6
lines changed

4 files changed

+135
-6
lines changed

docs/generated/http/full.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3599,6 +3599,8 @@ of ranges currently considered “hot” by the node(s).
35993599
| page_token | [string](#cockroach.server.serverpb.HotRangesRequest-string) | | | [reserved](#support-status) |
36003600
| tenant_id | [string](#cockroach.server.serverpb.HotRangesRequest-string) | | | [reserved](#support-status) |
36013601
| nodes | [string](#cockroach.server.serverpb.HotRangesRequest-string) | repeated | | [reserved](#support-status) |
3602+
| per_node_limit | [int32](#cockroach.server.serverpb.HotRangesRequest-int32) | | per_node_limit indicates the maximum number of hot ranges to return for each node. If left empty, the default is 128. | [reserved](#support-status) |
3603+
| stats_only | [bool](#cockroach.server.serverpb.HotRangesRequest-bool) | | stats_only indicates whether to return only the stats for the hot ranges, without pulling descriptor information. | [reserved](#support-status) |
36023604

36033605

36043606

pkg/server/serverpb/status.proto

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1387,6 +1387,18 @@ message HotRangesRequest {
13871387
(gogoproto.customname) = "Nodes",
13881388
(gogoproto.nullable) = true
13891389
];
1390+
// per_node_limit indicates the maximum number of hot ranges
1391+
// to return for each node. If left empty, the default is 128.
1392+
int32 per_node_limit = 6 [
1393+
(gogoproto.customname) = "PerNodeLimit",
1394+
(gogoproto.nullable) = true
1395+
];
1396+
// stats_only indicates whether to return only the stats
1397+
// for the hot ranges, without pulling descriptor information.
1398+
bool stats_only = 7 [
1399+
(gogoproto.customname) = "StatsOnly",
1400+
(gogoproto.nullable) = true
1401+
];
13901402
}
13911403

13921404
// HotRangesResponseV2 is a response payload returned by `HotRangesV2` service.

pkg/server/status.go

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ package server
77

88
import (
99
"bytes"
10+
"cmp"
1011
"context"
1112
"crypto/ecdsa"
1213
"crypto/rsa"
@@ -19,6 +20,7 @@ import (
1920
"os/exec"
2021
"reflect"
2122
"regexp"
23+
"slices"
2224
"sort"
2325
"strconv"
2426
"strings"
@@ -2863,7 +2865,7 @@ func (t *statusServer) HotRangesV2(
28632865
}
28642866

28652867
ti, _ := t.sqlServer.tenantConnect.TenantInfo()
2866-
if ti.TenantID.IsSet() {
2868+
if ti.TenantID.IsSet() && !req.StatsOnly {
28672869
err = t.addDescriptorsToHotRanges(ctx, resp)
28682870
if err != nil {
28692871
return nil, err
@@ -2923,13 +2925,13 @@ func (s *systemStatusServer) HotRangesV2(
29232925
return nil, errors.New("cannot call 'local' mixed with other nodes")
29242926
}
29252927

2926-
resp, err := s.localHotRanges(ctx, tenantID, requestedNodeID)
2928+
resp, err := s.localHotRanges(tenantID, requestedNodeID, int(req.PerNodeLimit))
29272929
if err != nil {
29282930
return nil, err
29292931
}
29302932

2931-
// If operating as the system tenant, add descriptor data to the reposnse.
2932-
if !tenantID.IsSet() {
2933+
// If explicitly set as the system tenant, or unset, add descriptor data to the response (unless the caller asked for stats only).
2934+
if !tenantID.IsSet() && !req.StatsOnly {
29332935
err = s.addDescriptorsToHotRanges(ctx, resp)
29342936
if err != nil {
29352937
return nil, err
@@ -2943,7 +2945,12 @@ func (s *systemStatusServer) HotRangesV2(
29432945
requestedNodes = append(requestedNodes, requestedNodeID)
29442946
}
29452947

2946-
remoteRequest := serverpb.HotRangesRequest{Nodes: []string{"local"}, TenantID: req.TenantID}
2948+
remoteRequest := serverpb.HotRangesRequest{
2949+
Nodes: []string{"local"},
2950+
TenantID: req.TenantID,
2951+
PerNodeLimit: req.PerNodeLimit,
2952+
StatsOnly: req.StatsOnly,
2953+
}
29472954
nodeFn := func(ctx context.Context, status serverpb.StatusClient, nodeID roachpb.NodeID) ([]*serverpb.HotRangesResponseV2_HotRange, error) {
29482955
nodeResp, err := status.HotRangesV2(ctx, &remoteRequest)
29492956
if err != nil {
@@ -2990,7 +2997,7 @@ func (s *systemStatusServer) HotRangesV2(
29902997
// Returns a HotRangesResponseV2 containing detailed information about each hot range,
29912998
// or an error if the operation fails.
29922999
func (s *systemStatusServer) localHotRanges(
2993-
ctx context.Context, tenantID roachpb.TenantID, requestedNodeID roachpb.NodeID,
3000+
tenantID roachpb.TenantID, requestedNodeID roachpb.NodeID, localLimit int,
29943001
) (*serverpb.HotRangesResponseV2, error) {
29953002
// Initialize response object
29963003
var resp serverpb.HotRangesResponseV2
@@ -3048,6 +3055,15 @@ func (s *systemStatusServer) localHotRanges(
30483055
return nil, err
30493056
}
30503057

3058+
// Sort the ranges by CPU time, hottest first, so that the truncation below keeps the hottest ranges rather than the coolest ones.
3059+
slices.SortFunc(resp.Ranges, func(a, b *serverpb.HotRangesResponseV2_HotRange) int {
3060+
return cmp.Compare(b.CPUTimePerSecond, a.CPUTimePerSecond)
3061+
})
3062+
// Truncate the response only if a positive localLimit is set; zero means no limit, and a negative value is ignored rather than used as a slice bound (which would panic).
3063+
if localLimit > 0 && localLimit < len(resp.Ranges) {
3064+
resp.Ranges = resp.Ranges[:localLimit]
3065+
}
3066+
30513067
return &resp, nil
30523068
}
30533069

pkg/server/status_test.go

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package server
88
import (
99
"context"
1010
"encoding/json"
11+
"fmt"
1112
"os"
1213
"slices"
1314
"sync"
@@ -820,3 +821,101 @@ func TestHotRangesByNode(t *testing.T) {
820821
require.Error(t, err, "cannot call 'local' mixed with other nodes")
821822
})
822823
}
824+
825+
func TestHotRangesStatsOnly(t *testing.T) {
826+
defer leaktest.AfterTest(t)()
827+
sc := log.ScopeWithoutShowLogs(t)
828+
defer sc.Close(t)
829+
830+
ctx := context.Background()
831+
832+
s := serverutils.StartServerOnly(t, base.TestServerArgs{
833+
DefaultTestTenant: base.TestControlsTenantsExplicitly,
834+
StoreSpecs: []base.StoreSpec{
835+
base.DefaultTestStoreSpec,
836+
base.DefaultTestStoreSpec,
837+
base.DefaultTestStoreSpec,
838+
},
839+
Knobs: base.TestingKnobs{
840+
Store: &kvserver.StoreTestingKnobs{
841+
ReplicaPlannerKnobs: plan.ReplicaPlannerTestingKnobs{
842+
DisableReplicaRebalancing: true,
843+
},
844+
},
845+
},
846+
})
847+
defer s.Stopper().Stop(ctx)
848+
849+
for _, test := range []struct {
850+
statsOnly bool
851+
hasDescriptors bool
852+
}{
853+
{true, false},
854+
{false, true},
855+
} {
856+
t.Run(fmt.Sprintf("statsOnly=%t hasDescriptors %t", test.statsOnly, test.hasDescriptors), func(t *testing.T) {
857+
testutils.SucceedsSoon(t, func() error {
858+
ss := s.StatusServer().(*systemStatusServer)
859+
resp, err := ss.HotRangesV2(ctx, &serverpb.HotRangesRequest{NodeID: "local", StatsOnly: test.statsOnly})
860+
if err != nil {
861+
return err
862+
}
863+
864+
if len(resp.Ranges) == 0 {
865+
return errors.New("waiting for hot ranges to be collected")
866+
}
867+
868+
hasDescriptors := false
869+
for _, r := range resp.Ranges {
870+
allDescriptors := append(r.Databases, append(r.Tables, r.Indexes...)...)
871+
if len(allDescriptors) > 0 {
872+
hasDescriptors = true
873+
}
874+
}
875+
876+
require.Equal(t, test.hasDescriptors, hasDescriptors)
877+
return nil
878+
})
879+
})
880+
}
881+
}
882+
883+
func TestHotRangesNodeLimit(t *testing.T) {
884+
defer leaktest.AfterTest(t)()
885+
sc := log.ScopeWithoutShowLogs(t)
886+
defer sc.Close(t)
887+
888+
ctx := context.Background()
889+
890+
s := serverutils.StartServerOnly(t, base.TestServerArgs{
891+
DefaultTestTenant: base.TestControlsTenantsExplicitly,
892+
StoreSpecs: []base.StoreSpec{
893+
base.DefaultTestStoreSpec,
894+
base.DefaultTestStoreSpec,
895+
base.DefaultTestStoreSpec,
896+
},
897+
Knobs: base.TestingKnobs{
898+
Store: &kvserver.StoreTestingKnobs{
899+
ReplicaPlannerKnobs: plan.ReplicaPlannerTestingKnobs{
900+
DisableReplicaRebalancing: true,
901+
},
902+
},
903+
},
904+
})
905+
defer s.Stopper().Stop(ctx)
906+
907+
testutils.SucceedsSoon(t, func() error {
908+
ss := s.StatusServer().(*systemStatusServer)
909+
resp, err := ss.HotRangesV2(ctx, &serverpb.HotRangesRequest{NodeID: "local", PerNodeLimit: 5})
910+
if err != nil {
911+
return err
912+
}
913+
914+
if len(resp.Ranges) == 0 {
915+
return errors.New("waiting for hot ranges to be collected")
916+
}
917+
918+
require.Equal(t, 5, len(resp.Ranges))
919+
return nil
920+
})
921+
}

0 commit comments

Comments
 (0)