Skip to content

Commit 0ebbc3c

Browse files
committed
kvserver: return error properly for GetAggregatedStoreStats
Previously, GetAggregatedStoreStats would panic on error for store.Capacity instead of returning it. This commit fixes the issue by properly returning the error back to store.Descriptor.
1 parent 4a0ad21 commit 0ebbc3c

File tree

4 files changed

+28
-14
lines changed

4 files changed

+28
-14
lines changed

pkg/kv/kvserver/load/node_capacity_provider.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ type StoresStatsAggregator interface {
2424
// GetAggregatedStoreStats returns the total cpu usage across all stores and
2525
// the count of stores. If useCached is true, it uses the cached store
2626
// descriptor instead of computing new ones. Implemented by Stores.
27-
GetAggregatedStoreStats(useCached bool) (aggregatedCPUUsage int64, totalStoreCount int32)
27+
GetAggregatedStoreStats(useCached bool) (aggregatedCPUUsage int64, totalStoreCount int32, err error)
2828
}
2929

3030
// NodeCapacityProvider reports node-level cpu usage and capacity by sampling
@@ -82,8 +82,11 @@ func (n *NodeCapacityProvider) Run(ctx context.Context) {
8282
// capacity and aggregated store-level cpu usage. If useCached is true, it will
8383
// use cached store descriptors to aggregate the sum of store-level cpu
8484
// capacity.
85-
func (n *NodeCapacityProvider) GetNodeCapacity(useCached bool) roachpb.NodeCapacity {
86-
storesCPURate, numStores := n.stores.GetAggregatedStoreStats(useCached)
85+
func (n *NodeCapacityProvider) GetNodeCapacity(useCached bool) (roachpb.NodeCapacity, error) {
86+
storesCPURate, numStores, err := n.stores.GetAggregatedStoreStats(useCached)
87+
if err != nil {
88+
return roachpb.NodeCapacity{}, err
89+
}
8790
// TODO(wenyihu6): may be unexpected to caller that useCached only applies to
8891
// the stores stats but not runtime load monitor. We can change
8992
// runtimeLoadMonitor to also fetch updated stats.
@@ -95,7 +98,7 @@ func (n *NodeCapacityProvider) GetNodeCapacity(useCached bool) roachpb.NodeCapac
9598
NumStores: numStores,
9699
NodeCPURateCapacity: cpuCapacityNanoPerSec,
97100
NodeCPURateUsage: cpuUsageNanoPerSec,
98-
}
101+
}, nil
99102
}
100103

101104
// runtimeLoadMonitor polls cpu usage and capacity stats of the node

pkg/kv/kvserver/load/node_capacity_provider_test.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ type mockStoresStatsAggregator struct {
2525

2626
func (m *mockStoresStatsAggregator) GetAggregatedStoreStats(
2727
_ bool,
28-
) (totalCPUUsage int64, totalStoreCount int32) {
29-
return m.cpuUsage, m.storeCount
28+
) (totalCPUUsage int64, totalStoreCount int32, _ error) {
29+
return m.cpuUsage, m.storeCount, nil
3030
}
3131

3232
// TestNodeCapacityProvider tests the basic functionality of the
@@ -51,7 +51,8 @@ func TestNodeCapacityProvider(t *testing.T) {
5151

5252
// Provider should have valid stats.
5353
testutils.SucceedsSoon(t, func() error {
54-
nc := provider.GetNodeCapacity(false)
54+
nc, err := provider.GetNodeCapacity(false)
55+
require.NoError(t, err)
5556
if nc.NodeCPURateUsage == 0 || nc.NodeCPURateCapacity == 0 || nc.StoresCPURate == 0 {
5657
return errors.Newf(
5758
"CPU usage or capacity is 0: node cpu rate usage %v, node cpu rate capacity %v, stores cpu rate %v",
@@ -62,7 +63,8 @@ func TestNodeCapacityProvider(t *testing.T) {
6263

6364
cancel()
6465
// GetNodeCapacity should still return valid stats after cancellation.
65-
nc := provider.GetNodeCapacity(false)
66+
nc, err := provider.GetNodeCapacity(false)
67+
require.NoError(t, err)
6668
require.Greater(t, nc.NodeCPURateCapacity, int64(0))
6769
require.Greater(t, nc.NodeCPURateUsage, int64(0))
6870
}

pkg/kv/kvserver/store.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3295,14 +3295,19 @@ func (s *Store) Descriptor(ctx context.Context, useCached bool) (*roachpb.StoreD
32953295
return nil, err
32963296
}
32973297

3298+
nc, err := s.nodeCapacityProvider.GetNodeCapacity(useCached)
3299+
if err != nil {
3300+
return nil, err
3301+
}
3302+
32983303
// Initialize the store descriptor.
32993304
return &roachpb.StoreDescriptor{
33003305
StoreID: s.Ident.StoreID,
33013306
Attrs: s.Attrs(),
33023307
Node: *s.nodeDesc,
33033308
Capacity: capacity,
33043309
Properties: s.Properties(),
3305-
NodeCapacity: s.nodeCapacityProvider.GetNodeCapacity(useCached),
3310+
NodeCapacity: nc,
33063311
}, nil
33073312
}
33083313

pkg/kv/kvserver/stores.go

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -338,15 +338,19 @@ func (ls *Stores) GetStoreMetricRegistry(storeID roachpb.StoreID) *metric.Regist
338338

339339
// GetAggregatedStoreStats returns the aggregated cpu usage across all stores and
340340
// the count of stores.
341-
func (ls *Stores) GetAggregatedStoreStats(useCached bool) (storesCPURate int64, numStores int32) {
342-
_ = ls.VisitStores(func(s *Store) error {
341+
func (ls *Stores) GetAggregatedStoreStats(
342+
useCached bool,
343+
) (storesCPURate int64, numStores int32, _ error) {
344+
if err := ls.VisitStores(func(s *Store) error {
343345
c, err := s.Capacity(context.Background(), useCached)
344346
if err != nil {
345-
panic(err)
347+
return err
346348
}
347349
storesCPURate += int64(c.CPUPerSecond)
348350
numStores++
349351
return nil
350-
})
351-
return storesCPURate, numStores
352+
}); err != nil {
353+
return 0, 0, err
354+
}
355+
return storesCPURate, numStores, nil
352356
}

0 commit comments

Comments
 (0)