// Copyright 2025 The Cockroach Authors.
//
// Use of this software is governed by the CockroachDB Software License
// included in the /LICENSE file.

package load

import (
	"context"
	"time"

	"github.com/VividCortex/ewma"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/server/status"
	"github.com/cockroachdb/cockroach/pkg/util/buildutil"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/errors"
)

// StoresStatsAggregator provides aggregated CPU usage stats across all stores.
type StoresStatsAggregator interface {
	// GetAggregatedStoreStats returns the total CPU usage across all stores
	// and the count of stores. If useCached is true, it uses cached store
	// descriptors instead of computing new ones. Implemented by Stores.
	GetAggregatedStoreStats(useCached bool) (aggregatedCPUUsage int64, totalStoreCount int32)
}
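
// For tests, a minimal stub satisfying this interface might look like the
// following (hypothetical; not part of this package):
//
//	type fixedStats struct {
//		cpu int64
//		n   int32
//	}
//
//	func (f fixedStats) GetAggregatedStoreStats(bool) (int64, int32) {
//		return f.cpu, f.n
//	}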

// NodeCapacityProvider reports node-level CPU usage and capacity by sampling
// runtime stats and aggregating store-level CPU usage across all stores. It
// is used by Store to populate the NodeCapacity field in the StoreDescriptor.
type NodeCapacityProvider struct {
	stores             StoresStatsAggregator
	runtimeLoadMonitor *runtimeLoadMonitor
}

// NewNodeCapacityProvider creates a new NodeCapacityProvider that monitors CPU
// metrics using the provided stores aggregator. The optional knobs parameter
// allows customizing refresh intervals for testing.
func NewNodeCapacityProvider(
	stopper *stop.Stopper, stores StoresStatsAggregator, knobs *NodeCapacityProviderTestingKnobs,
) *NodeCapacityProvider {
	if stopper == nil || stores == nil {
		panic("programming error: stopper or stores aggregator cannot be nil")
	}

	// The refresh intervals below define how frequently CPU metrics are
	// updated.
	const (
		// defaultCPUUsageRefreshInterval controls how often CPU usage
		// measurements are taken.
		defaultCPUUsageRefreshInterval = time.Second
		// defaultCPUCapacityRefreshInterval controls how often the total CPU
		// capacity of the node is recalculated. This is less frequent than
		// usage since capacity changes happen less often.
		defaultCPUCapacityRefreshInterval = 10 * time.Second
	)

	// defaultMovingAverageAge defines the effective time window size. With a
	// value of 20, the 20th-to-last measurement contributes meaningfully to
	// the average, while earlier measurements have diminishing impact.
	const defaultMovingAverageAge = 20
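
	// Note: with the VividCortex/ewma package, an age of N is believed to
	// weight each new sample by roughly 2/(N+1); for N = 20, each new 1s
	// sample contributes ~9.5% to the average, so the window spans roughly
	// the last 20 seconds of measurements.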

	usageInterval := defaultCPUUsageRefreshInterval
	capacityInterval := defaultCPUCapacityRefreshInterval
	if knobs != nil {
		// Only override a default when the knob is set; a zero interval
		// would make time.NewTicker panic in run.
		if knobs.CpuUsageRefreshInterval != 0 {
			usageInterval = knobs.CpuUsageRefreshInterval
		}
		if knobs.CpuCapacityRefreshInterval != 0 {
			capacityInterval = knobs.CpuCapacityRefreshInterval
		}
	}

	monitor := &runtimeLoadMonitor{
		stopper:                 stopper,
		usageRefreshInterval:    usageInterval,
		capacityRefreshInterval: capacityInterval,
	}
	monitor.mu.usageEWMA = ewma.NewMovingAverage(defaultMovingAverageAge)
	// Take an initial capacity reading synchronously so that a capacity value
	// is available before the background monitor starts.
	monitor.recordCPUCapacity(context.Background())
	return &NodeCapacityProvider{
		stores:             stores,
		runtimeLoadMonitor: monitor,
	}
}
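
// A minimal usage sketch (hypothetical; ctx, stopper, and stores are assumed
// to be supplied by the caller):
//
//	provider := NewNodeCapacityProvider(stopper, stores, nil /* knobs */)
//	provider.Run(ctx)
//	nc := provider.GetNodeCapacity(true /* useCached */)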

// Run starts the background monitoring of CPU metrics.
func (n *NodeCapacityProvider) Run(ctx context.Context) {
	_ = n.runtimeLoadMonitor.stopper.RunAsyncTask(ctx, "runtime-load-monitor", func(ctx context.Context) {
		n.runtimeLoadMonitor.run(ctx)
	})
}

// GetNodeCapacity returns the NodeCapacity, which includes node-level CPU
// usage and capacity as well as the aggregated store-level CPU usage. If
// useCached is true, cached store descriptors are used to compute the
// aggregated store-level CPU usage.
func (n *NodeCapacityProvider) GetNodeCapacity(useCached bool) roachpb.NodeCapacity {
	storesCPURate, numStores := n.stores.GetAggregatedStoreStats(useCached)
	// TODO(wenyihu6): may be unexpected to caller that useCached only applies
	// to the stores stats but not the runtime load monitor. We can change
	// runtimeLoadMonitor to also fetch updated stats.
	// TODO(wenyihu6): NodeCPURateCapacity <= NodeCPURateUsage fails on CI and
	// requires more investigation.
	cpuUsageNanoPerSec, cpuCapacityNanoPerSec := n.runtimeLoadMonitor.GetCPUStats()
	return roachpb.NodeCapacity{
		StoresCPURate:       storesCPURate,
		NumStores:           numStores,
		NodeCPURateCapacity: cpuCapacityNanoPerSec,
		NodeCPURateUsage:    cpuUsageNanoPerSec,
	}
}
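
// For illustration (hypothetical numbers): on an 8-vCPU node whose process
// averages 0.25 CPU-seconds of CPU time per second, the returned NodeCapacity
// would have NodeCPURateUsage ~ 2.5e8 ns/sec and NodeCPURateCapacity = 8e9
// ns/sec, i.e. roughly 3% node-level CPU utilization.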

// runtimeLoadMonitor periodically polls the node's CPU usage and capacity
// stats and maintains a moving average of usage.
type runtimeLoadMonitor struct {
	usageRefreshInterval    time.Duration
	capacityRefreshInterval time.Duration
	stopper                 *stop.Stopper

	mu struct {
		syncutil.Mutex
		// lastTotalUsageNanos tracks cumulative CPU usage in nanoseconds as
		// reported by status.GetProcCPUTime.
		lastTotalUsageNanos float64
		// usageEWMA maintains a moving average of the delta in CPU usage
		// between two subsequent polls, in nanoseconds. The usage is obtained
		// by polling status.GetProcCPUTime, which is cumulative.
		usageEWMA ewma.MovingAverage
		// logicalCPUsPerSec represents the node's CPU capacity in logical
		// CPU-seconds per second, obtained from status.GetCPUCapacity.
		logicalCPUsPerSec int64
	}
}

// GetCPUStats returns the current CPU usage and capacity stats for the node.
func (m *runtimeLoadMonitor) GetCPUStats() (cpuUsageNanoPerSec int64, cpuCapacityNanoPerSec int64) {
	m.mu.Lock()
	defer m.mu.Unlock()
	// usageEWMA holds the average usage per refresh interval in nanoseconds.
	// Divide by the refresh interval to get the per-second rate.
	cpuUsageNanoPerSec = int64(m.mu.usageEWMA.Value() / m.usageRefreshInterval.Seconds())
	// logicalCPUsPerSec is in logical CPU-seconds per second. Convert the
	// unit from CPU-seconds to CPU-nanoseconds.
	cpuCapacityNanoPerSec = m.mu.logicalCPUsPerSec * time.Second.Nanoseconds()
	return cpuUsageNanoPerSec, cpuCapacityNanoPerSec
}
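
// A worked example of the conversion above (hypothetical numbers): with a 1s
// refresh interval and an EWMA value of 2.5e8 (the process consumed 0.25
// CPU-seconds of CPU time per interval on average), cpuUsageNanoPerSec is
// 2.5e8. With 8 logical CPUs, cpuCapacityNanoPerSec is 8 * 1e9 = 8e9.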

// recordCPUUsage samples and records the current CPU usage of the node.
func (m *runtimeLoadMonitor) recordCPUUsage(ctx context.Context) {
	m.mu.Lock()
	defer m.mu.Unlock()
	userTimeMillis, sysTimeMillis, err := status.GetProcCPUTime(ctx)
	if err != nil {
		if buildutil.CrdbTestBuild {
			panic(err)
		}
		// TODO(wenyihu6): we should revisit error handling here for production.
		log.Warningf(ctx, "failed to get cpu usage: %v", err)
		// Skip this sample rather than feeding a bogus zero reading into the
		// moving average.
		return
	}
	// Convert milliseconds to nanoseconds.
	totalUsageNanos := float64(userTimeMillis*1e6 + sysTimeMillis*1e6)
	if buildutil.CrdbTestBuild && m.mu.lastTotalUsageNanos > totalUsageNanos {
		panic(errors.Newf("programming error: last cpu usage is larger than current: %v > %v",
			m.mu.lastTotalUsageNanos, totalUsageNanos))
	}
	m.mu.usageEWMA.Add(totalUsageNanos - m.mu.lastTotalUsageNanos)
	m.mu.lastTotalUsageNanos = totalUsageNanos
}
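
// For example (hypothetical numbers): if the previous cumulative reading was
// 5.0e9 ns and the current one is 5.25e9 ns, a delta of 2.5e8 ns is added to
// the EWMA for this interval.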

// recordCPUCapacity samples and records the current CPU capacity of the node.
func (m *runtimeLoadMonitor) recordCPUCapacity(ctx context.Context) {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.mu.logicalCPUsPerSec = int64(status.GetCPUCapacity())
	if m.mu.logicalCPUsPerSec == 0 {
		if buildutil.CrdbTestBuild {
			panic("programming error: cpu capacity is 0")
		}
		// TODO(wenyihu6): we should pass in an actual context here.
		log.Warningf(ctx, "failed to get cpu capacity")
	}
}
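
// Note: status.GetCPUCapacity is expected to report the number of logical
// CPUs available to the process (reflecting any cgroup CPU limits, where
// applicable); this is an assumption about its behavior rather than a
// guarantee documented here.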

// run is the main loop of the runtimeLoadMonitor: it periodically polls the
// node's CPU usage and capacity. It continues to run until the context is
// done or the stopper has quiesced.
func (m *runtimeLoadMonitor) run(ctx context.Context) {
	usageTicker := time.NewTicker(m.usageRefreshInterval)
	defer usageTicker.Stop()
	capacityTicker := time.NewTicker(m.capacityRefreshInterval)
	defer capacityTicker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-m.stopper.ShouldQuiesce():
			return
		case <-usageTicker.C:
			// A time.Ticker fires repeatedly at its interval, so no manual
			// reset is needed between ticks.
			m.recordCPUUsage(ctx)
		case <-capacityTicker.C:
			m.recordCPUCapacity(ctx)
		}
	}
}