11 changes: 7 additions & 4 deletions pkg/kv/kvserver/asim/config/settings.go
@@ -31,10 +31,13 @@ const (
	defaultLBRebalancingInterval = time.Minute
)

-const DefaultNodeCPURateCapacityNanos = 8 * 1e9 // 8 vcpus
-const DefaultStoreDiskCapacityBytes = 1024 << 30 // 1024 GiB
-const DoubleDefaultNodeCPURateCapacityNanos = 16 * 1e9 // 16 vcpus
-const DoubleDefaultStoreDiskCapacityBytes = 2048 << 30 // 2048 GiB
+const (
+	DefaultNodeCPUCores                   = 8.0        // 8 vcpus
+	DefaultNodeCPURateCapacityNanos       = 8 * 1e9    // 8 vcpus
+	DefaultStoreDiskCapacityBytes         = 1024 << 30 // 1024 GiB
+	DoubleDefaultNodeCPURateCapacityNanos = 16 * 1e9   // 16 vcpus
+	DoubleDefaultStoreDiskCapacityBytes   = 2048 << 30 // 2048 GiB
+)

var (
	// DefaultStartTime is used as the default beginning time for simulation
2 changes: 2 additions & 0 deletions pkg/kv/kvserver/asim/state/BUILD.bazel
@@ -11,6 +11,7 @@ go_library(
"load.go",
"new_state.go",
"new_state_test_helper.go",
"node_cpu_cores.go",
"node_cpu_rate_capacities.go",
"parser_replica_placement.go",
"split_decider.go",
@@ -60,6 +61,7 @@ go_test(
"change_test.go",
"config_loader_test.go",
"liveness_test.go",
"node_cpu_cores_test.go",
"node_cpu_rate_capacities_test.go",
"parser_replica_placement_test.go",
"split_decider_test.go",
19 changes: 19 additions & 0 deletions pkg/kv/kvserver/asim/state/node_cpu_cores.go
@@ -0,0 +1,19 @@
// Copyright 2025 The Cockroach Authors.
//
// Use of this software is governed by the CockroachDB Software License
// included in the /LICENSE file.

package state

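// NodeCPUCores is a per-node list of CPU capacities expressed in vCPU
// cores. Fractional core counts are allowed.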
type NodeCPUCores []float64

// ToRateCapacityNanos converts NodeCPUCores to node capacities in nanos
// (NodeCPURateCapacities).
func (nc NodeCPUCores) ToRateCapacityNanos() NodeCPURateCapacities {
	res := make(NodeCPURateCapacities, len(nc))
	const nanosPerSecond = 1e9
	for i, cores := range nc {
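		// Note: the float64-to-uint64 conversion truncates toward zero, so a
		// fractional nanosecond rounds down (see the round_down test case).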
		res[i] = uint64(cores * nanosPerSecond)
	}
	return res
}
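
A minimal usage sketch (illustration only, not part of this diff), mirroring how datadriven_simulation_test.go consumes the helper above:

	cores := state.NodeCPUCores{8, 8, 16} // e.g. parsed from node_cpu_cores=(8,8,16)
	capacities := cores.ToRateCapacityNanos()
	// capacities == NodeCPURateCapacities{8e9, 8e9, 16e9}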
52 changes: 52 additions & 0 deletions pkg/kv/kvserver/asim/state/node_cpu_cores_test.go
@@ -0,0 +1,52 @@
// Copyright 2025 The Cockroach Authors.
//
// Use of this software is governed by the CockroachDB Software License
// included in the /LICENSE file.

package state

import (
	"testing"

	"github.com/stretchr/testify/require"
)

func TestNodeCPUCores_ToRateCapacityNanos(t *testing.T) {
	testCases := []struct {
		name     string
		cores    NodeCPUCores
		expected NodeCPURateCapacities
	}{
		{
			name:     "empty",
			cores:    NodeCPUCores{},
			expected: NodeCPURateCapacities{},
		},
		{
			name:     "single_core",
			cores:    NodeCPUCores{1.0},
			expected: NodeCPURateCapacities{1e9},
		},
		{
			name:     "multiple_cores",
			cores:    NodeCPUCores{1.0, 2.0, 3.0},
			expected: NodeCPURateCapacities{1e9, 2e9, 3e9},
		},
		{
			name:     "fractional_cores",
			cores:    NodeCPUCores{1.5, 2.75, 3.25},
			expected: NodeCPURateCapacities{1500e6, 2750e6, 3250e6},
		},
		{
			name:     "round_down_to_nearest_nanosecond",
			cores:    NodeCPUCores{1.4999999998, 2.1111111111999},
			expected: NodeCPURateCapacities{1499999999, 2111111111},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			require.Equal(t, tc.expected, tc.cores.ToRateCapacityNanos())
		})
	}
}
20 changes: 10 additions & 10 deletions pkg/kv/kvserver/asim/tests/datadriven_simulation_test.go
@@ -61,11 +61,11 @@ var runAsimTests = envutil.EnvOrDefaultBool("COCKROACH_RUN_ASIM_TESTS", false)
// cpu_per_access=0 raft_cpu_per_write=0
//
// - "gen_cluster" [nodes=<int>] [stores_per_node=<int>]
-// [store_byte_capacity_gib=<int>] [node_cpu_rate_capacity=<int>]
+// [store_byte_capacity_gib=<int>] [node_cpu_cores=<float>]
// Initialize the cluster generator parameters. On the next call to eval,
// the cluster generator is called to create the initial state used in the
// simulation. The default values are: nodes=3 stores_per_node=1
-// store_byte_capacity_gib=256, node_cpu_rate_capacity=0.
+// store_byte_capacity_gib=256, node_cpu_cores=8.0.
//
// - "load_cluster": config=<name>
// Load a defined cluster configuration to be the generated cluster in the
@@ -342,7 +342,7 @@ func TestDataDriven(t *testing.T) {
case "gen_cluster":
var nodes = 3
var storesPerNode = 1
var nodeCPURateCapacity = []uint64{config.DefaultNodeCPURateCapacityNanos}
var nodeCPUCores = []float64{config.DefaultNodeCPUCores}
var region []string
var nodesPerRegion []int
var storeByteCapacityGiB int64 = 256
@@ -351,16 +351,16 @@
scanIfExists(t, d, "store_byte_capacity_gib", &storeByteCapacityGiB)
scanIfExists(t, d, "region", &region)
scanIfExists(t, d, "nodes_per_region", &nodesPerRegion)
scanIfExists(t, d, "node_cpu_rate_capacity", &nodeCPURateCapacity)
scanIfExists(t, d, "node_cpu_cores", &nodeCPUCores)

var buf strings.Builder
require.NotEmpty(t, nodeCPURateCapacity)
require.NotEmpty(t, nodeCPUCores)
{
n := len(nodeCPURateCapacity)
require.True(t, n == 1 || n == nodes, "need to specify node_cpu_rate_capacity for each node")
n := len(nodeCPUCores)
require.True(t, n == 1 || n == nodes, "need to specify node_cpu_cores for each node")

for _, cpct := range nodeCPURateCapacity {
if cores := float64(cpct) / 1e9; cores < 1 {
for _, cores := range nodeCPUCores {
if cores < 1.0 {
// TODO(mma): fix up the tests that trigger this warning.
// TODO(mma): print a warning whenever the measured CPU utilization
// on a node exceeds this capacity, as that's likely not what the test
@@ -377,7 +377,7 @@
				StoreByteCapacity:   storeByteCapacityGiB << 30,
				Region:              region,
				NodesPerRegion:      nodesPerRegion,
-				NodeCPURateCapacity: nodeCPURateCapacity,
+				NodeCPURateCapacity: state.NodeCPUCores(nodeCPUCores).ToRateCapacityNanos(),
			}
			return buf.String()
		case "load_cluster":
@@ -9,7 +9,7 @@
# balance on absolute cpu-nanos. n3 should handle more load due to its higher
# capacity, but the current implementation doesn't account for this. This is
# tracked in issue: https://github.com/cockroachdb/cockroach/issues/153777.
-gen_cluster nodes=3 node_cpu_rate_capacity=(8000000000,8000000000,16000000000)
+gen_cluster nodes=3 node_cpu_cores=(8,8,16)
----

gen_ranges ranges=200 min_key=1 max_key=10000 placement_type=even
@@ -7,7 +7,7 @@
#
# Expected outcome: The allocator should rebalance both replicas and leases to
# distribute the high-cpu workload more evenly across all 10 nodes.
-gen_cluster nodes=10 node_cpu_rate_capacity=8000000000
+gen_cluster nodes=10 node_cpu_cores=8
----

# TODO(wenyihu6): why didn't we balance more replicas/leases - is it because of a very high cpu per range
@@ -2,7 +2,7 @@
# there is high CPU load imbalance across a large cluster. The test set-up is
# similar to high_cpu.txt but is on 25 nodes and with 3x the load for two
# gen_load commands.
-gen_cluster nodes=25 node_cpu_rate_capacity=8000000000
+gen_cluster nodes=25 node_cpu_cores=8
----

setting split_queue_enabled=false
@@ -4,7 +4,7 @@
# will be able to shed its own leases because it is the leaseholder. There should
# be a period of lease-rebalancing activity before replica-rebalancing.

-gen_cluster nodes=5 node_cpu_rate_capacity=9000000000
+gen_cluster nodes=5 node_cpu_cores=9
----

setting split_queue_enabled=false
@@ -16,7 +16,7 @@
# the CPU overloaded s1, so we should observe a period of lease transfers before
# any replica based rebalancing away from the store occurs.

-gen_cluster nodes=5 node_cpu_rate_capacity=9000000000
+gen_cluster nodes=5 node_cpu_cores=9
----

setting split_queue_enabled=false
@@ -4,7 +4,7 @@
#
# Expected outcome: mma should rebalance replicas and leases to distribute the
# cpu load and write load more evenly across all stores.
-gen_cluster nodes=10 node_cpu_rate_capacity=3000000000 stores_per_node=2
+gen_cluster nodes=10 node_cpu_cores=3 stores_per_node=2
----

# Read only workload, which generates 1000 request cpu nanos/s evenly over
@@ -5,7 +5,7 @@
#
# Expected outcome: two stores should roughly equalize their cpu load and write
# load via range rebalancing.
-gen_cluster nodes=2 node_cpu_rate_capacity=1000000000
+gen_cluster nodes=2 node_cpu_cores=1
----

gen_ranges ranges=100 repl_factor=1 min_key=1 max_key=10000 placement_type=replica_placement bytes_mib=26
@@ -6,7 +6,7 @@
#
# Expected outcome: The allocator should rebalance both leases and replicas to
# achieve more even cpu and write distribution across all nodes.
-gen_cluster nodes=9 node_cpu_rate_capacity=5000000000
+gen_cluster nodes=9 node_cpu_cores=5
----

# The placement will be skewed, s.t. n1/s1, n2/s2 and n3/s3 will have all the
@@ -12,7 +12,7 @@
# ignoreLevel logic in rebalanceStores with the grace duration to start
# shedding more aggressively and other related changes have made this much
# better.
-gen_cluster nodes=6 node_cpu_rate_capacity=5000000000
+gen_cluster nodes=6 node_cpu_cores=5
----

# The placement will be skewed, s.t. n1/s1, n2/s2 and n3/s3 will have all the
@@ -3,7 +3,7 @@
#
# Expected outcome: The allocator should rebalance both cpu and write load across
# all stores, with mma achieving better results than sma.
-gen_cluster nodes=6 node_cpu_rate_capacity=5000000000
+gen_cluster nodes=6 node_cpu_cores=5
----

# The placement will be skewed, s.t. n1/s1, n2/s2 and n3/s3 will have all the