diff --git a/pkg/kv/kvserver/asim/config/settings.go b/pkg/kv/kvserver/asim/config/settings.go index bd0d51f6ba61..4da327ddfe0d 100644 --- a/pkg/kv/kvserver/asim/config/settings.go +++ b/pkg/kv/kvserver/asim/config/settings.go @@ -31,10 +31,13 @@ const ( defaultLBRebalancingInterval = time.Minute ) -const DefaultNodeCPURateCapacityNanos = 8 * 1e9 // 8 vcpus -const DefaultStoreDiskCapacityBytes = 1024 << 30 // 1024 GiB -const DoubleDefaultNodeCPURateCapacityNanos = 16 * 1e9 // 16 vcpus -const DoubleDefaultStoreDiskCapacityBytes = 2048 << 30 // 2048 GiB +const ( + DefaultNodeCPUCores = 8.0 // 8 vcpus + DefaultNodeCPURateCapacityNanos = 8 * 1e9 // 8 vcpus + DefaultStoreDiskCapacityBytes = 1024 << 30 // 1024 GiB + DoubleDefaultNodeCPURateCapacityNanos = 16 * 1e9 // 16 vcpus + DoubleDefaultStoreDiskCapacityBytes = 2048 << 30 // 2048 GiB +) var ( // DefaultStartTime is used as the default beginning time for simulation diff --git a/pkg/kv/kvserver/asim/state/BUILD.bazel b/pkg/kv/kvserver/asim/state/BUILD.bazel index 24da0d71aadf..efe80dd93f45 100644 --- a/pkg/kv/kvserver/asim/state/BUILD.bazel +++ b/pkg/kv/kvserver/asim/state/BUILD.bazel @@ -11,6 +11,7 @@ go_library( "load.go", "new_state.go", "new_state_test_helper.go", + "node_cpu_cores.go", "node_cpu_rate_capacities.go", "parser_replica_placement.go", "split_decider.go", @@ -60,6 +61,7 @@ go_test( "change_test.go", "config_loader_test.go", "liveness_test.go", + "node_cpu_cores_test.go", "node_cpu_rate_capacities_test.go", "parser_replica_placement_test.go", "split_decider_test.go", diff --git a/pkg/kv/kvserver/asim/state/node_cpu_cores.go b/pkg/kv/kvserver/asim/state/node_cpu_cores.go new file mode 100644 index 000000000000..1ee5cf7e7453 --- /dev/null +++ b/pkg/kv/kvserver/asim/state/node_cpu_cores.go @@ -0,0 +1,19 @@ +// Copyright 2025 The Cockroach Authors. +// +// Use of this software is governed by the CockroachDB Software License +// included in the /LICENSE file. 
+ +package state + +type NodeCPUCores []float64 + +// ToRateCapacityNanos converts NodeCPUCores to node capacities in nanos +// (NodeCPURateCapacities). +func (nc NodeCPUCores) ToRateCapacityNanos() NodeCPURateCapacities { + res := make(NodeCPURateCapacities, len(nc)) + const nanosPerSecond = 1e9 + for i, cores := range nc { + res[i] = uint64(cores * nanosPerSecond) + } + return res +} diff --git a/pkg/kv/kvserver/asim/state/node_cpu_cores_test.go b/pkg/kv/kvserver/asim/state/node_cpu_cores_test.go new file mode 100644 index 000000000000..542068ec4d61 --- /dev/null +++ b/pkg/kv/kvserver/asim/state/node_cpu_cores_test.go @@ -0,0 +1,52 @@ +// Copyright 2025 The Cockroach Authors. +// +// Use of this software is governed by the CockroachDB Software License +// included in the /LICENSE file. + +package state + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestNodeCPUCores_ToRateCapacityNanos(t *testing.T) { + testCases := []struct { + name string + cores NodeCPUCores + expected NodeCPURateCapacities + }{ + { + name: "empty", + cores: NodeCPUCores{}, + expected: NodeCPURateCapacities{}, + }, + { + name: "single_core", + cores: NodeCPUCores{1.0}, + expected: NodeCPURateCapacities{1e9}, + }, + { + name: "multiple_cores", + cores: NodeCPUCores{1.0, 2.0, 3.0}, + expected: NodeCPURateCapacities{1e9, 2e9, 3e9}, + }, + { + name: "fractional_cores", + cores: NodeCPUCores{1.5, 2.75, 3.25}, + expected: NodeCPURateCapacities{1500e6, 2750e6, 3250e6}, + }, + { + name: "round_down_to_nearest_nanosecond", + cores: NodeCPUCores{1.4999999998, 2.1111111111999}, + expected: NodeCPURateCapacities{1499999999, 2111111111}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.expected, tc.cores.ToRateCapacityNanos()) + }) + } +} diff --git a/pkg/kv/kvserver/asim/tests/datadriven_simulation_test.go b/pkg/kv/kvserver/asim/tests/datadriven_simulation_test.go index 93ad95f3faa9..74f72c962761 100644 --- 
a/pkg/kv/kvserver/asim/tests/datadriven_simulation_test.go +++ b/pkg/kv/kvserver/asim/tests/datadriven_simulation_test.go @@ -61,11 +61,11 @@ var runAsimTests = envutil.EnvOrDefaultBool("COCKROACH_RUN_ASIM_TESTS", false) // cpu_per_access=0 raft_cpu_per_write=0 // // - "gen_cluster" [nodes=] [stores_per_node=] -// [store_byte_capacity_gib=] [node_cpu_rate_capacity=] +// [store_byte_capacity_gib=] [node_cpu_cores=] // Initialize the cluster generator parameters. On the next call to eval, // the cluster generator is called to create the initial state used in the // simulation. The default values are: nodes=3 stores_per_node=1 -// store_byte_capacity_gib=256, node_cpu_rate_capacity=0. +// store_byte_capacity_gib=256, node_cpu_cores=8.0. // // - "load_cluster": config= // Load a defined cluster configuration to be the generated cluster in the @@ -342,7 +342,7 @@ func TestDataDriven(t *testing.T) { case "gen_cluster": var nodes = 3 var storesPerNode = 1 - var nodeCPURateCapacity = []uint64{config.DefaultNodeCPURateCapacityNanos} + var nodeCPUCores = []float64{config.DefaultNodeCPUCores} var region []string var nodesPerRegion []int var storeByteCapacityGiB int64 = 256 @@ -351,16 +351,16 @@ func TestDataDriven(t *testing.T) { scanIfExists(t, d, "store_byte_capacity_gib", &storeByteCapacityGiB) scanIfExists(t, d, "region", &region) scanIfExists(t, d, "nodes_per_region", &nodesPerRegion) - scanIfExists(t, d, "node_cpu_rate_capacity", &nodeCPURateCapacity) + scanIfExists(t, d, "node_cpu_cores", &nodeCPUCores) var buf strings.Builder - require.NotEmpty(t, nodeCPURateCapacity) + require.NotEmpty(t, nodeCPUCores) { - n := len(nodeCPURateCapacity) - require.True(t, n == 1 || n == nodes, "need to specify node_cpu_rate_capacity for each node") + n := len(nodeCPUCores) + require.True(t, n == 1 || n == nodes, "need to specify node_cpu_cores for each node") - for _, cpct := range nodeCPURateCapacity { - if cores := float64(cpct) / 1e9; cores < 1 { + for _, cores := range nodeCPUCores { 
+ if cores < 1.0 { // TODO(mma): fix up the tests that trigger this warning. // TODO(mma): print a warning whenever the measured CPU utilization // on a node exceeds this capacity, as that's likely not what the test @@ -377,7 +377,7 @@ func TestDataDriven(t *testing.T) { StoreByteCapacity: storeByteCapacityGiB << 30, Region: region, NodesPerRegion: nodesPerRegion, - NodeCPURateCapacity: nodeCPURateCapacity, + NodeCPURateCapacity: state.NodeCPUCores(nodeCPUCores).ToRateCapacityNanos(), } return buf.String() case "load_cluster": diff --git a/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/heterogeneous_cpu.txt b/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/heterogeneous_cpu.txt index a4a0b66f6347..13a8a638bc8a 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/heterogeneous_cpu.txt +++ b/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/heterogeneous_cpu.txt @@ -9,7 +9,7 @@ # balance on absolute cpu-nanos. n3 should handle more load due to its higher # capacity, but the current implementation doesn't account for this. This is # tracked in issue: https://github.com/cockroachdb/cockroach/issues/153777. -gen_cluster nodes=3 node_cpu_rate_capacity=(8000000000,8000000000,16000000000) +gen_cluster nodes=3 node_cpu_cores=(8,8,16) ---- gen_ranges ranges=200 min_key=1 max_key=10000 placement_type=even diff --git a/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_cpu.txt b/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_cpu.txt index 7edc0623d878..bb16931d7cb4 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_cpu.txt +++ b/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_cpu.txt @@ -7,7 +7,7 @@ # # Expected outcome: The allocator should rebalance both replicas and leases to # distribute the high-cpu workload more evenly across all 10 nodes. 
-gen_cluster nodes=10 node_cpu_rate_capacity=8000000000 +gen_cluster nodes=10 node_cpu_cores=8 ---- # TODO(wenyihu6): why didn't we balance more replicas/leases - is it because of a very high cpu per range diff --git a/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_cpu_25nodes.txt b/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_cpu_25nodes.txt index 6b197ef730a0..d9798387ed43 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_cpu_25nodes.txt +++ b/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_cpu_25nodes.txt @@ -2,7 +2,7 @@ # there is high CPU load imbalance across a large cluster. The test set-up is # similar to high_cpu.txt but is on 25 nodes and with 3x the load for two # gen_load commands. -gen_cluster nodes=25 node_cpu_rate_capacity=8000000000 +gen_cluster nodes=25 node_cpu_cores=8 ---- setting split_queue_enabled=false diff --git a/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_cpu_able_to_shed_leases.txt b/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_cpu_able_to_shed_leases.txt index 61d828461849..1f75bae60625 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_cpu_able_to_shed_leases.txt +++ b/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_cpu_able_to_shed_leases.txt @@ -4,7 +4,7 @@ # will be able to shed its own leases because it is the leaseholer. There should # be a period of lease-rebalancing activity before replica-rebalancing. 
-gen_cluster nodes=5 node_cpu_rate_capacity=9000000000 +gen_cluster nodes=5 node_cpu_cores=9 ---- setting split_queue_enabled=false diff --git a/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_cpu_unable_to_shed_leases.txt b/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_cpu_unable_to_shed_leases.txt index 1a186ac39a63..be4fd5d2e999 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_cpu_unable_to_shed_leases.txt +++ b/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_cpu_unable_to_shed_leases.txt @@ -16,7 +16,7 @@ # the CPU overloaded s1, so we should observe a period of lease transfers before # any replica based rebalancing away from the store occurs. -gen_cluster nodes=5 node_cpu_rate_capacity=9000000000 +gen_cluster nodes=5 node_cpu_cores=9 ---- setting split_queue_enabled=false diff --git a/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_write_uniform_cpu.txt b/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_write_uniform_cpu.txt index 15ee2c6dadb3..8d696f3fd2be 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_write_uniform_cpu.txt +++ b/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_write_uniform_cpu.txt @@ -4,7 +4,7 @@ # # Expected outcome: mma should rebalance replicas and leases to distribute the # cpu load and write load more evenly across all stores. 
-gen_cluster nodes=10 node_cpu_rate_capacity=3000000000 stores_per_node=2 +gen_cluster nodes=10 node_cpu_cores=3 stores_per_node=2 ---- # Read only workload, which generates 1000 request cpu nanos/s evenly over diff --git a/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/one_voter_skewed_cpu_skewed_write.txt b/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/one_voter_skewed_cpu_skewed_write.txt index 5c4c2c66d92e..4ccb1b86650c 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/one_voter_skewed_cpu_skewed_write.txt +++ b/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/one_voter_skewed_cpu_skewed_write.txt @@ -5,7 +5,7 @@ # # Expected outcome: two stores should roughly equalize their cpu load and write # load via range rebalancing. -gen_cluster nodes=2 node_cpu_rate_capacity=1000000000 +gen_cluster nodes=2 node_cpu_cores=1 ---- gen_ranges ranges=100 repl_factor=1 min_key=1 max_key=10000 placement_type=replica_placement bytes_mib=26 diff --git a/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/skewed_cpu_even_ranges_mma.txt b/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/skewed_cpu_even_ranges_mma.txt index 1ededabaf81d..e5f5fdf9a672 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/skewed_cpu_even_ranges_mma.txt +++ b/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/skewed_cpu_even_ranges_mma.txt @@ -6,7 +6,7 @@ # # Expected outcome: The allocator should rebalance both leases and replicas to # achieve more even cpu and write distribution across all nodes. -gen_cluster nodes=9 node_cpu_rate_capacity=5000000000 +gen_cluster nodes=9 node_cpu_cores=5 ---- # The placement will be skewed, s.t. 
n1/s1, n2/s2 and n3/s3 will have all the diff --git a/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/skewed_cpu_skewed_write.txt b/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/skewed_cpu_skewed_write.txt index ceb6608f78c9..a9c28b8ee76d 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/skewed_cpu_skewed_write.txt +++ b/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/skewed_cpu_skewed_write.txt @@ -12,7 +12,7 @@ # ignoreLevel logic in rebalanceStores with the grace duration to start # shedding more aggressively and other related changes have made this much # better. -gen_cluster nodes=6 node_cpu_rate_capacity=5000000000 +gen_cluster nodes=6 node_cpu_cores=5 ---- # The placement will be skewed, s.t. n1/s1, n2/s2 and n3/s3 will have all the diff --git a/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/skewed_cpu_skewed_write_more_ranges.txt b/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/skewed_cpu_skewed_write_more_ranges.txt index b5f2243779f5..891fbf6576c4 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/skewed_cpu_skewed_write_more_ranges.txt +++ b/pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/skewed_cpu_skewed_write_more_ranges.txt @@ -3,7 +3,7 @@ # # Expected outcome: The allocator should rebalance both cpu and write load across # all stores, with mma achieving better results than sma. -gen_cluster nodes=6 node_cpu_rate_capacity=5000000000 +gen_cluster nodes=6 node_cpu_cores=5 ---- # The placement will be skewed, s.t. n1/s1, n2/s2 and n3/s3 will have all the