Skip to content

Commit 8972085

Browse files
committed
asim: specify node capacity in cores, not nanos/sec
Previously, the datadriven asim tests expressed node capacity in CPU nanoseconds per second, which is difficult to read because of the many zeros. Now, the node capacity is expressed in cores (float), which is much more readable and less prone to user error. For example, `node_cpu_rate_capacity=8000000000` is now expressed as `node_cpu_cores=8`. Fixes #156845 Epic: CRDB-55052 Release note: none.
1 parent 33ebee8 commit 8972085

15 files changed

+100
-24
lines changed

pkg/kv/kvserver/asim/config/settings.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,13 @@ const (
3131
defaultLBRebalancingInterval = time.Minute
3232
)
3333

34-
const DefaultNodeCPURateCapacityNanos = 8 * 1e9 // 8 vcpus
35-
const DefaultStoreDiskCapacityBytes = 1024 << 30 // 1024 GiB
36-
const DoubleDefaultNodeCPURateCapacityNanos = 16 * 1e9 // 16 vcpus
37-
const DoubleDefaultStoreDiskCapacityBytes = 2048 << 30 // 2048 GiB
34+
const (
35+
DefaultNodeCPUCores = 8.0 // 8 vcpus, expressed in cores
36+
DefaultNodeCPURateCapacityNanos = 8 * 1e9 // 8 vcpus, expressed in CPU nanos per second (same capacity as DefaultNodeCPUCores)
37+
DefaultStoreDiskCapacityBytes = 1024 << 30 // 1024 GiB
38+
DoubleDefaultNodeCPURateCapacityNanos = 16 * 1e9 // 16 vcpus, expressed in CPU nanos per second
39+
DoubleDefaultStoreDiskCapacityBytes = 2048 << 30 // 2048 GiB
40+
)
3841

3942
var (
4043
// DefaultStartTime is used as the default beginning time for simulation

pkg/kv/kvserver/asim/state/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ go_library(
1111
"load.go",
1212
"new_state.go",
1313
"new_state_test_helper.go",
14+
"node_cpu_cores.go",
1415
"node_cpu_rate_capacities.go",
1516
"parser_replica_placement.go",
1617
"split_decider.go",
@@ -60,6 +61,7 @@ go_test(
6061
"change_test.go",
6162
"config_loader_test.go",
6263
"liveness_test.go",
64+
"node_cpu_cores_test.go",
6365
"node_cpu_rate_capacities_test.go",
6466
"parser_replica_placement_test.go",
6567
"split_decider_test.go",
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package state
7+
8+
// NodeCPUCores is a list of node CPU capacities expressed in (possibly
// fractional) cores. Use ToRateCapacityNanos to obtain the equivalent
// NodeCPURateCapacities.
type NodeCPUCores []float64
9+
10+
// ToRateCapacityNanos converts NodeCPUCores to node capacities in nanos
11+
// (NodeCPURateCapacities). Entry i of the result is nc[i] * 1e9 converted with uint64(...); the result has the same length as nc.
12+
func (nc NodeCPUCores) ToRateCapacityNanos() NodeCPURateCapacities {
13+
res := make(NodeCPURateCapacities, len(nc))
14+
const nanosPerSecond = 1e9
15+
for i, cores := range nc {
16+
res[i] = uint64(cores * nanosPerSecond) // drops any fractional nanosecond; NOTE(review): negative or NaN core counts are not checked, and Go leaves such float-to-uint64 conversions implementation-dependent
17+
}
18+
return res
19+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package state
7+
8+
import (
9+
"testing"
10+
11+
"github.com/stretchr/testify/require"
12+
)
13+
14+
// TestNodeCPUCores_ToRateCapacityNanos is a table-driven test that checks the
// output of NodeCPUCores.ToRateCapacityNanos for empty, whole, and fractional
// core counts.
func TestNodeCPUCores_ToRateCapacityNanos(t *testing.T) {
15+
testCases := []struct {
16+
name string
17+
cores NodeCPUCores
18+
expected NodeCPURateCapacities
19+
}{
20+
{
21+
name: "empty",
22+
cores: NodeCPUCores{},
23+
expected: NodeCPURateCapacities{},
24+
},
25+
{
26+
name: "single_core",
27+
cores: NodeCPUCores{1.0},
28+
expected: NodeCPURateCapacities{1e9},
29+
},
30+
{
31+
name: "multiple_cores",
32+
cores: NodeCPUCores{1.0, 2.0, 3.0},
33+
expected: NodeCPURateCapacities{1e9, 2e9, 3e9},
34+
},
35+
{
36+
name: "fractional_cores",
37+
cores: NodeCPUCores{1.5, 2.75, 3.25},
38+
expected: NodeCPURateCapacities{1500e6, 2750e6, 3250e6},
39+
},
40+
{
41+
name: "round_down_to_nearest_nanosecond", // sub-nanosecond remainders are truncated, not rounded to nearest
42+
cores: NodeCPUCores{1.4999999998, 2.1111111111999},
43+
expected: NodeCPURateCapacities{1499999999, 2111111111},
44+
},
45+
}
46+
47+
for _, tc := range testCases {
48+
t.Run(tc.name, func(t *testing.T) {
49+
require.Equal(t, tc.expected, tc.cores.ToRateCapacityNanos())
50+
})
51+
}
52+
}

pkg/kv/kvserver/asim/tests/datadriven_simulation_test.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,11 @@ var runAsimTests = envutil.EnvOrDefaultBool("COCKROACH_RUN_ASIM_TESTS", false)
6161
// cpu_per_access=0 raft_cpu_per_write=0
6262
//
6363
// - "gen_cluster" [nodes=<int>] [stores_per_node=<int>]
64-
// [store_byte_capacity_gib=<int>] [node_cpu_rate_capacity=<int>]
64+
// [store_byte_capacity_gib=<int>] [node_cpu_cores=<float>]
6565
// Initialize the cluster generator parameters. On the next call to eval,
6666
// the cluster generator is called to create the initial state used in the
6767
// simulation. The default values are: nodes=3 stores_per_node=1
68-
// store_byte_capacity_gib=256, node_cpu_rate_capacity=0.
68+
// store_byte_capacity_gib=256, node_cpu_cores=8.0.
6969
//
7070
// - "load_cluster": config=<name>
7171
// Load a defined cluster configuration to be the generated cluster in the
@@ -342,7 +342,7 @@ func TestDataDriven(t *testing.T) {
342342
case "gen_cluster":
343343
var nodes = 3
344344
var storesPerNode = 1
345-
var nodeCPURateCapacity = []uint64{config.DefaultNodeCPURateCapacityNanos}
345+
var nodeCPUCores = []float64{config.DefaultNodeCPUCores}
346346
var region []string
347347
var nodesPerRegion []int
348348
var storeByteCapacityGiB int64 = 256
@@ -351,16 +351,16 @@ func TestDataDriven(t *testing.T) {
351351
scanIfExists(t, d, "store_byte_capacity_gib", &storeByteCapacityGiB)
352352
scanIfExists(t, d, "region", &region)
353353
scanIfExists(t, d, "nodes_per_region", &nodesPerRegion)
354-
scanIfExists(t, d, "node_cpu_rate_capacity", &nodeCPURateCapacity)
354+
scanIfExists(t, d, "node_cpu_cores", &nodeCPUCores)
355355

356356
var buf strings.Builder
357-
require.NotEmpty(t, nodeCPURateCapacity)
357+
require.NotEmpty(t, nodeCPUCores)
358358
{
359-
n := len(nodeCPURateCapacity)
360-
require.True(t, n == 1 || n == nodes, "need to specify node_cpu_rate_capacity for each node")
359+
n := len(nodeCPUCores)
360+
require.True(t, n == 1 || n == nodes, "need to specify node_cpu_cores for each node")
361361

362-
for _, cpct := range nodeCPURateCapacity {
363-
if cores := float64(cpct) / 1e9; cores < 1 {
362+
for _, cores := range nodeCPUCores {
363+
if cores < 1.0 {
364364
// TODO(mma): fix up the tests that trigger this warning.
365365
// TODO(mma): print a warning whenever the measured CPU utilization
366366
// on a node exceeds this capacity, as that's likely not what the test
@@ -377,7 +377,7 @@ func TestDataDriven(t *testing.T) {
377377
StoreByteCapacity: storeByteCapacityGiB << 30,
378378
Region: region,
379379
NodesPerRegion: nodesPerRegion,
380-
NodeCPURateCapacity: nodeCPURateCapacity,
380+
NodeCPURateCapacity: state.NodeCPUCores(nodeCPUCores).ToRateCapacityNanos(),
381381
}
382382
return buf.String()
383383
case "load_cluster":

pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/heterogeneous_cpu.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
# balance on absolute cpu-nanos. n3 should handle more load due to its higher
1010
# capacity, but the current implementation doesn't account for this. This is
1111
# tracked in issue: https://github.com/cockroachdb/cockroach/issues/153777.
12-
gen_cluster nodes=3 node_cpu_rate_capacity=(8000000000,8000000000,16000000000)
12+
gen_cluster nodes=3 node_cpu_cores=(8,8,16)
1313
----
1414

1515
gen_ranges ranges=200 min_key=1 max_key=10000 placement_type=even

pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_cpu.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#
88
# Expected outcome: The allocator should rebalance both replicas and leases to
99
# distribute the high-cpu workload more evenly across all 10 nodes.
10-
gen_cluster nodes=10 node_cpu_rate_capacity=8000000000
10+
gen_cluster nodes=10 node_cpu_cores=8
1111
----
1212

1313
# TODO(wenyihu6): why didn't we balance more replicas/leases - is it because of a very high cpu per range

pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_cpu_25nodes.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# there is high CPU load imbalance across a large cluster. The test set-up is
33
# similar to high_cpu.txt but is on 25 nodes and with 3x the load for two
44
# gen_load commands.
5-
gen_cluster nodes=25 node_cpu_rate_capacity=8000000000
5+
gen_cluster nodes=25 node_cpu_cores=8
66
----
77

88
setting split_queue_enabled=false

pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_cpu_able_to_shed_leases.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# will be able to shed its own leases because it is the leaseholder. There should
55
# be a period of lease-rebalancing activity before replica-rebalancing.
66

7-
gen_cluster nodes=5 node_cpu_rate_capacity=9000000000
7+
gen_cluster nodes=5 node_cpu_cores=9
88
----
99

1010
setting split_queue_enabled=false

pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/high_cpu_unable_to_shed_leases.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# the CPU overloaded s1, so we should observe a period of lease transfers before
1717
# any replica based rebalancing away from the store occurs.
1818

19-
gen_cluster nodes=5 node_cpu_rate_capacity=9000000000
19+
gen_cluster nodes=5 node_cpu_cores=9
2020
----
2121

2222
setting split_queue_enabled=false

0 commit comments

Comments
 (0)