Skip to content

Commit 2e09627

Browse files
craig[bot]tbg
andcommitted
Merge #153653
153653: asim: allow configuring heterogeneous CPU r=tbg a=tbg This PR lays the groundwork to allow for `gen_cluster nodes=3 node_cpu_rate_capacity(A,B,C)` where A, B, C aren't identical. If only one value is specified, it's applied to all nodes. In other words, existing tests still work without change. Then it adds the `hetergeneous_cpu` test - the simple scenario outlined in #153516. We see that neither SMA nor MMA relieve the two 8vcpu nodes from their high CPU utilization, even though the 16vcpu node is below 50% utilization. Closes #153516. Epic: CRDB-49117 Co-authored-by: Tobias Grieger <[email protected]>
2 parents 048cb14 + a1ef101 commit 2e09627

22 files changed

+259
-66
lines changed

pkg/kv/kvserver/asim/gen/generator.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -229,14 +229,14 @@ type BasicCluster struct {
229229
StoreByteCapacity int64
230230
Region []string
231231
NodesPerRegion []int
232-
NodeCPURateCapacity int64
232+
NodeCPURateCapacity state.NodeCPURateCapacities
233233
}
234234

235235
func (bc BasicCluster) String() string {
236236
var b strings.Builder
237237
_, _ = fmt.Fprintf(&b,
238-
"[nodes: %d, stores_per_node:%d, store_disk_capacity: %dGiB, node_capacity: %dcpu-sec/sec",
239-
bc.Nodes, bc.StoresPerNode, bc.StoreByteCapacity>>30, bc.NodeCPURateCapacity/time.Second.Nanoseconds())
238+
"[nodes: %d, stores_per_node:%d, store_disk_capacity: %dGiB, node_capacity: %s",
239+
bc.Nodes, bc.StoresPerNode, bc.StoreByteCapacity>>30, bc.NodeCPURateCapacity)
240240
if len(bc.Region) != 0 {
241241
_, _ = fmt.Fprintf(&b, ", region: %v, nodes_per_region: %v", bc.Region, bc.NodesPerRegion)
242242
}

pkg/kv/kvserver/asim/state/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ go_library(
1111
"load.go",
1212
"new_state.go",
1313
"new_state_test_helper.go",
14+
"node_cpu_rate_capacities.go",
1415
"parser_replica_placement.go",
1516
"split_decider.go",
1617
"state.go",
@@ -59,6 +60,7 @@ go_test(
5960
"change_test.go",
6061
"config_loader_test.go",
6162
"liveness_test.go",
63+
"node_cpu_rate_capacities_test.go",
6264
"parser_replica_placement_test.go",
6365
"split_decider_test.go",
6466
"state_test.go",

pkg/kv/kvserver/asim/state/config_loader.go

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ var AllClusterOptions = [...]string{"single_region", "single_region_multi_store"
2626
// SingleRegionConfig is a simple cluster config with a single region and 3
2727
// zones, all have the same number of nodes.
2828
var SingleRegionConfig = ClusterInfo{
29-
NodeCPURateCapacityNanos: config.DefaultNodeCPURateCapacityNanos, // 8vpucs
30-
StoreDiskCapacityBytes: config.DefaultStoreDiskCapacityBytes, // 1024 GiB
29+
NodeCPURateCapacityNanos: []uint64{config.DefaultNodeCPURateCapacityNanos}, // 8 vcpus
30+
StoreDiskCapacityBytes: config.DefaultStoreDiskCapacityBytes, // 1024 GiB
3131
Regions: []Region{
3232
{
3333
Name: "US",
@@ -43,8 +43,8 @@ var SingleRegionConfig = ClusterInfo{
4343
// SingleRegionMultiStoreConfig is a simple cluster config with a single region
4444
// and 3 zones, all zones have 1 node and 5 stores per node.
4545
var SingleRegionMultiStoreConfig = ClusterInfo{
46-
NodeCPURateCapacityNanos: config.DefaultNodeCPURateCapacityNanos, // 8 vcpus
47-
StoreDiskCapacityBytes: config.DefaultStoreDiskCapacityBytes, // 1024 GiB
46+
NodeCPURateCapacityNanos: []uint64{config.DefaultNodeCPURateCapacityNanos}, // 8 vcpus
47+
StoreDiskCapacityBytes: config.DefaultStoreDiskCapacityBytes, // 1024 GiB
4848
Regions: []Region{
4949
{
5050
Name: "US",
@@ -59,8 +59,8 @@ var SingleRegionMultiStoreConfig = ClusterInfo{
5959

6060
// MultiRegionConfig is a perfectly balanced cluster config with 3 regions.
6161
var MultiRegionConfig = ClusterInfo{
62-
NodeCPURateCapacityNanos: config.DoubleDefaultNodeCPURateCapacityNanos, // 16 vcpus
63-
StoreDiskCapacityBytes: config.DoubleDefaultStoreDiskCapacityBytes, // 2048 GiB
62+
NodeCPURateCapacityNanos: []uint64{config.DoubleDefaultNodeCPURateCapacityNanos}, // 16 vcpus
63+
StoreDiskCapacityBytes: config.DoubleDefaultStoreDiskCapacityBytes, // 2048 GiB
6464
Regions: []Region{
6565
{
6666
Name: "US_East",
@@ -91,8 +91,8 @@ var MultiRegionConfig = ClusterInfo{
9191

9292
// ComplexConfig is an imbalanced multi-region cluster config.
9393
var ComplexConfig = ClusterInfo{
94-
NodeCPURateCapacityNanos: config.DoubleDefaultNodeCPURateCapacityNanos, // 16 vcpus
95-
StoreDiskCapacityBytes: config.DoubleDefaultStoreDiskCapacityBytes, // 2048 GiB
94+
NodeCPURateCapacityNanos: []uint64{config.DoubleDefaultNodeCPURateCapacityNanos}, // 16 vcpus
95+
StoreDiskCapacityBytes: config.DoubleDefaultStoreDiskCapacityBytes, // 2048 GiB
9696
Regions: []Region{
9797
{
9898
Name: "US_East",
@@ -282,7 +282,7 @@ type Region struct {
282282
type ClusterInfo struct {
283283
Regions []Region
284284
StoreDiskCapacityBytes int64
285-
NodeCPURateCapacityNanos int64
285+
NodeCPURateCapacityNanos NodeCPURateCapacities
286286
}
287287

288288
func (c ClusterInfo) String() (s string) {
@@ -300,7 +300,7 @@ func (c ClusterInfo) String() (s string) {
300300
}
301301
buf.WriteString("]\n")
302302
}
303-
buf.WriteString(fmt.Sprintf("store_disk_capacity=%d bytes, node_cpu_rate_capacity=%d cpu-ns/sec",
303+
buf.WriteString(fmt.Sprintf("store_disk_capacity=%d bytes, node_cpu_rate_capacity=%s",
304304
c.StoreDiskCapacityBytes, c.NodeCPURateCapacityNanos))
305305
return buf.String()
306306
}
@@ -361,6 +361,7 @@ func LoadClusterInfo(c ClusterInfo, settings *config.SimulationSettings) State {
361361
s := newState(settings)
362362
// A new state has a single range - add the replica load for that range.
363363
s.clusterinfo = c
364+
var nodeIdx int
364365
for _, r := range c.Regions {
365366
regionTier := roachpb.Tier{
366367
Key: "region",
@@ -375,7 +376,17 @@ func LoadClusterInfo(c ClusterInfo, settings *config.SimulationSettings) State {
375376
Tiers: []roachpb.Tier{regionTier, zoneTier},
376377
}
377378
for i := 0; i < z.NodeCount; i++ {
378-
node := s.AddNode(c.NodeCPURateCapacityNanos, locality)
379+
var cpuCap uint64
380+
if len(c.NodeCPURateCapacityNanos) == 1 {
381+
// As a special case, if only one CPU is specified, use it for all nodes.
382+
cpuCap = c.NodeCPURateCapacityNanos[0]
383+
} else {
384+
// Otherwise, expect a CPU capacity for each node. Crash if this is
385+
// not the case.
386+
cpuCap = c.NodeCPURateCapacityNanos[nodeIdx]
387+
}
388+
nodeIdx += 1
389+
node := s.AddNode(int64(cpuCap), locality)
379390
storesRequired := z.StoresPerNode
380391
if storesRequired < 1 {
381392
panic(fmt.Sprintf("storesPerNode cannot be less than one but found %v", storesRequired))

pkg/kv/kvserver/asim/state/new_state.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"sort"
1313
"strings"
1414

15+
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/config"
1516
"github.com/cockroachdb/cockroach/pkg/roachpb"
1617
)
1718

@@ -253,7 +254,9 @@ func RangesInfoWithDistribution(
253254
func ClusterInfoWithDistribution(
254255
nodeCount int, storesPerNode int, regions []string, regionNodeWeights []float64,
255256
) ClusterInfo {
256-
ret := ClusterInfo{}
257+
ret := ClusterInfo{
258+
NodeCPURateCapacityNanos: []uint64{config.DefaultNodeCPURateCapacityNanos},
259+
}
257260

258261
ret.Regions = make([]Region, len(regions))
259262
availableNodes := nodeCount

pkg/kv/kvserver/asim/state/new_state_test_helper.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ func NewStateWithDistribution(
2727
// Currently multi-store is not tested for correctness. Default to a single
2828
// store per node.
2929
clusterInfo := ClusterInfoWithStoreCount(numNodes, 1 /* storesPerNode */)
30-
clusterInfo.NodeCPURateCapacityNanos = config.DefaultNodeCPURateCapacityNanos
30+
clusterInfo.NodeCPURateCapacityNanos = []uint64{config.DefaultNodeCPURateCapacityNanos}
3131
s := LoadClusterInfo(clusterInfo, settings)
3232

3333
stores := make([]StoreID, numNodes)
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package state
7+
8+
import (
9+
"fmt"
10+
"strings"
11+
)
12+
13+
// NodeCPURateCapacities is a list of node CPU rate capacities, each expressed
// in CPU-nanoseconds per second.
type NodeCPURateCapacities []uint64

// String formats the capacities in units of cpu-sec/sec. An empty list is
// rendered as "no cpus". A single entry is printed bare, while multiple
// entries are space-separated and wrapped in parentheses. Entries that are a
// whole number of seconds print as integers; all others print with two
// decimal places.
func (sl NodeCPURateCapacities) String() string {
	if len(sl) == 0 {
		return "no cpus"
	}

	rendered := make([]string, len(sl))
	for idx, nanos := range sl {
		if nanos%1e9 != 0 {
			// Not a whole number of seconds: show the fractional value.
			rendered[idx] = fmt.Sprintf("%.2f", float64(nanos)/1e9)
			continue
		}
		rendered[idx] = fmt.Sprintf("%d", nanos/1e9)
	}

	out := strings.Join(rendered, " ")
	if len(sl) > 1 {
		out = "(" + out + ")"
	}
	return out + " cpu-sec/sec"
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package state
7+
8+
import (
9+
"path/filepath"
10+
"testing"
11+
"time"
12+
13+
"github.com/cockroachdb/cockroach/pkg/testutils/echotest"
14+
)
15+
16+
func TestNodeCPURateCapacities_String(t *testing.T) {
17+
testCases := []struct {
18+
name string
19+
capacity NodeCPURateCapacities
20+
}{
21+
{
22+
name: "empty",
23+
capacity: NodeCPURateCapacities{},
24+
},
25+
{
26+
name: "single_capacity",
27+
capacity: NodeCPURateCapacities{uint64(time.Second.Nanoseconds())},
28+
},
29+
{
30+
name: "multiple_capacities",
31+
capacity: NodeCPURateCapacities{
32+
uint64(2 * time.Second.Nanoseconds()),
33+
uint64(4 * time.Second.Nanoseconds()),
34+
uint64(8 * time.Second.Nanoseconds()),
35+
},
36+
},
37+
{
38+
name: "fractional_seconds",
39+
capacity: NodeCPURateCapacities{
40+
uint64(500 * time.Millisecond.Nanoseconds()),
41+
uint64(1500 * time.Millisecond.Nanoseconds()),
42+
},
43+
},
44+
{
45+
name: "mixed_exact_and_fractional",
46+
capacity: NodeCPURateCapacities{
47+
uint64(time.Second.Nanoseconds()), // 1.00
48+
uint64(500 * time.Millisecond.Nanoseconds()), // 0.50
49+
uint64(2 * time.Second.Nanoseconds()), // 2.00
50+
},
51+
},
52+
}
53+
54+
for _, tc := range testCases {
55+
t.Run(tc.name, func(t *testing.T) {
56+
echotest.Require(t, tc.capacity.String(), filepath.Join("testdata", "NodeCPURateCapacities_String", tc.name+".txt"))
57+
})
58+
}
59+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
echo
2+
----
3+
no cpus
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
echo
2+
----
3+
(0.50 1.50) cpu-sec/sec
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
echo
2+
----
3+
(1 0.50 2) cpu-sec/sec

0 commit comments

Comments
 (0)