Skip to content

Commit f4975dd

Browse files
committed
asim: integrate mma store rebalancer
This commit integrates mma store rebalancers to asim.
1 parent c677a9d commit f4975dd

File tree

6 files changed

+82
-19
lines changed

6 files changed

+82
-19
lines changed

pkg/kv/kvserver/asim/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ go_library(
1010
"//pkg/kv/kvserver/asim/gossip",
1111
"//pkg/kv/kvserver/asim/history",
1212
"//pkg/kv/kvserver/asim/metrics",
13+
"//pkg/kv/kvserver/asim/mmaintegration",
1314
"//pkg/kv/kvserver/asim/op",
1415
"//pkg/kv/kvserver/asim/queue",
1516
"//pkg/kv/kvserver/asim/scheduled",

pkg/kv/kvserver/asim/asim.go

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/gossip"
1414
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/history"
1515
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/metrics"
16+
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/mmaintegration"
1617
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/op"
1718
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/queue"
1819
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/scheduled"
@@ -29,8 +30,8 @@ type Simulator struct {
2930
curr time.Time
3031
end time.Time
3132
// interval is the step between ticks for active simulaton components, such
32-
// as the queues, store rebalancer and state changers. It should be set
33-
// lower than the bgInterval, as updated occur more frequently.
33+
// as the queues, store rebalancer and state changers. It should be set lower
34+
// than the bgInterval, as updates occur more frequently.
3435
interval time.Duration
3536

3637
// The simulator can run multiple workload Generators in parallel.
@@ -47,6 +48,8 @@ type Simulator struct {
4748
sqs map[state.StoreID]queue.RangeQueue
4849
// Store rebalancers.
4950
srs map[state.StoreID]storerebalancer.StoreRebalancer
51+
// Multi-metric store rebalancers.
52+
mmSRs map[state.StoreID]*mmaintegration.MMAStoreRebalancer
5053
// Store operation controllers.
5154
controllers map[state.StoreID]op.Controller
5255

@@ -87,6 +90,7 @@ func NewSimulator(
8790
lqs := make(map[state.StoreID]queue.RangeQueue)
8891
sqs := make(map[state.StoreID]queue.RangeQueue)
8992
srs := make(map[state.StoreID]storerebalancer.StoreRebalancer)
93+
mmSRs := make(map[state.StoreID]*mmaintegration.MMAStoreRebalancer)
9094
changer := state.NewReplicaChanger()
9195
controllers := make(map[state.StoreID]op.Controller)
9296

@@ -103,6 +107,7 @@ func NewSimulator(
103107
sqs: sqs,
104108
controllers: controllers,
105109
srs: srs,
110+
mmSRs: mmSRs,
106111
pacers: pacers,
107112
gossip: gossip.NewGossip(initialState, settings),
108113
metrics: m,
@@ -178,6 +183,25 @@ func (s *Simulator) addStore(storeID state.StoreID, tick time.Time) {
178183
s.settings,
179184
storerebalancer.GetStateRaftStatusFn(s.state),
180185
)
186+
// TODO: We add the store to every node's allocator in the cluster
187+
// immediately. This is also updated via gossip, however there is a delay
188+
// after startup. When calling `mma.ProcessStoreLeaseholderMsg` via
189+
// tickMMStoreRebalancers, there may be not be a store state for some
190+
// replicas. Setting it here ensures that the store is always present and
191+
// initiated in each node's allocator. We should instead handle this in mma,
192+
// or integration component.
193+
for _, node := range s.state.Nodes() {
194+
node.MMAllocator().SetStore(state.StoreAttrAndLocFromDesc(
195+
s.state.StoreDescriptors(false, storeID)[0]))
196+
}
197+
s.mmSRs[storeID] = mmaintegration.NewMMAStoreRebalancer(
198+
storeID,
199+
store.NodeID(),
200+
s.state.Node(store.NodeID()).MMAllocator(),
201+
s.state.Node(store.NodeID()).AllocatorSync(),
202+
s.controllers[storeID],
203+
s.settings,
204+
)
181205
}
182206

183207
// GetNextTickTime returns a simulated tick time, or an indication that the
@@ -245,6 +269,9 @@ func (s *Simulator) RunSim(ctx context.Context) {
245269
// Simulate the store rebalancer logic.
246270
s.tickStoreRebalancers(ctx, tick, stateForAlloc)
247271

272+
// Simulate the multi-metric store rebalancer logic.
273+
s.tickMMStoreRebalancers(ctx, tick, stateForAlloc)
274+
248275
// Print tick metrics.
249276
s.tickMetrics(ctx, tick)
250277
}
@@ -345,6 +372,16 @@ func (s *Simulator) tickStoreRebalancers(ctx context.Context, tick time.Time, st
345372
}
346373
}
347374

375+
// tickStoreRebalancers iterates over the multi-metric store rebalancers in the
376+
// cluster and ticks their control loop.
377+
func (s *Simulator) tickMMStoreRebalancers(ctx context.Context, tick time.Time, state state.State) {
378+
stores := s.state.Stores()
379+
s.shuffler(len(stores), func(i, j int) { stores[i], stores[j] = stores[j], stores[i] })
380+
for _, store := range stores {
381+
s.mmSRs[store.StoreID()].Tick(ctx, tick, state)
382+
}
383+
}
384+
348385
// tickMetrics prints the metrics up to the given tick.
349386
func (s *Simulator) tickMetrics(ctx context.Context, tick time.Time) {
350387
s.metrics.Tick(ctx, tick, s.state)

pkg/kv/kvserver/asim/state/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ go_library(
4646
"//pkg/util/metric",
4747
"//pkg/util/stop",
4848
"//pkg/util/timeutil",
49+
"@com_github_cockroachdb_logtags//:logtags",
4950
"@com_github_google_btree//:btree",
5051
"@org_golang_google_protobuf//proto",
5152
],

pkg/kv/kvserver/asim/state/impl.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ import (
3737
"github.com/cockroachdb/cockroach/pkg/spanconfig/spanconfigreporter"
3838
"github.com/cockroachdb/cockroach/pkg/util/hlc"
3939
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
40+
"github.com/cockroachdb/logtags"
4041
"github.com/google/btree"
4142
)
4243

@@ -325,6 +326,11 @@ func (s *state) Nodes() []Node {
325326
return nodes
326327
}
327328

329+
func (s *state) Node(nodeID NodeID) Node {
330+
node := s.nodes[nodeID]
331+
return node
332+
}
333+
328334
// RangeFor returns the range containing Key in [StartKey, EndKey). This
329335
// cannot fail.
330336
func (s *state) RangeFor(key Key) Range {
@@ -1138,7 +1144,24 @@ func (s *state) UpdateStorePool(
11381144
detail := storeDescriptors[gossipStoreID]
11391145
copiedDetail := detail.Copy()
11401146
node.storepool.Details.StoreDetails.Store(gossipStoreID, copiedDetail)
1147+
copiedDesc := *copiedDetail.Desc
11411148
// TODO(mma): Support origin timestamps.
1149+
ts := s.clock.Now()
1150+
storeLoadMsg := mmaprototypehelpers.MakeStoreLoadMsg(copiedDesc, ts.UnixNano())
1151+
node.mmAllocator.SetStore(StoreAttrAndLocFromDesc(copiedDesc))
1152+
ctx := logtags.AddTag(context.Background(), fmt.Sprintf("n%d", nodeID), "")
1153+
ctx = logtags.AddTag(ctx, "t", ts.Sub(s.settings.StartTime))
1154+
node.mmAllocator.ProcessStoreLoadMsg(ctx, &storeLoadMsg)
1155+
}
1156+
}
1157+
1158+
func StoreAttrAndLocFromDesc(desc roachpb.StoreDescriptor) mmaprototype.StoreAttributesAndLocality {
1159+
return mmaprototype.StoreAttributesAndLocality{
1160+
StoreID: desc.StoreID,
1161+
NodeID: desc.Node.NodeID,
1162+
NodeAttrs: desc.Node.Attrs,
1163+
NodeLocality: desc.Node.Locality,
1164+
StoreAttrs: desc.Attrs,
11421165
}
11431166
}
11441167

pkg/kv/kvserver/asim/state/state.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ type State interface {
6060
// first flag is false, then the capacity is generated from scratch,
6161
// otherwise the last calculated capacity values are used for each store.
6262
StoreDescriptors(bool, ...StoreID) []roachpb.StoreDescriptor
63+
Node(NodeID) Node
6364
// Nodes returns all nodes that exist in this state.
6465
Nodes() []Node
6566
// RangeFor returns the range containing Key in [StartKey, EndKey). This

pkg/kv/kvserver/asim/tests/testdata/non_rand/example_zone_config.txt

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -53,22 +53,22 @@ US_West
5353

5454
plot stat=replicas
5555
----
56-
54.70 ┤ ╭╮
57-
51.05 ┤ ╭╮│╰╮ ╭─────────────────────────────────────────────────────────────
58-
47.40 ┤ │╰╮╭╭──╯─────────────────────────────────────────────────────────────
59-
43.76 ┤ ╭╮│╭──╯
60-
40.11 ┤ │╰││╯
61-
36.46 ┤ │
62-
32.82 ┤ │╭
63-
29.17 ┤ ╭╭╯
64-
25.52╭│
65-
21.88
66-
18.23 ┼────────╮
67-
14.59 ┼────────╮╮
68-
10.94 ┤ ╰╰╮╮
69-
7.29 ┤ ╰╰╰───╮
70-
3.65 ┤ ╰╰╰╰─────
71-
0.00 ┤ ╰╰───╰╰──╰╰──────────────────────────────────────────────────────────
56+
56.70 ┤ ╭╮
57+
52.92 ┤ ╭╮│╰╮ ╭─────────────────────────────────────────────────────────────
58+
49.14 ┤ │││ ╭╮╭──────────────────────────────────────────────────────────────
59+
45.36 ┤ ╭╮│╰─╭│╰╯╰
60+
41.58 ┤ │╰│╭──
61+
37.80 ┤ │ ││╰
62+
34.02 ┤ │╭
63+
30.24 ┤ ╭╭
64+
26.46│╭
65+
22.68
66+
18.90 ┤ ╭╭
67+
15.12 ┼────────╮╮
68+
11.34 ┤ ╰╰╮╮
69+
7.56 ┤ ╰╰╰─╮─
70+
3.78 ┤ ╰╰╰╰─────
71+
0.00 ┤ ╰╰──╰─╰─╰────────────────────────────────────────────────────────────
7272
replicas
7373
initial store values: [s1=17, s2=17, s3=17, s4=17, s5=17, s6=16, s7=16, s8=17, s9=17, s10=17, s11=17, s12=17, s13=16, s14=16, s15=16, s16=16, s17=17, s18=17, s19=17, s20=16, s21=17, s22=16, s23=17, s24=17, s25=17, s26=17, s27=17, s28=16, s29=17, s30=16, s31=17, s32=17, s33=16, s34=16, s35=17, s36=17] (stddev=0.47, mean=16.67, sum=600)
74-
last store values: [s1=50, s2=50, s3=49, s4=50, s5=50, s6=50, s7=50, s8=50, s9=50, s10=49, s11=51, s12=51, s13=0, s14=0, s15=0, s16=0, s17=0, s18=0, s19=0, s20=0, s21=0, s22=0, s23=0, s24=0, s25=0, s26=0, s27=0, s28=0, s29=0, s30=0, s31=0, s32=0, s33=0, s34=0, s35=0, s36=0] (stddev=23.57, mean=16.67, sum=600)
74+
last store values: [s1=49, s2=50, s3=50, s4=52, s5=49, s6=52, s7=49, s8=50, s9=50, s10=50, s11=49, s12=50, s13=0, s14=0, s15=0, s16=0, s17=0, s18=0, s19=0, s20=0, s21=0, s22=0, s23=0, s24=0, s25=0, s26=0, s27=0, s28=0, s29=0, s30=0, s31=0, s32=0, s33=0, s34=0, s35=0, s36=0] (stddev=23.58, mean=16.67, sum=600)

0 commit comments

Comments
 (0)