Skip to content

Commit 48aae81

Browse files
authored
feat(policy): add fixed buffer autoscaling policy (#671)
* feat(policy): add new fixedBuffer autoscaling. The new policy will add a buffer of rooms of fixed size/amount on top of the occupied rooms, instead of relying on relative occupancy to determine the amount of rooms in the system * feat(api): new fixedBuffer policy to API. The Management API can receive a new policy type for scheduler autoscaling, which is the 'fixedBuffer' one. The validations were moved to a struct-level validation so we can infer nested field values based on policy type * chore(docker): remove no_cache from local dev * fix(policy): refactor fixedBuffer map * fix(metrics): default zero on game room metrics * chore(worker): reduce log ingestion
1 parent 34eb41f commit 48aae81

File tree

24 files changed

+1427
-221
lines changed

24 files changed

+1427
-221
lines changed

cmd/commom/commom.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import (
3232

3333
"github.com/topfreegames/maestro/internal/config"
3434
"github.com/topfreegames/maestro/internal/config/viper"
35+
"github.com/topfreegames/maestro/internal/core/entities/autoscaling"
3536
"github.com/topfreegames/maestro/internal/service"
3637
"github.com/topfreegames/maestro/internal/validations"
3738
"go.uber.org/zap"
@@ -48,6 +49,12 @@ func ServiceSetup(ctx context.Context, cancelFn context.CancelFunc, logConfig, c
4849
return fmt.Errorf("unable to register validations: %w", err), nil, nil
4950
}
5051

52+
// Register autoscaling policy validation
53+
err = autoscaling.RegisterPolicyValidation(validations.Validate)
54+
if err != nil {
55+
return fmt.Errorf("unable to register autoscaling policy validation: %w", err), nil, nil
56+
}
57+
5158
viperConfig, err := viper.NewViperConfig(configPath)
5259
if err != nil {
5360
return fmt.Errorf("unable to load config: %w", err), nil, nil

cmd/worker/wire.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ func initializeWorker(c config.Config, builder *worker.WorkerBuilder) (*workerss
5353
service.NewCreateSchedulerVersionConfig,
5454
service.NewHealthControllerConfig,
5555
service.NewOperationRoomsAddConfig,
56+
service.NewOperationRoomsRemoveConfig,
5657
service.NewRoomManagerConfig,
5758
service.NewRoomManager,
5859
service.NewOperationManagerConfig,

cmd/worker/worker.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ func runWorker() {
7474
}
7575

7676
go func() {
77-
zap.L().Info("starting operation cancellation request watcher")
77+
zap.L().Debug("starting operation cancellation request watcher")
7878
err = operationExecutionWorkerManager.WorkerOptions.OperationManager.WatchOperationCancellationRequests(ctx)
7979
if err != nil {
8080
zap.L().With(zap.Error(err)).Info("operation cancellation watcher stopped with error")

docs/tutorials/Autoscaling.md

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ autoscaling:
111111
Maestro has a set of predefined policy types that can be used to configure the autoscaling, each policy will implement
112112
a specific strategy for calculating the desired number of rooms and will have its configurable parameters.
113113

114+
**Note:** Policy types are mutually exclusive. The parameters used will depend on the value of `autoscaling.policy.type`.
115+
114116
### Room Occupancy Policy
115117
The basic concept of this policy is to scale the scheduler up or down based on the actual room occupancy rate, by defining a "buffer" percentage
116118
of ready rooms that Maestro must keep. The desired number of rooms will be given by the following formula:
@@ -190,3 +192,124 @@ Below are some simulated examples of how the room occupancy policy will behave:
190192
| 1 | 1 | 0.3 | 2 | Scale Up: +1 |
191193
| 2 | 2 | 0.9 | 20 | Scale Up: +18 |
192194

195+
### Fixed Buffer Amount Policy
196+
The Fixed Buffer Amount policy maintains a fixed number of rooms on top of the currently occupied rooms. This policy is useful when you want to ensure a consistent buffer of available rooms regardless of the occupancy rate.
197+
198+
The desired number of rooms will be given by the following formula:
199+
200+
```
201+
desiredNumberOfRooms = numberOfOccupiedRooms + fixedBufferAmount
202+
```
203+
204+
Respecting autoscaling boundaries (min and max):
205+
206+
```go
207+
if desiredNumberOfRooms < autoscaling.Min {
208+
desiredNumberOfRooms = autoscaling.Min
209+
}
210+
211+
if autoscaling.Max != -1 && desiredNumberOfRooms > autoscaling.Max {
212+
desiredNumberOfRooms = autoscaling.Max
213+
}
214+
```
215+
216+
Maestro will constantly try to maintain the specified fixed amount of rooms in addition to the occupied rooms, ensuring there are always enough rooms available for new players.
217+
218+
#### Fixed Buffer Policy Parameters
219+
220+
- **fixedBuffer.amount** [integer]: The fixed number of rooms (`fixedBufferAmount` in the formula above) that Maestro should maintain on top of the occupied rooms. Must be greater than 0 and, when `max` is greater than 0, less than the maximum number of rooms (`max`).
221+
222+
#### Example
223+
224+
[comment]: <> (YAML version)
225+
<details>
226+
<summary>YAML version</summary>
227+
<div class="highlight highlight-source-yaml position-relative overflow-auto">
228+
<pre>
229+
name: String
230+
game: String
231+
...
232+
autoscaling:
233+
enabled: true
234+
min: 1
235+
max: 100
236+
policy:
237+
type: fixedBuffer
238+
parameters:
239+
fixedBuffer:
240+
amount: 50
241+
</pre>
242+
</div>
243+
</details>
244+
245+
[comment]: <> (JSON version)
246+
<details>
247+
<summary>JSON version</summary>
248+
<div class="highlight highlight-source-yaml position-relative overflow-auto">
249+
<pre>
250+
{
251+
"autoscaling": {
252+
"enabled": true,
253+
"min": 1,
254+
"max": 100,
255+
"policy": {
256+
"type": "fixedBuffer",
257+
"parameters": {
258+
"fixedBuffer": {
259+
"amount": 50
260+
}
261+
}
262+
}
263+
}
264+
}
265+
</pre>
266+
</div>
267+
</details>
268+
269+
[comment]: <> (Coexisting with RoomOccupancy)
270+
<details>
271+
<summary>Coexisting with RoomOccupancy (only the fixedBuffer parameters are used, because the policy type is fixedBuffer)</summary>
272+
<div class="highlight highlight-source-yaml position-relative overflow-auto">
273+
<pre>
274+
{
275+
"autoscaling": {
276+
"enabled": true,
277+
"min": 1,
278+
"max": 100,
279+
"policy": {
280+
"type": "fixedBuffer",
281+
"parameters": {
282+
"fixedBuffer": {
283+
"amount": 50
284+
},
285+
"roomOccupancy": {
286+
"readyTarget": 0.2,
287+
"downThreshold": 0.9
288+
}
289+
}
290+
}
291+
}
292+
}
293+
</pre>
294+
</div>
295+
</details>
296+
297+
Below are some simulated examples of how the fixed buffer amount policy will behave:
298+
299+
> Note that the autoscaling decision will always be limited by the min-max values!
300+
301+
| totalRooms | occupiedRooms | fixedBufferAmount | desiredNumberOfRooms | autoscalingDecision |
302+
|:----------:|:-------------:|:-----------------:|:--------------------:|:-------------------:|
303+
| 100 | 80 | 50 | 130 | Scale Up: +30 |
304+
| 100 | 50 | 50 | 100 | Do Nothing: 0 |
305+
| 100 | 30 | 50 | 80 | Scale Down: -20 |
306+
| 50 | 40 | 20 | 60 | Scale Up: +10 |
307+
| 50 | 30 | 20 | 50 | Do Nothing: 0 |
308+
| 50 | 10 | 20 | 30 | Scale Down: -20 |
309+
| 10 | 5 | 10 | 15 | Scale Up: +5 |
310+
| 10 | 0 | 10 | 10 | Do Nothing: 0 |
311+
| 10 | 0 | 5 | 5 | Scale Down: -5 |
312+
| 5 | 5 | 5 | 10 | Scale Up: +5 |
313+
| 1 | 1 | 3 | 4 | Scale Up: +3 |
314+
| 2 | 2 | 8 | 10 | Scale Up: +8 |
315+

e2e/framework/maestro/docker-compose.yml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ services:
44
build:
55
context: ../../..
66
dockerfile: ./e2e/framework/maestro/Dockerfile
7-
no_cache: true
87
environment:
98
- MAESTRO_ADAPTERS_ROOMSTORAGE_REDIS_URL=redis://redis:6379/0
109
- MAESTRO_ADAPTERS_SCHEDULERSTORAGE_POSTGRES_URL=postgres://maestro:maestro@postgres:5432/maestro?sslmode=disable
@@ -31,7 +30,6 @@ services:
3130
build:
3231
context: ../../..
3332
dockerfile: ./e2e/framework/maestro/Dockerfile
34-
no_cache: true
3533
environment:
3634
- MAESTRO_ADAPTERS_ROOMSTORAGE_REDIS_URL=redis://redis:6379/0
3735
- MAESTRO_ADAPTERS_INSTANCESTORAGE_REDIS_URL=redis://redis:6379/0
@@ -60,7 +58,6 @@ services:
6058
build:
6159
context: ../../..
6260
dockerfile: ./e2e/framework/maestro/Dockerfile
63-
no_cache: true
6461
environment:
6562
- MAESTRO_ADAPTERS_SCHEDULERSTORAGE_POSTGRES_URL=postgres://maestro:maestro@postgres:5432/maestro?sslmode=disable
6663
- MAESTRO_ADAPTERS_ROOMSTORAGE_REDIS_URL=redis://redis:6379/0
@@ -82,7 +79,6 @@ services:
8279
build:
8380
context: ../../..
8481
dockerfile: ./e2e/framework/maestro/Dockerfile
85-
no_cache: true
8682
environment:
8783
- MAESTRO_ADAPTERS_SCHEDULERSTORAGE_POSTGRES_URL=postgres://maestro:maestro@postgres:5432/maestro?sslmode=disable
8884
- MAESTRO_ADAPTERS_OPERATIONFLOW_REDIS_URL=redis://redis:6379/0

internal/adapters/runtime/kubernetes/scheduler.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ func (k *kubernetes) MitigateDisruption(
238238
// If current PDB already matches the desired state regarding MinAvailable and MaxUnavailable being nil,
239239
// no update is needed.
240240
if currentPdbMinAvailable == desiredMinAvailable && pdb.Spec.MaxUnavailable == nil {
241-
k.logger.Info("PDB already in desired state", zap.String(logs.LogFieldSchedulerName, scheduler.Name))
241+
k.logger.Debug("PDB already in desired state", zap.String(logs.LogFieldSchedulerName, scheduler.Name))
242242
return nil // No update needed, success.
243243
}
244244

internal/api/handlers/requestadapters/schedulers.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,11 @@ func fromApiAutoscalingPolicyParameters(apiPolicyParameters *api.PolicyParameter
385385
if roomOccupancy := apiPolicyParameters.GetRoomOccupancy(); roomOccupancy != nil {
386386
policyParameters.RoomOccupancy = fromApiRoomOccupancyPolicyToEntity(roomOccupancy)
387387
}
388+
if fixedBuffer := apiPolicyParameters.GetFixedBuffer(); fixedBuffer != nil {
389+
policyParameters.FixedBuffer = &autoscaling.FixedBufferParams{
390+
Amount: int(fixedBuffer.GetAmount()),
391+
}
392+
}
388393
return policyParameters
389394
}
390395

@@ -569,6 +574,17 @@ func getAutoscalingPolicy(autoscalingPolicy autoscaling.Policy) *api.Autoscaling
569574
func getPolicyParameters(parameters autoscaling.PolicyParameters) *api.PolicyParameters {
570575
return &api.PolicyParameters{
571576
RoomOccupancy: getRoomOccupancy(parameters.RoomOccupancy),
577+
FixedBuffer: getFixedBuffer(parameters.FixedBuffer),
578+
}
579+
}
580+
581+
func getFixedBuffer(fixedBuffer *autoscaling.FixedBufferParams) *api.FixedBuffer {
582+
if fixedBuffer == nil {
583+
return nil
584+
}
585+
fixedBufferAmount := int32(fixedBuffer.Amount)
586+
return &api.FixedBuffer{
587+
Amount: &fixedBufferAmount,
572588
}
573589
}
574590

internal/core/entities/autoscaling/autoscaling.go

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,10 @@ type PolicyType string
3030
const (
3131
// RoomOccupancy is an implemented policy in maestro autoscaler,
3232
// it uses the number of occupied rooms and a ready rooms target percentage to calculate the desired number of rooms in a scheduler.
33-
RoomOccupancy PolicyType = "roomOccupancy"
33+
RoomOccupancy PolicyType = "roomOccupancy"
34+
// FixedBuffer is an implemented policy in maestro autoscaler,
35+
// it uses a fixed amount of ready rooms to calculate the desired number of rooms in a scheduler.
36+
FixedBuffer PolicyType = "fixedBuffer"
3437
DefaultDownThreshold float32 = 0.99
3538
)
3639

@@ -70,17 +73,20 @@ func NewAutoscaling(enabled bool, min, max, cooldown int, policy Policy) (*Autos
7073
// Policy represents the autoscaling policy configuration.
7174
type Policy struct {
7275
// Type indicates the autoscaling policy type.
73-
Type PolicyType `validate:"oneof=roomOccupancy"`
76+
Type PolicyType `validate:"oneof=roomOccupancy fixedBuffer"`
7477
// Parameters indicates the autoscaling policy parameters.
75-
Parameters PolicyParameters
78+
Parameters PolicyParameters `validate:"required"`
7679
}
7780

7881
// PolicyParameters represents the autoscaling policy parameters, its fields validations will
7982
// vary according to the policy type.
8083
type PolicyParameters struct {
8184
// RoomOccupancy represents the parameters for RoomOccupancy policy type, it must be provided if Policy Type is RoomOccupancy.
8285
// +optional
83-
RoomOccupancy *RoomOccupancyParams `validate:"required_for_room_occupancy=Type"`
86+
RoomOccupancy *RoomOccupancyParams
87+
// FixedBuffer represents the parameters for FixedBuffer policy type, it must be provided if Policy Type is FixedBuffer.
88+
// +optional
89+
FixedBuffer *FixedBufferParams
8490
}
8591

8692
// RoomOccupancyParams represents the parameters accepted by rooms occupancy autoscaling properties.
@@ -90,3 +96,9 @@ type RoomOccupancyParams struct {
9096
// DownThreshold indicates the percentage of occupied rooms a scheduler should have to trigger a downscale event.
9197
DownThreshold float64 `validate:"gt=0,lt=1"`
9298
}
99+
100+
// FixedBufferParams represents the parameters accepted by fixed buffer autoscaling properties.
101+
type FixedBufferParams struct {
102+
// Amount indicates the amount of rooms that a scheduler should maintain at ready state at all times.
103+
Amount int `validate:"gt=0"`
104+
}

internal/core/entities/autoscaling/autoscaling_test.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ func TestNewAutoscaling(t *testing.T) {
3939
if err != nil {
4040
t.Errorf("unexpected error %d'", err)
4141
}
42+
43+
// Register struct-level validation for Policy
44+
err = RegisterPolicyValidation(validations.Validate)
45+
if err != nil {
46+
t.Errorf("unexpected error registering policy validation: %v", err)
47+
}
48+
4249
translator := validations.GetDefaultTranslator()
4350

4451
validRoomOccupancyPolicy := Policy{
@@ -51,6 +58,15 @@ func TestNewAutoscaling(t *testing.T) {
5158
},
5259
}
5360

61+
validFixedBufferPolicy := Policy{
62+
Type: FixedBuffer,
63+
Parameters: PolicyParameters{
64+
FixedBuffer: &FixedBufferParams{
65+
Amount: 5,
66+
},
67+
},
68+
}
69+
5470
t.Run("invalid scenarios", func(t *testing.T) {
5571
t.Run("fails when try to create autoscaling with invalid Min", func(t *testing.T) {
5672
_, err := NewAutoscaling(true, -1, 10, 10, validRoomOccupancyPolicy)
@@ -111,6 +127,22 @@ func TestNewAutoscaling(t *testing.T) {
111127
validationErrs = err.(validator.ValidationErrors)
112128
assert.Equal(t, "DownThreshold must be less than 1", validationErrs[0].Translate(translator))
113129
})
130+
131+
t.Run("fails when try to create autoscaling with invalid fixedBuffer Policy", func(t *testing.T) {
132+
_, err := NewAutoscaling(true, 1, 10, 10, Policy{Type: "fixedBuffer", Parameters: PolicyParameters{}})
133+
validationErrs := err.(validator.ValidationErrors)
134+
assert.Equal(t, "FixedBuffer must not be nil for FixedBuffer policy type", validationErrs[0].Translate(translator))
135+
136+
zeroValue := 0
137+
_, err = NewAutoscaling(true, 1, 10, 10, Policy{Type: "fixedBuffer", Parameters: PolicyParameters{FixedBuffer: &FixedBufferParams{Amount: zeroValue}}})
138+
validationErrs = err.(validator.ValidationErrors)
139+
assert.Equal(t, "Amount must be greater than 0", validationErrs[0].Translate(translator))
140+
141+
negativeValue := -1
142+
_, err = NewAutoscaling(true, 1, 10, 10, Policy{Type: "fixedBuffer", Parameters: PolicyParameters{FixedBuffer: &FixedBufferParams{Amount: negativeValue}}})
143+
validationErrs = err.(validator.ValidationErrors)
144+
assert.Equal(t, "Amount must be greater than 0", validationErrs[0].Translate(translator))
145+
})
114146
})
115147

116148
t.Run("valid scenarios", func(t *testing.T) {
@@ -124,6 +156,17 @@ func TestNewAutoscaling(t *testing.T) {
124156
_, err = NewAutoscaling(false, 50, 100, 100, validRoomOccupancyPolicy)
125157
assert.NoError(t, err)
126158
})
159+
160+
t.Run("success when try to create valid autoscaling with fixedBuffer type", func(t *testing.T) {
161+
_, err := NewAutoscaling(true, 10, -1, 0, validFixedBufferPolicy)
162+
assert.NoError(t, err)
163+
164+
_, err = NewAutoscaling(false, 1, 1, 10, validFixedBufferPolicy)
165+
assert.NoError(t, err)
166+
167+
_, err = NewAutoscaling(false, 50, 100, 100, validFixedBufferPolicy)
168+
assert.NoError(t, err)
169+
})
127170
})
128171

129172
}

0 commit comments

Comments
 (0)