Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cmd/metal-api/internal/fsm/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ func Events() fsm.Events {
Src: []string{
states.PlannedReboot.String(),
states.PhonedHome.String(),
states.PXEBooting.String(),
states.Preparing.String(),
states.Registering.String(),
states.Waiting.String(),
},
Dst: SelfTransitionState,
},
Expand Down
69 changes: 64 additions & 5 deletions cmd/metal-api/internal/fsm/fsm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package fsm
import (
"context"
"log/slog"
"os"
"testing"
"time"

Expand All @@ -13,7 +14,8 @@ import (
func TestHandleProvisioningEvent(t *testing.T) {
now := time.Now()
lastEventTime := now.Add(-time.Minute * 4)
exceedThresholdTime := now.Add(-time.Minute * 10)
exceedReclaimThresholdTime := now.Add(-time.Minute * 10)
exceedBufferedPhonedHomeThreshold := now.Add(-time.Minute * 10)
tests := []struct {
event *metal.ProvisioningEvent
container *metal.ProvisioningEventContainer
Expand Down Expand Up @@ -427,12 +429,12 @@ func TestHandleProvisioningEvent(t *testing.T) {
container: &metal.ProvisioningEventContainer{
Events: metal.ProvisioningEvents{
{
Time: exceedThresholdTime,
Time: exceedReclaimThresholdTime,
Event: metal.ProvisioningEventMachineReclaim,
},
},
Liveliness: metal.MachineLivelinessAlive,
LastEventTime: &exceedThresholdTime,
LastEventTime: &exceedReclaimThresholdTime,
},
event: &metal.ProvisioningEvent{
Time: now,
Expand All @@ -446,7 +448,7 @@ func TestHandleProvisioningEvent(t *testing.T) {
LastEventTime: &now,
Events: metal.ProvisioningEvents{
{
Time: exceedThresholdTime,
Time: exceedReclaimThresholdTime,
Event: metal.ProvisioningEventMachineReclaim,
},
},
Expand Down Expand Up @@ -626,12 +628,69 @@ func TestHandleProvisioningEvent(t *testing.T) {
},
},
},
{
name: "swallow delayed buffered phoned home event",
container: &metal.ProvisioningEventContainer{
Events: metal.ProvisioningEvents{
{
Time: lastEventTime,
Event: metal.ProvisioningEventWaiting,
},
},
Liveliness: metal.MachineLivelinessAlive,
LastEventTime: &lastEventTime,
},
event: &metal.ProvisioningEvent{
Time: now,
Event: metal.ProvisioningEventPhonedHome,
},
want: &metal.ProvisioningEventContainer{
Liveliness: metal.MachineLivelinessAlive,
LastEventTime: &lastEventTime,
Events: metal.ProvisioningEvents{
{
Time: lastEventTime,
Event: metal.ProvisioningEventWaiting,
},
},
},
},
{
name: "buffered phoned home event threshold exceeded",
container: &metal.ProvisioningEventContainer{
Events: metal.ProvisioningEvents{
{
Time: exceedBufferedPhonedHomeThreshold,
Event: metal.ProvisioningEventWaiting,
},
},
Liveliness: metal.MachineLivelinessAlive,
},
event: &metal.ProvisioningEvent{
Time: now,
Event: metal.ProvisioningEventPhonedHome,
},
want: &metal.ProvisioningEventContainer{
Liveliness: metal.MachineLivelinessAlive,
LastEventTime: &now,
Events: metal.ProvisioningEvents{
{
Time: now,
Event: metal.ProvisioningEventPhonedHome,
},
{
Time: exceedBufferedPhonedHomeThreshold,
Event: metal.ProvisioningEventWaiting,
},
},
},
},
}
for i := range tests {
ctx := context.Background()
tt := tests[i]
t.Run(tt.name, func(t *testing.T) {
got, err := HandleProvisioningEvent(ctx, slog.Default(), tt.container, tt.event)
got, err := HandleProvisioningEvent(ctx, slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug})), tt.container, tt.event)
if diff := cmp.Diff(tt.wantErr, err); diff != "" {
t.Errorf("HandleProvisioningEvent() diff = %s", diff)
}
Expand Down
4 changes: 0 additions & 4 deletions cmd/metal-api/internal/fsm/states/phoned-home.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,11 @@ package states
import (
"context"
"log/slog"
"time"

"github.com/looplab/fsm"
"github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
)

// failedMachineReclaimThreshold is the duration after which the machine reclaim is assumed to have failed.
const failedMachineReclaimThreshold = 5 * time.Minute

type PhonedHomeState struct {
log *slog.Logger
container *metal.ProvisioningEventContainer
Expand Down
10 changes: 10 additions & 0 deletions cmd/metal-api/internal/fsm/states/preparing.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package states

import (
"context"
"log/slog"

"github.com/looplab/fsm"
"github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
Expand All @@ -10,16 +11,25 @@ import (
// PreparingState carries the data the preparing state needs while handling a transition.
type PreparingState struct {
// container is the provisioning event container that OnTransition reads and updates.
container *metal.ProvisioningEventContainer
// event is the provisioning event being handled; OnTransition appends it to the container.
event *metal.ProvisioningEvent
// log receives the debug message emitted when a delayed phoned home event is swallowed.
log *slog.Logger
}

// newPreparing builds a PreparingState from the given state configuration,
// taking over its container, event and logger.
func newPreparing(c *StateConfig) *PreparingState {
	state := new(PreparingState)
	state.container = c.Container
	state.event = c.Event
	state.log = c.Log
	return state
}

func (p *PreparingState) OnTransition(ctx context.Context, e *fsm.Event) {
if e.Event == metal.ProvisioningEventPhonedHome.String() {
if p.container.LastEventTime != nil && p.event.Time.Sub(*p.container.LastEventTime) < swallowBufferedPhonedHomeThreshold {
p.log.Debug("swallowing delayed phoned home event after preparing event was already received", "id", p.container.ID)
return
}
}

p.container.FailedMachineReclaim = false

appendEventToContainer(p.event, p.container)
Expand Down
10 changes: 10 additions & 0 deletions cmd/metal-api/internal/fsm/states/pxe-booting.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package states

import (
"context"
"log/slog"

"github.com/looplab/fsm"
"github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
Expand All @@ -10,16 +11,25 @@ import (
// PXEBootingState carries the data the PXE booting state needs while handling a transition.
type PXEBootingState struct {
// container is the provisioning event container that OnTransition reads and updates.
container *metal.ProvisioningEventContainer
// event is the provisioning event being handled; its Time is compared against the
// container's LastEventTime to detect delayed phoned home events.
event *metal.ProvisioningEvent
// log receives the debug message emitted when a delayed phoned home event is swallowed.
log *slog.Logger
}

// newPXEBooting builds a PXEBootingState from the given state configuration,
// taking over its container, event and logger.
func newPXEBooting(c *StateConfig) *PXEBootingState {
	state := new(PXEBootingState)
	state.container = c.Container
	state.event = c.Event
	state.log = c.Log
	return state
}

func (p *PXEBootingState) OnTransition(ctx context.Context, e *fsm.Event) {
if e.Event == metal.ProvisioningEventPhonedHome.String() {
if p.container.LastEventTime != nil && p.event.Time.Sub(*p.container.LastEventTime) < swallowBufferedPhonedHomeThreshold {
p.log.Debug("swallowing delayed phoned home event after pxe booting event was already received", "id", p.container.ID)
return
}
}

p.container.FailedMachineReclaim = false

if e.Src == PXEBooting.String() {
Expand Down
10 changes: 10 additions & 0 deletions cmd/metal-api/internal/fsm/states/registering.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package states

import (
"context"
"log/slog"

"github.com/looplab/fsm"
"github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
Expand All @@ -10,15 +11,24 @@ import (
// RegisteringState carries the data the registering state needs while handling a transition.
type RegisteringState struct {
// container is the provisioning event container that OnTransition reads and appends to.
container *metal.ProvisioningEventContainer
// event is the provisioning event being handled; OnTransition appends it to the container.
event *metal.ProvisioningEvent
// log receives the debug message emitted when a delayed phoned home event is swallowed.
log *slog.Logger
}

// newRegistering builds a RegisteringState from the given state configuration,
// taking over its container, event and logger.
func newRegistering(c *StateConfig) *RegisteringState {
	state := new(RegisteringState)
	state.container = c.Container
	state.event = c.Event
	state.log = c.Log
	return state
}

// OnTransition records the incoming event in the container.
//
// A phoned home event is dropped instead of recorded when the container has a
// last event time and the event arrived less than
// swallowBufferedPhonedHomeThreshold after it; such an event is treated as a
// delayed one and only logged at debug level.
func (p *RegisteringState) OnTransition(ctx context.Context, e *fsm.Event) {
	// The checks short-circuit left to right, so the container is only
	// inspected for phoned home events and the pointer is only dereferenced
	// once it is known to be non-nil.
	delayedPhonedHome := e.Event == metal.ProvisioningEventPhonedHome.String() &&
		p.container.LastEventTime != nil &&
		p.event.Time.Sub(*p.container.LastEventTime) < swallowBufferedPhonedHomeThreshold
	if delayedPhonedHome {
		p.log.Debug("swallowing delayed phoned home event after registering event was already received", "id", p.container.ID)
		return
	}

	appendEventToContainer(p.event, p.container)
}
6 changes: 6 additions & 0 deletions cmd/metal-api/internal/fsm/states/states.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package states
import (
"context"
"log/slog"
"time"

"github.com/looplab/fsm"
"github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
Expand All @@ -21,6 +22,11 @@ const (
PhonedHome stateType = "State Phoned Home"
PlannedReboot stateType = "State Planned Reboot"
MachineReclaim stateType = "State Machine Reclaim"

// failedMachineReclaimThreshold is the duration after which the machine reclaim is assumed to have failed.
failedMachineReclaimThreshold = 5 * time.Minute
// swallowBufferedPhonedHomeThreshold is the duration after which we don't expect any delayed phoned home events to occur.
swallowBufferedPhonedHomeThreshold = 5 * time.Minute
)

type FSMState interface {
Expand Down
9 changes: 9 additions & 0 deletions cmd/metal-api/internal/fsm/states/waiting.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package states

import (
"context"
"log/slog"

"github.com/looplab/fsm"
"github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
Expand All @@ -10,15 +11,23 @@ import (
// WaitingState carries the data the waiting state needs while handling a transition.
type WaitingState struct {
// container is the provisioning event container that OnTransition reads and appends to.
container *metal.ProvisioningEventContainer
// event is the provisioning event being handled; OnTransition appends it to the container.
event *metal.ProvisioningEvent
// log receives the debug message emitted when a delayed phoned home event is swallowed.
log *slog.Logger
}

// newWaiting builds a WaitingState from the given state configuration,
// taking over its container, event and logger.
func newWaiting(c *StateConfig) *WaitingState {
	state := new(WaitingState)
	state.container = c.Container
	state.event = c.Event
	state.log = c.Log
	return state
}

// OnTransition records the incoming event in the container.
//
// A phoned home event is dropped instead of recorded when the container has a
// last event time and the event arrived less than
// swallowBufferedPhonedHomeThreshold after it; such an event is treated as a
// delayed one and only logged at debug level.
func (p *WaitingState) OnTransition(ctx context.Context, e *fsm.Event) {
	// The checks short-circuit left to right, so the container is only
	// inspected for phoned home events and the pointer is only dereferenced
	// once it is known to be non-nil.
	delayedPhonedHome := e.Event == metal.ProvisioningEventPhonedHome.String() &&
		p.container.LastEventTime != nil &&
		p.event.Time.Sub(*p.container.LastEventTime) < swallowBufferedPhonedHomeThreshold
	if delayedPhonedHome {
		p.log.Debug("swallowing delayed phoned home event after waiting event was already received", "id", p.container.ID)
		return
	}

	appendEventToContainer(p.event, p.container)
}