-
Notifications
You must be signed in to change notification settings - Fork 275
fix(api): remove sandbox from store only on successful removal #2237
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
b6cf35a
da73155
6fdbb64
2ca286a
b55cafa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -172,14 +172,14 @@ func startRemoving(ctx context.Context, sbx *memorySandbox, opts sandbox.RemoveO | |
| } | ||
| } | ||
|
|
||
| originalState := sbx._data.State | ||
| newState := opts.Action.TargetState | ||
|
|
||
| if transition != nil { | ||
| currentState := sbx._data.State | ||
| sbx.mu.Unlock() | ||
|
|
||
| if currentState != newState && !sandbox.AllowedTransitions[currentState][newState] { | ||
| return false, nil, &sandbox.InvalidStateTransitionError{CurrentState: currentState, TargetState: newState} | ||
| if originalState != newState && !sandbox.AllowedTransitions[originalState][newState] { | ||
| return false, nil, &sandbox.InvalidStateTransitionError{CurrentState: originalState, TargetState: newState} | ||
| } | ||
|
|
||
| logger.L().Debug(ctx, "State transition already in progress to the same state, waiting", logger.WithSandboxID(sbx.SandboxID()), zap.String("state", string(newState))) | ||
|
|
@@ -190,9 +190,9 @@ func startRemoving(ctx context.Context, sbx *memorySandbox, opts sandbox.RemoveO | |
|
|
||
| // If the transition is to the same state just wait | ||
| switch { | ||
| case currentState == newState: | ||
| case originalState == newState: | ||
| return true, func(context.Context, error) {}, nil | ||
| case sandbox.AllowedTransitions[currentState][newState]: | ||
| case sandbox.AllowedTransitions[originalState][newState]: | ||
| return startRemoving(ctx, sbx, sandbox.RemoveOpts{Action: opts.Action}) | ||
| default: | ||
| return false, nil, fmt.Errorf("unexpected state transition") | ||
|
|
@@ -238,11 +238,11 @@ func startRemoving(ctx context.Context, sbx *memorySandbox, opts sandbox.RemoveO | |
| } | ||
|
|
||
| if err != nil { | ||
| // Keep the transition in place so the error stays | ||
| return | ||
| // Revert the state change if the transition failed and it's not a transient transition | ||
| sbx._data.State = originalState | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Expired flag not reverted when removal transition failsHigh Severity When Additional Locations (1) |
||
| } | ||
|
|
||
| // The transition is completed and the next transition can be started | ||
| // Remove the transition so the next transition can be started | ||
| sbx.transition = nil | ||
| } | ||
|
|
||
|
|
||


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Routing record deleted before removal, not restored on failure
Medium Severity
In
removeSandboxFromNode, the routing catalog entry (routingCatalog.DeleteSandbox) is deleted before the actual pause/kill gRPC call. If the gRPC call fails,RemoveSandboxnow keeps the sandbox in the store with its state reverted to Running, but the routing record is already gone. This leaves the sandbox visible to the API but unreachable for Nomad-managed nodes, a new inconsistency introduced by movingsandboxStore.Removeto only execute on success.Additional Locations (1)
packages/api/internal/orchestrator/delete_instance.go#L113-L122