-
Notifications
You must be signed in to change notification settings - Fork 274
Fix TOCTOU race in template deletion causing FK violation #2231
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,6 +13,7 @@ import ( | |
|
|
||
| "github.com/e2b-dev/infra/packages/api/internal/orchestrator/nodemanager" | ||
| "github.com/e2b-dev/infra/packages/api/internal/sandbox" | ||
| "github.com/e2b-dev/infra/packages/db/pkg/dberrors" | ||
| "github.com/e2b-dev/infra/packages/db/pkg/types" | ||
| "github.com/e2b-dev/infra/packages/db/queries" | ||
| "github.com/e2b-dev/infra/packages/shared/pkg/grpc/orchestrator" | ||
|
|
@@ -27,12 +28,31 @@ func (PauseQueueExhaustedError) Error() string { | |
| return "The pause queue is exhausted" | ||
| } | ||
|
|
||
| // BaseTemplateDeletedError is returned when a snapshot cannot be created because | ||
| // the base template was deleted between sandbox creation and pause. | ||
| type BaseTemplateDeletedError struct { | ||
| BaseTemplateID string | ||
| } | ||
|
|
||
| func (e BaseTemplateDeletedError) Error() string { | ||
| return fmt.Sprintf("base template '%s' was deleted, cannot create snapshot", e.BaseTemplateID) | ||
| } | ||
|
|
||
| func (o *Orchestrator) pauseSandbox(ctx context.Context, node *nodemanager.Node, sbx sandbox.Sandbox) error { | ||
| ctx, span := tracer.Start(ctx, "pause-sandbox") | ||
| defer span.End() | ||
|
|
||
| result, err := o.throttledUpsertSnapshot(ctx, buildUpsertSnapshotParams(sbx, node)) | ||
| if err != nil { | ||
| // Check if the error is an FK violation on base_env_id, which means | ||
| // the base template was deleted between sandbox creation and pause. | ||
| if dberrors.IsForeignKeyViolation(err) { | ||
| telemetry.ReportError(ctx, "base template was deleted, cannot create snapshot", | ||
| err, telemetry.WithTemplateID(sbx.BaseTemplateID)) | ||
|
|
||
| return BaseTemplateDeletedError{BaseTemplateID: sbx.BaseTemplateID} | ||
|
Comment on lines
+49
to
+53
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Returning Useful? React with 👍 / 👎. |
||
| } | ||
|
|
||
| telemetry.ReportCriticalError(ctx, "error inserting snapshot for env", err) | ||
|
|
||
| return err | ||
|
|
||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| -- name: LockEnvForUpdate :one | ||
| SELECT id FROM "public"."envs" | ||
| WHERE id = @env_id | ||
| FOR UPDATE; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
dberrors.IsForeignKeyViolation(err)matches any FK violation fromUpsertSnapshot, not just thebase_env_idone. That query has several other FK constraints —snapshots.team_id, theenv_build_assignments.env_id/build_idedges, andenv_builds.cluster_node_id(if it has a FK). A violation on any of those would be silently reclassified as a non-criticalBaseTemplateDeletedErrorinstead of propagating as a critical error, masking real bugs.Use
pgconn.PgError.ConstraintNameto narrow the check to the specific constraint, e.g.:(adjust the constraint name to match the actual schema)