@@ -160,16 +160,9 @@ func (s *Server) createSandbox(ctx context.Context, dynamicClient dynamic.Interf
160160 }
161161 }
162162
163- var createdSandbox * sandboxv1alpha1.Sandbox
164- select {
165- case result := <- resultChan :
166- createdSandbox = result .Sandbox
167- klog .V (2 ).Infof ("sandbox %s/%s running" , createdSandbox .Namespace , createdSandbox .Name )
168- case <- time .After (2 * time .Minute ): // consistent with router settings
169- klog .Warningf ("sandbox %s/%s create timed out" , sandbox .Namespace , sandbox .Name )
170- return nil , fmt .Errorf ("sandbox creation timed out" )
171- }
172-
163+ // Register rollback BEFORE waiting for the sandbox to become ready.
164+ // This ensures the K8s resource and store placeholder are cleaned up on
165+ // timeout, pod-IP failure, or store-update failure — not just on post-creation errors.
173166 needRollbackSandbox := true
174167 sandboxRollbackFunc := func () {
175168 ctxTimeout , cancel := context .WithTimeout (context .Background (), 30 * time .Second )
@@ -180,17 +173,21 @@ func (s *Server) createSandbox(ctx context.Context, dynamicClient dynamic.Interf
180173 err = deleteSandboxClaim (ctxTimeout , dynamicClient , sandboxClaim .Namespace , sandboxClaim .Name )
181174 if err != nil {
182175 klog .Infof ("sandbox claim %s/%s rollback failed: %v" , sandboxClaim .Namespace , sandboxClaim .Name , err )
183- return
176+ } else {
177+ klog .Infof ("sandbox claim %s/%s rollback succeeded" , sandboxClaim .Namespace , sandboxClaim .Name )
184178 }
185- klog .Infof ("sandbox claim %s/%s rollback succeeded" , sandboxClaim .Namespace , sandboxClaim .Name )
186179 } else {
187180 // Rollback Sandbox
188181 err = deleteSandbox (ctxTimeout , dynamicClient , sandbox .Namespace , sandbox .Name )
189182 if err != nil {
190183 klog .Infof ("sandbox %s/%s rollback failed: %v" , sandbox .Namespace , sandbox .Name , err )
191- return
184+ } else {
185+ klog .Infof ("sandbox %s/%s rollback succeeded" , sandbox .Namespace , sandbox .Name )
192186 }
193- klog .Infof ("sandbox %s/%s rollback succeeded" , sandbox .Namespace , sandbox .Name )
187+ }
188+ // Clean up the store placeholder so it does not pollute GC queries
189+ if delErr := s .storeClient .DeleteSandboxBySessionID (ctxTimeout , sandboxEntry .SessionID ); delErr != nil {
190+ klog .Infof ("sandbox %s/%s store placeholder cleanup failed: %v" , sandbox .Namespace , sandbox .Name , delErr )
194191 }
195192 }
196193 defer func () {
@@ -200,6 +197,16 @@ func (s *Server) createSandbox(ctx context.Context, dynamicClient dynamic.Interf
200197 sandboxRollbackFunc ()
201198 }()
202199
200+ var createdSandbox * sandboxv1alpha1.Sandbox
201+ select {
202+ case result := <- resultChan :
203+ createdSandbox = result .Sandbox
204+ klog .V (2 ).Infof ("sandbox %s/%s running" , createdSandbox .Namespace , createdSandbox .Name )
205+ case <- time .After (2 * time .Minute ): // consistent with router settings
206+ klog .Warningf ("sandbox %s/%s create timed out" , sandbox .Namespace , sandbox .Name )
207+ return nil , fmt .Errorf ("sandbox creation timed out" )
208+ }
209+
203210 // agent-sandbox create pod with same name as sandbox if no warmpool is used
204211 // so here we try to get pod IP by sandbox name first
205212 // if warmpool is used, the pod name is stored in sandbox's annotation `agents.x-k8s.io/sandbox-pod-name`
0 commit comments