Skip to content

Commit faee3c8

Browse files
committed
fix: prevent race condition deleting shared vertex/langfuse secrets
When multiple sessions exist in the same namespace, the operator would delete the ambient-vertex and ambient-admin-langfuse-secret secrets when one session stopped, even if other sessions were still Running, Creating, or Pending and needed those secrets. This fix adds a check before deleting these shared secrets that counts the active sessions in the namespace. If any sessions are still active, the secret deletion is skipped. Fixes the MountVolume.SetUp failed error: 'secret "ambient-vertex" not found'. Related to PR #494.
1 parent 46b7eb0 commit faee3c8

File tree

1 file changed

+62
-2
lines changed

1 file changed

+62
-2
lines changed

components/operator/internal/handlers/sessions.go

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2115,6 +2115,7 @@ func copySecretToNamespace(ctx context.Context, sourceSecret *corev1.Secret, tar
21152115
}
21162116

21172117
// deleteAmbientVertexSecret deletes the ambient-vertex secret from a namespace if it was copied
2118+
// and no other active sessions in the namespace still need it.
21182119
func deleteAmbientVertexSecret(ctx context.Context, namespace string) error {
21192120
secret, err := config.K8sClient.CoreV1().Secrets(namespace).Get(ctx, types.AmbientVertexSecretName, v1.GetOptions{})
21202121
if err != nil {
@@ -2131,7 +2132,36 @@ func deleteAmbientVertexSecret(ctx context.Context, namespace string) error {
21312132
return nil
21322133
}
21332134

2134-
log.Printf("Deleting copied %s secret from namespace %s", types.AmbientVertexSecretName, namespace)
2135+
// Check if there are other active sessions in this namespace that might need this secret
2136+
// Don't delete the shared secret if other sessions are Running, Creating, or Pending
2137+
gvr := types.GetAgenticSessionResource()
2138+
sessions, err := config.DynamicClient.Resource(gvr).Namespace(namespace).List(ctx, v1.ListOptions{})
2139+
if err != nil {
2140+
log.Printf("Warning: failed to list sessions in namespace %s, skipping secret deletion: %v", namespace, err)
2141+
return nil // Don't delete if we can't verify no other sessions need it
2142+
}
2143+
2144+
activeCount := 0
2145+
for _, session := range sessions.Items {
2146+
status, _, _ := unstructured.NestedMap(session.Object, "status")
2147+
phase := ""
2148+
if status != nil {
2149+
if p, ok := status["phase"].(string); ok {
2150+
phase = p
2151+
}
2152+
}
2153+
// Count sessions that are active and might need the vertex secret
2154+
if phase == "Running" || phase == "Creating" || phase == "Pending" {
2155+
activeCount++
2156+
}
2157+
}
2158+
2159+
if activeCount > 0 {
2160+
log.Printf("Skipping %s secret deletion in namespace %s: %d active session(s) may still need it", types.AmbientVertexSecretName, namespace, activeCount)
2161+
return nil
2162+
}
2163+
2164+
log.Printf("Deleting copied %s secret from namespace %s (no active sessions)", types.AmbientVertexSecretName, namespace)
21352165
err = config.K8sClient.CoreV1().Secrets(namespace).Delete(ctx, types.AmbientVertexSecretName, v1.DeleteOptions{})
21362166
if err != nil && !errors.IsNotFound(err) {
21372167
return fmt.Errorf("failed to delete %s secret: %w", types.AmbientVertexSecretName, err)
@@ -2141,6 +2171,7 @@ func deleteAmbientVertexSecret(ctx context.Context, namespace string) error {
21412171
}
21422172

21432173
// deleteAmbientLangfuseSecret deletes the ambient-admin-langfuse-secret from a namespace if it was copied
2174+
// and no other active sessions in the namespace still need it.
21442175
func deleteAmbientLangfuseSecret(ctx context.Context, namespace string) error {
21452176
const langfuseSecretName = "ambient-admin-langfuse-secret"
21462177
secret, err := config.K8sClient.CoreV1().Secrets(namespace).Get(ctx, langfuseSecretName, v1.GetOptions{})
@@ -2158,7 +2189,36 @@ func deleteAmbientLangfuseSecret(ctx context.Context, namespace string) error {
21582189
return nil
21592190
}
21602191

2161-
log.Printf("Deleting copied %s secret from namespace %s", langfuseSecretName, namespace)
2192+
// Check if there are other active sessions in this namespace that might need this secret
2193+
// Don't delete the shared secret if other sessions are Running, Creating, or Pending
2194+
gvr := types.GetAgenticSessionResource()
2195+
sessions, err := config.DynamicClient.Resource(gvr).Namespace(namespace).List(ctx, v1.ListOptions{})
2196+
if err != nil {
2197+
log.Printf("Warning: failed to list sessions in namespace %s, skipping secret deletion: %v", namespace, err)
2198+
return nil // Don't delete if we can't verify no other sessions need it
2199+
}
2200+
2201+
activeCount := 0
2202+
for _, session := range sessions.Items {
2203+
status, _, _ := unstructured.NestedMap(session.Object, "status")
2204+
phase := ""
2205+
if status != nil {
2206+
if p, ok := status["phase"].(string); ok {
2207+
phase = p
2208+
}
2209+
}
2210+
// Count sessions that are active and might need the langfuse secret
2211+
if phase == "Running" || phase == "Creating" || phase == "Pending" {
2212+
activeCount++
2213+
}
2214+
}
2215+
2216+
if activeCount > 0 {
2217+
log.Printf("Skipping %s secret deletion in namespace %s: %d active session(s) may still need it", langfuseSecretName, namespace, activeCount)
2218+
return nil
2219+
}
2220+
2221+
log.Printf("Deleting copied %s secret from namespace %s (no active sessions)", langfuseSecretName, namespace)
21622222
err = config.K8sClient.CoreV1().Secrets(namespace).Delete(ctx, langfuseSecretName, v1.DeleteOptions{})
21632223
if err != nil && !errors.IsNotFound(err) {
21642224
return fmt.Errorf("failed to delete %s secret: %w", langfuseSecretName, err)

0 commit comments

Comments
 (0)