diff --git a/pkg/kthena-router/datastore/model_server.go b/pkg/kthena-router/datastore/model_server.go index 7f1c47392..9035895e9 100644 --- a/pkg/kthena-router/datastore/model_server.go +++ b/pkg/kthena-router/datastore/model_server.go @@ -155,6 +155,11 @@ func (m *modelServer) getPrefillPodsForDecodeGroup(pod *PodInfo) []types.Namespa return nil } + // Guard against nil Pod - can occur if pod is deleted mid-scheduling cycle + if pod == nil || pod.Pod == nil { + return nil + } + pdGroup := m.modelServer.Spec.WorkloadSelector.PDGroup pdGroupValue, hasPDGroupKey := pod.Pod.Labels[pdGroup.GroupKey] if !hasPDGroupKey { diff --git a/pkg/kthena-router/datastore/pdgroup_pods_test.go b/pkg/kthena-router/datastore/pdgroup_pods_test.go index f8fc5ca38..b2d298737 100644 --- a/pkg/kthena-router/datastore/pdgroup_pods_test.go +++ b/pkg/kthena-router/datastore/pdgroup_pods_test.go @@ -284,3 +284,57 @@ func TestPDGroupPodRemoval(t *testing.T) { t.Errorf("Expected 0 decode pods after deletion, got %d", len(decodePods)) } } + +// TestGetPrefillPodsForDecodeGroupNilPodInfo verifies that getPrefillPodsForDecodeGroup +// handles nil PodInfo or nil Pod gracefully without panicking. +// This can occur when a pod is deleted mid-scheduling cycle. 
+func TestGetPrefillPodsForDecodeGroupNilPodInfo(t *testing.T) {
+	// Each case passes a PodInfo that carries no usable Pod; an empty result is expected.
+	cases := []struct {
+		name  string
+		input *PodInfo
+		want  []types.NamespacedName
+	}{
+		{
+			name:  "nil PodInfo",
+			input: nil,
+			want:  nil,
+		},
+		{
+			name:  "PodInfo with nil Pod",
+			input: &PodInfo{Pod: nil},
+			want:  nil,
+		},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.name, func(t *testing.T) {
+			// PDGroup configuration keyed on the "pd-group" pod label, with decode/prefill role labels.
+			pdGroup := &aiv1alpha1.PDGroup{
+				GroupKey:      "pd-group",
+				DecodeLabels:  map[string]string{"role": "decode"},
+				PrefillLabels: map[string]string{"role": "prefill"},
+			}
+			// A separate model server is constructed for every case.
+			server := newModelServer(&aiv1alpha1.ModelServer{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "test-model",
+					Namespace: "default",
+				},
+				Spec: aiv1alpha1.ModelServerSpec{
+					WorkloadSelector: &aiv1alpha1.WorkloadSelector{
+						PDGroup: pdGroup,
+					},
+				},
+			})
+
+			// A nil dereference inside the call would panic and fail this subtest.
+			got := server.getPrefillPodsForDecodeGroup(tt.input)
+
+			gotLen, wantLen := len(got), len(tt.want)
+			if gotLen != wantLen {
+				t.Errorf("Expected %d results, got %d", wantLen, gotLen)
+			}
+		})
+	}
+}