Skip to content

Commit e554406

Browse files
committed
update
Signed-off-by: bitliu <[email protected]>
1 parent a326ea4 commit e554406

File tree

5 files changed

+682
-195
lines changed

5 files changed

+682
-195
lines changed

.github/workflows/integration-test-ai-gateway.yml

Lines changed: 45 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -64,36 +64,57 @@ jobs:
6464
- name: Run AI Gateway E2E tests
6565
id: e2e-test
6666
run: |
67+
set +e # Don't exit on error, we want to capture the result
6768
make e2e-test PROFILE=ai-gateway
69+
TEST_EXIT_CODE=$?
70+
echo "test_exit_code=${TEST_EXIT_CODE}" >> $GITHUB_OUTPUT
71+
exit ${TEST_EXIT_CODE}
6872
env:
6973
E2E_VERBOSE: "true"
7074
KEEP_CLUSTER: "true"
7175

72-
- name: Show cluster logs on failure
73-
if: failure()
76+
- name: Upload test reports
77+
if: always()
78+
uses: actions/upload-artifact@v4
79+
with:
80+
name: test-reports
81+
path: |
82+
test-report.json
83+
test-report.md
84+
retention-days: 30
85+
86+
- name: Create test summary from report
87+
if: always()
7488
run: |
75-
echo "=== Kind Cluster Info ==="
76-
kind get clusters || true
77-
kubectl cluster-info --context kind-semantic-router-e2e || true
78-
79-
echo "=== All Pods ==="
80-
kubectl get pods --all-namespaces -o wide || true
81-
82-
echo "=== Semantic Router Logs ==="
83-
kubectl logs -n vllm-semantic-router-system deployment/semantic-router --tail=100 || true
84-
85-
echo "=== Envoy Gateway Logs ==="
86-
kubectl logs -n envoy-gateway-system deployment/envoy-gateway --tail=100 || true
87-
88-
echo "=== AI Gateway Controller Logs ==="
89-
kubectl logs -n envoy-ai-gateway-system deployment/ai-gateway-controller --tail=100 || true
90-
91-
echo "=== Gateway Resources ==="
92-
kubectl get gateway -A || true
93-
kubectl get httproute -A || true
94-
95-
echo "=== Events ==="
96-
kubectl get events --all-namespaces --sort-by='.lastTimestamp' || true
89+
if [ -f "test-report.md" ]; then
90+
echo "=== Reading test report from test-report.md ==="
91+
cat test-report.md >> $GITHUB_STEP_SUMMARY
92+
93+
# Add additional context
94+
cat >> $GITHUB_STEP_SUMMARY << 'EOF'
95+
96+
---
97+
98+
### 📚 Additional Resources
99+
100+
- **Trigger:** ${{ github.event_name }}
101+
- **Branch:** `${{ github.ref_name }}`
102+
- **Commit:** `${{ github.sha }}`
103+
- **Workflow Run:** [${{ github.run_id }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})
104+
- [E2E Test Framework Documentation](https://github.com/${{ github.repository }}/tree/main/e2e)
105+
- [AI Gateway Profile](https://github.com/${{ github.repository }}/tree/main/e2e/profiles/ai-gateway)
106+
107+
### 📦 Artifacts
108+
109+
- Test reports (JSON and Markdown) are available as workflow artifacts
110+
- Reports are retained for 30 days
111+
EOF
112+
else
113+
echo "⚠️ Test report file not found!" >> $GITHUB_STEP_SUMMARY
114+
echo "" >> $GITHUB_STEP_SUMMARY
115+
echo "The E2E test framework did not generate a report file." >> $GITHUB_STEP_SUMMARY
116+
echo "This might indicate that the test failed before report generation." >> $GITHUB_STEP_SUMMARY
117+
fi
97118
98119
- name: Clean up
99120
if: always()

e2e/pkg/framework/debug.go

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
package framework
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"io"
7+
8+
corev1 "k8s.io/api/core/v1"
9+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10+
"k8s.io/client-go/kubernetes"
11+
)
12+
13+
// PrintAllPodsStatus prints detailed status and logs for all pods in all namespaces
14+
// This is useful for debugging when tests fail
15+
func PrintAllPodsStatus(ctx context.Context, client *kubernetes.Clientset) {
16+
fmt.Printf("\n========== All Pods Status and Logs ==========\n")
17+
18+
// Get all pods from all namespaces
19+
pods, err := client.CoreV1().Pods("").List(ctx, metav1.ListOptions{})
20+
if err != nil {
21+
fmt.Printf("Failed to list pods: %v\n", err)
22+
return
23+
}
24+
25+
fmt.Printf("Total pods: %d\n", len(pods.Items))
26+
27+
for _, pod := range pods.Items {
28+
fmt.Printf("\n--- Pod: %s/%s ---\n", pod.Namespace, pod.Name)
29+
fmt.Printf("Phase: %s\n", pod.Status.Phase)
30+
fmt.Printf("Node: %s\n", pod.Spec.NodeName)
31+
fmt.Printf("Start Time: %v\n", pod.Status.StartTime)
32+
33+
// Print conditions
34+
fmt.Printf("\nConditions:\n")
35+
for _, condition := range pod.Status.Conditions {
36+
fmt.Printf(" - Type: %s, Status: %s, Reason: %s, Message: %s\n",
37+
condition.Type, condition.Status, condition.Reason, condition.Message)
38+
}
39+
40+
// Print container statuses
41+
fmt.Printf("\nContainer Statuses:\n")
42+
for _, containerStatus := range pod.Status.ContainerStatuses {
43+
fmt.Printf(" - Container: %s\n", containerStatus.Name)
44+
fmt.Printf(" Ready: %v\n", containerStatus.Ready)
45+
fmt.Printf(" RestartCount: %d\n", containerStatus.RestartCount)
46+
fmt.Printf(" Image: %s\n", containerStatus.Image)
47+
48+
if containerStatus.State.Waiting != nil {
49+
fmt.Printf(" State: Waiting\n")
50+
fmt.Printf(" Reason: %s\n", containerStatus.State.Waiting.Reason)
51+
fmt.Printf(" Message: %s\n", containerStatus.State.Waiting.Message)
52+
} else if containerStatus.State.Running != nil {
53+
fmt.Printf(" State: Running\n")
54+
fmt.Printf(" Started At: %v\n", containerStatus.State.Running.StartedAt)
55+
} else if containerStatus.State.Terminated != nil {
56+
fmt.Printf(" State: Terminated\n")
57+
fmt.Printf(" Reason: %s\n", containerStatus.State.Terminated.Reason)
58+
fmt.Printf(" Message: %s\n", containerStatus.State.Terminated.Message)
59+
fmt.Printf(" Exit Code: %d\n", containerStatus.State.Terminated.ExitCode)
60+
}
61+
62+
if containerStatus.LastTerminationState.Terminated != nil {
63+
fmt.Printf(" Last Termination:\n")
64+
fmt.Printf(" Reason: %s\n", containerStatus.LastTerminationState.Terminated.Reason)
65+
fmt.Printf(" Message: %s\n", containerStatus.LastTerminationState.Terminated.Message)
66+
fmt.Printf(" Exit Code: %d\n", containerStatus.LastTerminationState.Terminated.ExitCode)
67+
}
68+
}
69+
70+
// Print init container statuses if any
71+
if len(pod.Status.InitContainerStatuses) > 0 {
72+
fmt.Printf("\nInit Container Statuses:\n")
73+
for _, containerStatus := range pod.Status.InitContainerStatuses {
74+
fmt.Printf(" - Container: %s\n", containerStatus.Name)
75+
fmt.Printf(" Ready: %v\n", containerStatus.Ready)
76+
fmt.Printf(" RestartCount: %d\n", containerStatus.RestartCount)
77+
78+
if containerStatus.State.Waiting != nil {
79+
fmt.Printf(" State: Waiting - %s: %s\n",
80+
containerStatus.State.Waiting.Reason,
81+
containerStatus.State.Waiting.Message)
82+
} else if containerStatus.State.Running != nil {
83+
fmt.Printf(" State: Running\n")
84+
} else if containerStatus.State.Terminated != nil {
85+
fmt.Printf(" State: Terminated - %s: %s (Exit Code: %d)\n",
86+
containerStatus.State.Terminated.Reason,
87+
containerStatus.State.Terminated.Message,
88+
containerStatus.State.Terminated.ExitCode)
89+
}
90+
}
91+
}
92+
93+
// Print events for this pod
94+
fmt.Printf("\nRecent Events:\n")
95+
events, err := client.CoreV1().Events(pod.Namespace).List(ctx, metav1.ListOptions{
96+
FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.kind=Pod", pod.Name),
97+
})
98+
if err == nil && len(events.Items) > 0 {
99+
// Sort events by last timestamp (most recent first)
100+
for i := len(events.Items) - 1; i >= 0 && i >= len(events.Items)-10; i-- {
101+
event := events.Items[i]
102+
fmt.Printf(" - [%s] %s: %s (Count: %d)\n",
103+
event.LastTimestamp.Format("15:04:05"),
104+
event.Reason,
105+
event.Message,
106+
event.Count)
107+
}
108+
} else if err != nil {
109+
fmt.Printf(" Failed to get events: %v\n", err)
110+
} else {
111+
fmt.Printf(" No events found\n")
112+
}
113+
114+
// Print container logs
115+
fmt.Printf("\nContainer Logs:\n")
116+
for _, container := range pod.Spec.Containers {
117+
fmt.Printf("\n --- Logs for container: %s ---\n", container.Name)
118+
logOptions := &corev1.PodLogOptions{
119+
Container: container.Name,
120+
TailLines: int64Ptr(50), // Last 50 lines
121+
}
122+
123+
req := client.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, logOptions)
124+
logs, err := req.Stream(ctx)
125+
if err != nil {
126+
fmt.Printf(" Failed to get logs: %v\n", err)
127+
continue
128+
}
129+
defer logs.Close()
130+
131+
logBytes, err := io.ReadAll(logs)
132+
if err != nil {
133+
fmt.Printf(" Failed to read logs: %v\n", err)
134+
continue
135+
}
136+
137+
if len(logBytes) == 0 {
138+
fmt.Printf(" (no logs available)\n")
139+
} else {
140+
fmt.Printf("%s\n", string(logBytes))
141+
}
142+
}
143+
144+
// Print init container logs if any failed
145+
for _, containerStatus := range pod.Status.InitContainerStatuses {
146+
if !containerStatus.Ready {
147+
fmt.Printf("\n --- Logs for init container: %s ---\n", containerStatus.Name)
148+
logOptions := &corev1.PodLogOptions{
149+
Container: containerStatus.Name,
150+
TailLines: int64Ptr(50),
151+
}
152+
153+
req := client.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, logOptions)
154+
logs, err := req.Stream(ctx)
155+
if err != nil {
156+
fmt.Printf(" Failed to get logs: %v\n", err)
157+
continue
158+
}
159+
defer logs.Close()
160+
161+
logBytes, err := io.ReadAll(logs)
162+
if err != nil {
163+
fmt.Printf(" Failed to read logs: %v\n", err)
164+
continue
165+
}
166+
167+
if len(logBytes) == 0 {
168+
fmt.Printf(" (no logs available)\n")
169+
} else {
170+
fmt.Printf("%s\n", string(logBytes))
171+
}
172+
}
173+
}
174+
}
175+
fmt.Printf("\n========================================\n\n")
176+
}
177+
// int64Ptr returns a pointer to a copy of v, handy for optional API fields
// (e.g. PodLogOptions.TailLines) that take *int64.
func int64Ptr(v int64) *int64 {
	p := v
	return &p
}

0 commit comments

Comments
 (0)