Temp

rahulgurnani · rahulgurnani · commit b7d01492ec73 · 2025-10-03T23:41:15.000Z
diff --git a/Makefile b/Makefile
@@ -44,7 +44,7 @@ E2E_IMAGE ?= $(IMAGE_TAG)
 # E2E_USE_KIND is a flag used in test-e2e target. when set to true it will load the e2e image into the kind cluster.
 # it is possible though to run e2e tests against clusters other than kind. in such a case, it is the user's responsibility to load
 # the image into the cluster.
-E2E_USE_KIND ?= true
+E2E_USE_KIND ?= false
 
 SYNCER_IMAGE_NAME := lora-syncer
 SYNCER_IMAGE_REPO ?= $(IMAGE_REGISTRY)/$(SYNCER_IMAGE_NAME)
diff --git a/config/charts/inferencepool/templates/epp-deployment.yaml b/config/charts/inferencepool/templates/epp-deployment.yaml
@@ -118,4 +118,4 @@ spec:
       {{- if .Values.inferenceExtension.tolerations }}
       tolerations:
         {{- toYaml .Values.inferenceExtension.tolerations | nindent 8 }}
-      {{- end }}
+      {{- end }}
diff --git a/config/charts/inferencepool/templates/leader-election-rbac.yaml b/config/charts/inferencepool/templates/leader-election-rbac.yaml
@@ -27,4 +27,4 @@ roleRef:
   apiGroup: rbac.authorization.k8s.io
   kind: Role
   name: {{ include "gateway-api-inference-extension.name" . }}-leader-election
-{{- end }}
+{{- end }}
diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml
@@ -35,7 +35,8 @@ inferenceExtension:
     # Log verbosity
     - name: v
       value: 1
-
+    - name: ha-enable-leader-election
+      value: "true"
   affinity: {}
 
   tolerations: []
@@ -59,9 +60,9 @@ inferencePool:
     - number: 8000
   modelServerType: vllm # vllm, triton-tensorrt-llm
   apiVersion: inference.networking.k8s.io/v1 
-  # modelServers: # REQUIRED
-  #   matchLabels:
-  #     app: vllm-llama3-8b-instruct
+  modelServers: # REQUIRED
+    matchLabels:
+      app: vllm-llama3-8b-instruct
 
   # Should only used if apiVersion is inference.networking.x-k8s.io/v1alpha2, 
   # This will soon be deprecated when upstream GW providers support v1, just doing something simple for now.
diff --git a/test/e2e/epp/e2e_suite_test.go b/test/e2e/epp/e2e_suite_test.go
@@ -19,6 +19,7 @@ package epp
 import (
 	"errors"
 	"fmt"
+	"log"
 	"os"
 	"strings"
 	"testing"
@@ -36,6 +37,7 @@ import (
 	infextv1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	infextv1a2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/env"
+	infextv1a2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
 	testutils "sigs.k8s.io/gateway-api-inference-extension/test/utils"
 
 	"helm.sh/helm/v3/pkg/chart/loader"
@@ -84,6 +86,10 @@ const (
 	metricsRbacManifest = "../../testdata/metrics-rbac.yaml"
 	// modelServerManifestFilepathEnvVar is the env var that holds absolute path to the manifest for the model server test resource.
 	modelServerManifestFilepathEnvVar = "MANIFEST_PATH"
+	// replicaCount is the number of replicas of EPP.
+	replicaCount = 3
+
+	name = "vllm-llama3-8b-instruct"
 )
 
 const e2eLeaderElectionEnabledEnvVar = "E2E_LEADER_ELECTION_ENABLED"
@@ -102,15 +108,29 @@ func TestAPIs(t *testing.T) {
 	)
 }
 
-func renderCharts(nsName string) []string {
-	chartPath := "./charts/inferencepool" // Path to your Helm chart
+func renderChartsToYamls(nsName string) []string {
+	chartPath := "/usr/local/google/home/rahulgurnani/gateway-api-inference-extension/config/charts/inferencepool"
 	chart, err := loader.Load(chartPath)
 	if err != nil {
 		panic(fmt.Sprintf("Failed to load chart: %v", err))
 	}
-	values, _ := chartutil.ReadValuesFile("charts/inferencepool/values.yaml")
+	values, _ := chartutil.ReadValuesFile("/usr/local/google/home/rahulgurnani/gateway-api-inference-extension/config/charts/inferencepool/values.yaml")
+	infExt, ok := values["inferenceExtension"].(map[string]interface{})
+	if ok {
+		infExt["replicas"] = replicaCount
+		fmt.Println(infExt)
+		flags, ok := infExt["flags"].([]interface{})
+		if ok {
+			flags = append(flags, map[string]string{
+				"name":  "ha-enable-leader-election",
+				"value": "true",
+			})
+			infExt["flags"] = flags
+		}
+	}
+
 	options := chartutil.ReleaseOptions{
-		Name:      "vllm-llama3-8b-instruct",
+		Name:      name,
 		Namespace: nsName,
 	}
 	renderValues, err := chartutil.ToRenderValues(chart, values, options, nil)
@@ -122,11 +142,22 @@ func renderCharts(nsName string) []string {
 	if err != nil {
 		panic(fmt.Sprintf("Failed to render chart: %v", err))
 	}
-	fmt.Println(rendered)
 	var renderedValues []string
-	for _, v := range rendered {
-		renderedValues = append(renderedValues, v)
+	for fName, renderedChart := range rendered {
+		if strings.Contains(fName, "NOTES.txt") {
+			continue
+		}
+
+		fmt.Println("----------------rendered----------------")
+		fmt.Println(fName)
+		objs := strings.Split(renderedChart, "\n---")
+		for _, obj := range objs {
+			fmt.Println("-----------obj-----------")
+			fmt.Println(obj)
+			renderedValues = append(renderedValues, obj)
+		}
 	}
+
 	return renderedValues
 }
 
@@ -144,6 +175,7 @@ var _ = ginkgo.BeforeSuite(func() {
 		leaderElectionEnabled = true
 		ginkgo.By("Leader election test mode enabled via " + e2eLeaderElectionEnabledEnvVar)
 	}
+	leaderElectionEnabled = true
 
 	ginkgo.By("Setting up the test suite")
 	setupSuite()
@@ -165,8 +197,8 @@ func setupInfra() {
 	}
 	crds := map[string]string{
 		"inferencepools.inference.networking.x-k8s.io":      xInferPoolManifest,
-		"inferenceobjectives.inference.networking.x-k8s.io": xInferObjectiveManifest,
 		"inferencepools.inference.networking.k8s.io":        inferPoolManifest,
+		"inferenceobjectives.inference.networking.x-k8s.io": xInferObjectiveManifest,
 	}
 
 	createCRDs(testConfig, crds)
@@ -179,6 +211,10 @@ func setupInfra() {
 	createClient(testConfig, clientManifest)
 	createEnvoy(testConfig, envoyManifest)
 	createMetricsRbac(testConfig, metricsRbacManifest)
+	createInferExt(cli)
+	createClient(cli, clientManifest)
+	createEnvoy(cli, envoyManifest)
+	createMetricsRbac(cli, metricsRbacManifest)
 	// Run this step last, as it requires additional time for the model server to become ready.
 	ginkgo.By("Creating model server resources from manifest: " + modelServerManifestPath)
 	createModelServer(testConfig, modelServerManifestArray)
@@ -214,6 +250,8 @@ func setupSuite() {
 
 	err = infextv1a2.Install(testConfig.Scheme)
 	// err = infextv1a2.Install(scheme)
+	// TODO: Fix the v1a2 chart
+	err = infextv1a2.Install(scheme)
 	gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred())
 
 	err = infextv1.Install(testConfig.Scheme)
@@ -362,9 +400,13 @@ func createInferExt(testConfig *testutils.TestConfig, filePath string) {
 	}
 func createInferExt(k8sClient client.Client, filePath string) {
 	outManifests := renderCharts(nsName)
+func createInferExt(k8sClient client.Client) {
+	outManifests := renderChartsToYamls(nsName)
 
 	ginkgo.By("Creating inference extension resources from manifest: " + filePath)
 	testutils.CreateObjsFromYaml(testConfig, outManifests)
+	ginkgo.By("Creating inference extension resources from outManifests")
+	createObjsFromYaml(k8sClient, outManifests)
 
 	// Wait for the deployment to exist.
 	deploy := &appsv1.Deployment{
@@ -378,5 +420,65 @@ func createInferExt(k8sClient client.Client, filePath string) {
 		testutils.DeploymentReadyReplicas(testConfig, deploy, 1)
 	} else {
 		testutils.DeploymentAvailable(testConfig, deploy)
+		testutils.DeploymentAvailable(ctx, k8sClient, deploy, modelReadyTimeout, interval)
+	}
+
+	// Wait for the service to exist.
+	testutils.EventuallyExists(ctx, func() error {
+		return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: inferExtName}, &corev1.Service{})
+	}, existsTimeout, interval)
+}
+
+// applyYAMLFile loads the manifest at filePath, which may hold several YAML
+// documents, and creates each described object through k8sClient.
+func applyYAMLFile(k8sClient client.Client, filePath string) {
+	docs := readYaml(filePath)
+	createObjsFromYaml(k8sClient, docs)
+}
+
+// readYaml reads the file at filePath and returns its contents split into
+// separate documents on the "\n---" separator. A read error fails the test.
+func readYaml(filePath string) []string {
+	ginkgo.By("Reading YAML file: " + filePath)
+	raw, err := os.ReadFile(filePath)
+	gomega.Expect(err).NotTo(gomega.HaveOccurred())
+	return strings.Split(string(raw), "\n---")
+}
+
+// createObjsFromYaml decodes every non-empty YAML document in docs and creates
+// it in namespace nsName; undecodable documents are logged and skipped.
+func createObjsFromYaml(k8sClient client.Client, docs []string) {
+	// For each doc, decode and create
+	decoder := serializer.NewCodecFactory(scheme).UniversalDeserializer()
+	for _, doc := range docs {
+		trimmed := strings.TrimSpace(doc)
+		if trimmed == "" {
+			continue
+		}
+
+		// Decode into a runtime.Object
+		obj, gvk, decodeErr := decoder.Decode([]byte(trimmed), nil, nil)
+		if decodeErr != nil {
+			log.Printf("Skipping undecodable YAML document: %v\n%s", decodeErr, trimmed)
+			continue
+		}
+
+		ginkgo.By(fmt.Sprintf("Decoded GVK: %s", gvk))
+
+		unstrObj, ok := obj.(*unstructured.Unstructured)
+		if !ok {
+			// Fallback if it's a typed object
+			unstrObj = &unstructured.Unstructured{}
+			// Convert typed to unstructured
+			err := scheme.Convert(obj, unstrObj, nil)
+			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+		}
+
+		unstrObj.SetNamespace(nsName)
+
+		// Create the object
+		err := k8sClient.Create(ctx, unstrObj)
+		gomega.Expect(err).NotTo(gomega.HaveOccurred(),
+			"Failed to create object from YAML")
 	}
 }
diff --git a/test/e2e/epp/e2e_test.go b/test/e2e/epp/e2e_test.go
@@ -20,6 +20,7 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
+	"log"
 	"strconv"
 	"strings"
 	"time"
@@ -85,16 +86,18 @@ var _ = ginkgo.Describe("InferencePool", func() {
 			if !leaderElectionEnabled {
 				ginkgo.Skip("Leader election is not enabled for this test run, skipping.")
 			}
-
+			fmt.Println("Leader election enabled")
 			ginkgo.By("Verifying that exactly one EPP pod is ready")
 			gomega.Eventually(func(g gomega.Gomega) {
 				podList := &corev1.PodList{}
+				err := cli.List(ctx, podList, client.InNamespace(nsName), client.MatchingLabels{"inferencepool": inferExtName})
+				fmt.Println("listed nsName")
+				fmt.Printf("err %v", err)
 				err := testConfig.K8sClient.List(testConfig.Context, podList, client.InNamespace(testConfig.NsName), client.MatchingLabels{"app": inferExtName})
 				g.Expect(err).NotTo(gomega.HaveOccurred())
-
 				// The deployment should have 3 replicas for leader election.
 				g.Expect(podList.Items).To(gomega.HaveLen(3))
-
+				fmt.Println(podList.Items[0])
 				readyPods := 0
 				for _, pod := range podList.Items {
 					for _, cond := range pod.Status.Conditions {
@@ -103,6 +106,9 @@ var _ = ginkgo.Describe("InferencePool", func() {
 						}
 					}
 				}
+				fmt.Printf("readpods %d", readyPods)
+				g.Expect(readyPods).To(gomega.Equal(1), "Expected exactly one pod to be ready") // why do we expect only one pod to be ready???
+			}, readyTimeout, interval).Should(gomega.Succeed())
 				g.Expect(readyPods).To(gomega.Equal(1), "Expected exactly one pod to be ready")
 			}, testConfig.ReadyTimeout, testConfig.Interval).Should(gomega.Succeed())
 		})
@@ -139,6 +145,8 @@ var _ = ginkgo.Describe("InferencePool", func() {
 				d := &appsv1.Deployment{}
 				err := testConfig.K8sClient.Get(testConfig.Context, types.NamespacedName{Namespace: testConfig.NsName, Name: inferExtName}, d)
 				g.Expect(err).NotTo(gomega.HaveOccurred())
+				g.Expect(d.Status.Replicas).To(gomega.Equal(int32(replicaCount)), "Deployment should have 3 replicas")
+			}, readyTimeout, interval).Should(gomega.Succeed())
 				g.Expect(d.Status.Replicas).To(gomega.Equal(int32(3)), "Deployment should have 3 replicas")
 			}, testConfig.ReadyTimeout, testConfig.Interval).Should(gomega.Succeed())
 
@@ -260,6 +268,8 @@ func verifyMetrics() {
 
 	// Generate traffic by sending requests through the inference extension
 	ginkgo.By("Generating traffic through the inference extension")
+	curlCmd := getCurlCommand(envoyName, nsName, envoyPort, modelName, curlTimeout, "/completions", "Write as if you were a critic: San Francisco", true)
+	log.Println("Running curl command in the pod")
 	curlCmd := getCurlCommand(envoyName, testConfig.NsName, envoyPort, modelName, curlTimeout, "/completions", "Write as if you were a critic: San Francisco", true)
 
 	// Run the curl command multiple times to generate some metrics data
@@ -270,6 +280,7 @@ func verifyMetrics() {
 
 	// modify the curl command to generate some error metrics
 	curlCmd[len(curlCmd)-1] = "invalid input"
+	log.Println("Running curl with error command in the pod")
 	for i := 0; i < 5; i++ {
 		_, err := testutils.ExecCommandInPod(testConfig, "curl", "curl", curlCmd)
 		gomega.Expect(err).NotTo(gomega.HaveOccurred())
@@ -278,6 +289,7 @@ func verifyMetrics() {
 	// Now scrape metrics from the EPP endpoint via the curl pod
 	ginkgo.By("Scraping metrics from the EPP endpoint")
 	podIP := findReadyPod().Status.PodIP
+	log.Println("Found ready pod")
 
 	// Get the authorization token for reading metrics
 	token := ""
@@ -286,6 +298,8 @@ func verifyMetrics() {
 		g.Expect(err).NotTo(gomega.HaveOccurred())
 		g.Expect(t).NotTo(gomega.BeEmpty())
 		token = t
+		log.Println("Got the token")
+	}, existsTimeout, interval).Should(gomega.Succeed())
 	}, testConfig.ExistsTimeout, testConfig.Interval).Should(gomega.Succeed())
 
 	// Construct the metric scraping curl command using Pod IP
@@ -294,6 +308,12 @@ func verifyMetrics() {
 	ginkgo.By("Verifying that all expected metrics are present.")
 	gomega.Eventually(func() error {
 		// Execute the metrics scrape command inside the curl pod
+		log.Println("Execute the metrics scrap command")
+		resp, err := testutils.ExecCommandInPod(ctx, cfg, scheme, kubeCli, nsName, "curl", "curl", metricScrapeCmd)
+		log.Println("Response of exec:")
+		log.Println(resp)
+		log.Println("Error in exec:")
+		log.Println(err)
 		resp, err := testutils.ExecCommandInPod(testConfig, "curl", "curl", metricScrapeCmd)
 		if err != nil {
 			return err
@@ -327,13 +347,19 @@ func findReadyPod() *corev1.Pod {
 	var readyPod *corev1.Pod
 	gomega.Eventually(func(g gomega.Gomega) {
 		podList := &corev1.PodList{}
+		log.Printf("Namesapce %s", nsName)
+		log.Printf("inferExtName %s", inferExtName)
+		err := cli.List(ctx, podList, client.InNamespace(nsName), client.MatchingLabels{"inferencepool": inferExtName})
 		err := testConfig.K8sClient.List(testConfig.Context, podList, client.InNamespace(testConfig.NsName), client.MatchingLabels{"app": inferExtName})
 		g.Expect(err).NotTo(gomega.HaveOccurred())
-
+		log.Println("listed pods")
+		log.Println(podList)
 		foundReadyPod := false
 		for i := range podList.Items {
 			pod := &podList.Items[i]
 			for _, cond := range pod.Status.Conditions {
+				log.Println("Condition:")
+				log.Println(cond)
 				if cond.Type == corev1.PodReady && cond.Status == corev1.ConditionTrue {
 					g.Expect(pod.Status.PodIP).NotTo(gomega.BeEmpty(), "Ready pod must have an IP")
 					readyPod = pod
diff --git a/test/utils/utils.go b/test/utils/utils.go
@@ -275,6 +275,7 @@ func DeploymentReadyReplicas(testConfig *TestConfig, deploy *appsv1.Deployment,
 		var fetchedDeploy appsv1.Deployment
 		err := testConfig.K8sClient.Get(testConfig.Context, types.NamespacedName{Namespace: deploy.Namespace, Name: deploy.Name}, &fetchedDeploy)
 		g.Expect(err).NotTo(gomega.HaveOccurred())
+		fmt.Sprintf("%v", fetchedDeploy)
 		g.Expect(fetchedDeploy.Status.ReadyReplicas).To(gomega.BeNumerically(">=", count),
 			fmt.Sprintf("Deployment only has %d ready replicas, want at least %d", fetchedDeploy.Status.ReadyReplicas, count))
 	}, testConfig.ModelReadyTimeout, testConfig.Interval).Should(gomega.Succeed())
@@ -289,6 +290,7 @@ func checkDeploymentStatus(ctx context.Context, cli client.Client, deploy *appsv
 	found := 0
 	for _, want := range conditions {
 		for _, c := range fetchedDeploy.Status.Conditions {
+			fmt.Println(c)
 			if c.Type == want.Type && c.Status == want.Status {
 				found += 1
 			}