Skip to content

Commit b7d0149

Browse files
committed
Temp
1 parent f42cf11 commit b7d0149

File tree

7 files changed

+150
-19
lines changed

7 files changed

+150
-19
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ E2E_IMAGE ?= $(IMAGE_TAG)
4444
# E2E_USE_KIND is a flag used in the test-e2e target. When set to true, it loads the e2e image into the kind cluster.
4545
# It is possible, though, to run e2e tests against clusters other than kind. In such a case, it is the user's responsibility to load
4646
# the image into the cluster.
47-
E2E_USE_KIND ?= true
47+
E2E_USE_KIND ?= false
4848

4949
SYNCER_IMAGE_NAME := lora-syncer
5050
SYNCER_IMAGE_REPO ?= $(IMAGE_REGISTRY)/$(SYNCER_IMAGE_NAME)

config/charts/inferencepool/templates/epp-deployment.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,4 +118,4 @@ spec:
118118
{{- if .Values.inferenceExtension.tolerations }}
119119
tolerations:
120120
{{- toYaml .Values.inferenceExtension.tolerations | nindent 8 }}
121-
{{- end }}
121+
{{- end }}

config/charts/inferencepool/templates/leader-election-rbac.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,4 @@ roleRef:
2727
apiGroup: rbac.authorization.k8s.io
2828
kind: Role
2929
name: {{ include "gateway-api-inference-extension.name" . }}-leader-election
30-
{{- end }}
30+
{{- end }}

config/charts/inferencepool/values.yaml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ inferenceExtension:
3535
# Log verbosity
3636
- name: v
3737
value: 1
38-
38+
- name: ha-enable-leader-election
39+
value: "true"
3940
affinity: {}
4041

4142
tolerations: []
@@ -59,9 +60,9 @@ inferencePool:
5960
- number: 8000
6061
modelServerType: vllm # vllm, triton-tensorrt-llm
6162
apiVersion: inference.networking.k8s.io/v1
62-
# modelServers: # REQUIRED
63-
# matchLabels:
64-
# app: vllm-llama3-8b-instruct
63+
modelServers: # REQUIRED
64+
matchLabels:
65+
app: vllm-llama3-8b-instruct
6566

6667
# Should only be used if apiVersion is inference.networking.x-k8s.io/v1alpha2,
6768
# This will soon be deprecated when upstream GW providers support v1, just doing something simple for now.

test/e2e/epp/e2e_suite_test.go

Lines changed: 110 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package epp
1919
import (
2020
"errors"
2121
"fmt"
22+
"log"
2223
"os"
2324
"strings"
2425
"testing"
@@ -36,6 +37,7 @@ import (
3637
infextv1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
3738
infextv1a2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
3839
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/env"
40+
infextv1a2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
3941
testutils "sigs.k8s.io/gateway-api-inference-extension/test/utils"
4042

4143
"helm.sh/helm/v3/pkg/chart/loader"
@@ -84,6 +86,10 @@ const (
8486
metricsRbacManifest = "../../testdata/metrics-rbac.yaml"
8587
// modelServerManifestFilepathEnvVar is the env var that holds absolute path to the manifest for the model server test resource.
8688
modelServerManifestFilepathEnvVar = "MANIFEST_PATH"
89+
// replicaCount is the number of replicas of EPP.
90+
replicaCount = 3
91+
92+
name = "vllm-llama3-8b-instruct"
8793
)
8894

8995
const e2eLeaderElectionEnabledEnvVar = "E2E_LEADER_ELECTION_ENABLED"
@@ -102,15 +108,29 @@ func TestAPIs(t *testing.T) {
102108
)
103109
}
104110

105-
func renderCharts(nsName string) []string {
106-
chartPath := "./charts/inferencepool" // Path to your Helm chart
111+
func renderChartsToYamls(nsName string) []string {
112+
chartPath := "/usr/local/google/home/rahulgurnani/gateway-api-inference-extension/config/charts/inferencepool"
107113
chart, err := loader.Load(chartPath)
108114
if err != nil {
109115
panic(fmt.Sprintf("Failed to load chart: %v", err))
110116
}
111-
values, _ := chartutil.ReadValuesFile("charts/inferencepool/values.yaml")
117+
values, _ := chartutil.ReadValuesFile("/usr/local/google/home/rahulgurnani/gateway-api-inference-extension/config/charts/inferencepool/values.yaml")
118+
infExt, ok := values["inferenceExtension"].(map[string]interface{})
119+
if ok {
120+
infExt["replicas"] = replicaCount
121+
fmt.Println(infExt)
122+
flags, ok := infExt["flags"].([]interface{})
123+
if ok {
124+
flags = append(flags, map[string]string{
125+
"name": "ha-enable-leader-election",
126+
"value": "true",
127+
})
128+
infExt["flags"] = flags
129+
}
130+
}
131+
112132
options := chartutil.ReleaseOptions{
113-
Name: "vllm-llama3-8b-instruct",
133+
Name: name,
114134
Namespace: nsName,
115135
}
116136
renderValues, err := chartutil.ToRenderValues(chart, values, options, nil)
@@ -122,11 +142,22 @@ func renderCharts(nsName string) []string {
122142
if err != nil {
123143
panic(fmt.Sprintf("Failed to render chart: %v", err))
124144
}
125-
fmt.Println(rendered)
126145
var renderedValues []string
127-
for _, v := range rendered {
128-
renderedValues = append(renderedValues, v)
146+
for fName, renderedChart := range rendered {
147+
if strings.Contains(fName, "NOTES.txt") {
148+
continue
149+
}
150+
151+
fmt.Println("----------------rendered----------------")
152+
fmt.Println(fName)
153+
objs := strings.Split(renderedChart, "\n---")
154+
for _, obj := range objs {
155+
fmt.Println("-----------obj-----------")
156+
fmt.Println(obj)
157+
renderedValues = append(renderedValues, obj)
158+
}
129159
}
160+
130161
return renderedValues
131162
}
132163

@@ -144,6 +175,7 @@ var _ = ginkgo.BeforeSuite(func() {
144175
leaderElectionEnabled = true
145176
ginkgo.By("Leader election test mode enabled via " + e2eLeaderElectionEnabledEnvVar)
146177
}
178+
leaderElectionEnabled = true
147179

148180
ginkgo.By("Setting up the test suite")
149181
setupSuite()
@@ -165,8 +197,8 @@ func setupInfra() {
165197
}
166198
crds := map[string]string{
167199
"inferencepools.inference.networking.x-k8s.io": xInferPoolManifest,
168-
"inferenceobjectives.inference.networking.x-k8s.io": xInferObjectiveManifest,
169200
"inferencepools.inference.networking.k8s.io": inferPoolManifest,
201+
"inferenceobjectives.inference.networking.x-k8s.io": xInferObjectiveManifest,
170202
}
171203

172204
createCRDs(testConfig, crds)
@@ -179,6 +211,10 @@ func setupInfra() {
179211
createClient(testConfig, clientManifest)
180212
createEnvoy(testConfig, envoyManifest)
181213
createMetricsRbac(testConfig, metricsRbacManifest)
214+
createInferExt(cli)
215+
createClient(cli, clientManifest)
216+
createEnvoy(cli, envoyManifest)
217+
createMetricsRbac(cli, metricsRbacManifest)
182218
// Run this step last, as it requires additional time for the model server to become ready.
183219
ginkgo.By("Creating model server resources from manifest: " + modelServerManifestPath)
184220
createModelServer(testConfig, modelServerManifestArray)
@@ -214,6 +250,8 @@ func setupSuite() {
214250

215251
err = infextv1a2.Install(testConfig.Scheme)
216252
// err = infextv1a2.Install(scheme)
253+
// TODO: Fix the v1a2 chart
254+
err = infextv1a2.Install(scheme)
217255
gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred())
218256

219257
err = infextv1.Install(testConfig.Scheme)
@@ -362,9 +400,13 @@ func createInferExt(testConfig *testutils.TestConfig, filePath string) {
362400
}
363401
func createInferExt(k8sClient client.Client, filePath string) {
364402
outManifests := renderCharts(nsName)
403+
func createInferExt(k8sClient client.Client) {
404+
outManifests := renderChartsToYamls(nsName)
365405

366406
ginkgo.By("Creating inference extension resources from manifest: " + filePath)
367407
testutils.CreateObjsFromYaml(testConfig, outManifests)
408+
ginkgo.By("Creating inference extension resources from outManifests")
409+
createObjsFromYaml(k8sClient, outManifests)
368410

369411
// Wait for the deployment to exist.
370412
deploy := &appsv1.Deployment{
@@ -378,5 +420,65 @@ func createInferExt(k8sClient client.Client, filePath string) {
378420
testutils.DeploymentReadyReplicas(testConfig, deploy, 1)
379421
} else {
380422
testutils.DeploymentAvailable(testConfig, deploy)
423+
testutils.DeploymentAvailable(ctx, k8sClient, deploy, modelReadyTimeout, interval)
424+
}
425+
426+
// Wait for the service to exist.
427+
testutils.EventuallyExists(ctx, func() error {
428+
return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: inferExtName}, &corev1.Service{})
429+
}, existsTimeout, interval)
430+
}
431+
432+
// applyYAMLFile reads a file containing YAML (possibly multiple docs)
433+
// and applies each object to the cluster.
434+
func applyYAMLFile(k8sClient client.Client, filePath string) {
435+
// Create the resources from the manifest file
436+
createObjsFromYaml(k8sClient, readYaml(filePath))
437+
}
438+
439+
func readYaml(filePath string) []string {
440+
ginkgo.By("Reading YAML file: " + filePath)
441+
yamlBytes, err := os.ReadFile(filePath)
442+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
443+
444+
// Split multiple docs, if needed
445+
return strings.Split(string(yamlBytes), "\n---")
446+
}
447+
448+
func createObjsFromYaml(k8sClient client.Client, docs []string) {
449+
// For each doc, decode and create
450+
decoder := serializer.NewCodecFactory(scheme).UniversalDeserializer()
451+
for _, doc := range docs {
452+
trimmed := strings.TrimSpace(doc)
453+
if trimmed == "" {
454+
continue
455+
}
456+
457+
// Decode into a runtime.Object
458+
obj, gvk, decodeErr := decoder.Decode([]byte(trimmed), nil, nil)
459+
if decodeErr != nil {
460+
log.Printf("Trimmed: %s", trimmed)
461+
continue
462+
}
463+
gomega.Expect(decodeErr).NotTo(gomega.HaveOccurred(),
464+
"Failed to decode YAML document to a Kubernetes object")
465+
466+
ginkgo.By(fmt.Sprintf("Decoded GVK: %s", gvk))
467+
468+
unstrObj, ok := obj.(*unstructured.Unstructured)
469+
if !ok {
470+
// Fallback if it's a typed object
471+
unstrObj = &unstructured.Unstructured{}
472+
// Convert typed to unstructured
473+
err := scheme.Convert(obj, unstrObj, nil)
474+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
475+
}
476+
477+
unstrObj.SetNamespace(nsName)
478+
479+
// Create the object
480+
err := k8sClient.Create(ctx, unstrObj)
481+
gomega.Expect(err).NotTo(gomega.HaveOccurred(),
482+
"Failed to create object from YAML")
381483
}
382484
}

test/e2e/epp/e2e_test.go

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"encoding/json"
2121
"errors"
2222
"fmt"
23+
"log"
2324
"strconv"
2425
"strings"
2526
"time"
@@ -85,16 +86,18 @@ var _ = ginkgo.Describe("InferencePool", func() {
8586
if !leaderElectionEnabled {
8687
ginkgo.Skip("Leader election is not enabled for this test run, skipping.")
8788
}
88-
89+
fmt.Println("Leader election enabled")
8990
ginkgo.By("Verifying that exactly one EPP pod is ready")
9091
gomega.Eventually(func(g gomega.Gomega) {
9192
podList := &corev1.PodList{}
93+
err := cli.List(ctx, podList, client.InNamespace(nsName), client.MatchingLabels{"inferencepool": inferExtName})
94+
fmt.Println("listed nsName")
95+
fmt.Printf("err %v", err)
9296
err := testConfig.K8sClient.List(testConfig.Context, podList, client.InNamespace(testConfig.NsName), client.MatchingLabels{"app": inferExtName})
9397
g.Expect(err).NotTo(gomega.HaveOccurred())
94-
9598
// The deployment should have 3 replicas for leader election.
9699
g.Expect(podList.Items).To(gomega.HaveLen(3))
97-
100+
fmt.Println(podList.Items[0])
98101
readyPods := 0
99102
for _, pod := range podList.Items {
100103
for _, cond := range pod.Status.Conditions {
@@ -103,6 +106,9 @@ var _ = ginkgo.Describe("InferencePool", func() {
103106
}
104107
}
105108
}
109+
fmt.Printf("readpods %d", readyPods)
110+
g.Expect(readyPods).To(gomega.Equal(1), "Expected exactly one pod to be ready") // why do we expect only one pod to be ready???
111+
}, readyTimeout, interval).Should(gomega.Succeed())
106112
g.Expect(readyPods).To(gomega.Equal(1), "Expected exactly one pod to be ready")
107113
}, testConfig.ReadyTimeout, testConfig.Interval).Should(gomega.Succeed())
108114
})
@@ -139,6 +145,8 @@ var _ = ginkgo.Describe("InferencePool", func() {
139145
d := &appsv1.Deployment{}
140146
err := testConfig.K8sClient.Get(testConfig.Context, types.NamespacedName{Namespace: testConfig.NsName, Name: inferExtName}, d)
141147
g.Expect(err).NotTo(gomega.HaveOccurred())
148+
g.Expect(d.Status.Replicas).To(gomega.Equal(int32(replicaCount)), "Deployment should have 3 replicas")
149+
}, readyTimeout, interval).Should(gomega.Succeed())
142150
g.Expect(d.Status.Replicas).To(gomega.Equal(int32(3)), "Deployment should have 3 replicas")
143151
}, testConfig.ReadyTimeout, testConfig.Interval).Should(gomega.Succeed())
144152

@@ -260,6 +268,8 @@ func verifyMetrics() {
260268

261269
// Generate traffic by sending requests through the inference extension
262270
ginkgo.By("Generating traffic through the inference extension")
271+
curlCmd := getCurlCommand(envoyName, nsName, envoyPort, modelName, curlTimeout, "/completions", "Write as if you were a critic: San Francisco", true)
272+
log.Println("Running curl command in the pod")
263273
curlCmd := getCurlCommand(envoyName, testConfig.NsName, envoyPort, modelName, curlTimeout, "/completions", "Write as if you were a critic: San Francisco", true)
264274

265275
// Run the curl command multiple times to generate some metrics data
@@ -270,6 +280,7 @@ func verifyMetrics() {
270280

271281
// modify the curl command to generate some error metrics
272282
curlCmd[len(curlCmd)-1] = "invalid input"
283+
log.Println("Running curl with error command in the pod")
273284
for i := 0; i < 5; i++ {
274285
_, err := testutils.ExecCommandInPod(testConfig, "curl", "curl", curlCmd)
275286
gomega.Expect(err).NotTo(gomega.HaveOccurred())
@@ -278,6 +289,7 @@ func verifyMetrics() {
278289
// Now scrape metrics from the EPP endpoint via the curl pod
279290
ginkgo.By("Scraping metrics from the EPP endpoint")
280291
podIP := findReadyPod().Status.PodIP
292+
log.Println("Found ready pod")
281293

282294
// Get the authorization token for reading metrics
283295
token := ""
@@ -286,6 +298,8 @@ func verifyMetrics() {
286298
g.Expect(err).NotTo(gomega.HaveOccurred())
287299
g.Expect(t).NotTo(gomega.BeEmpty())
288300
token = t
301+
log.Println("Got the token")
302+
}, existsTimeout, interval).Should(gomega.Succeed())
289303
}, testConfig.ExistsTimeout, testConfig.Interval).Should(gomega.Succeed())
290304

291305
// Construct the metric scraping curl command using Pod IP
@@ -294,6 +308,12 @@ func verifyMetrics() {
294308
ginkgo.By("Verifying that all expected metrics are present.")
295309
gomega.Eventually(func() error {
296310
// Execute the metrics scrape command inside the curl pod
311+
log.Println("Execute the metrics scrap command")
312+
resp, err := testutils.ExecCommandInPod(ctx, cfg, scheme, kubeCli, nsName, "curl", "curl", metricScrapeCmd)
313+
log.Println("Response of exec:")
314+
log.Println(resp)
315+
log.Println("Error in exec:")
316+
log.Println(err)
297317
resp, err := testutils.ExecCommandInPod(testConfig, "curl", "curl", metricScrapeCmd)
298318
if err != nil {
299319
return err
@@ -327,13 +347,19 @@ func findReadyPod() *corev1.Pod {
327347
var readyPod *corev1.Pod
328348
gomega.Eventually(func(g gomega.Gomega) {
329349
podList := &corev1.PodList{}
350+
log.Printf("Namesapce %s", nsName)
351+
log.Printf("inferExtName %s", inferExtName)
352+
err := cli.List(ctx, podList, client.InNamespace(nsName), client.MatchingLabels{"inferencepool": inferExtName})
330353
err := testConfig.K8sClient.List(testConfig.Context, podList, client.InNamespace(testConfig.NsName), client.MatchingLabels{"app": inferExtName})
331354
g.Expect(err).NotTo(gomega.HaveOccurred())
332-
355+
log.Println("listed pods")
356+
log.Println(podList)
333357
foundReadyPod := false
334358
for i := range podList.Items {
335359
pod := &podList.Items[i]
336360
for _, cond := range pod.Status.Conditions {
361+
log.Println("Condition:")
362+
log.Println(cond)
337363
if cond.Type == corev1.PodReady && cond.Status == corev1.ConditionTrue {
338364
g.Expect(pod.Status.PodIP).NotTo(gomega.BeEmpty(), "Ready pod must have an IP")
339365
readyPod = pod

test/utils/utils.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ func DeploymentReadyReplicas(testConfig *TestConfig, deploy *appsv1.Deployment,
275275
var fetchedDeploy appsv1.Deployment
276276
err := testConfig.K8sClient.Get(testConfig.Context, types.NamespacedName{Namespace: deploy.Namespace, Name: deploy.Name}, &fetchedDeploy)
277277
g.Expect(err).NotTo(gomega.HaveOccurred())
278+
fmt.Sprintf("%v", fetchedDeploy)
278279
g.Expect(fetchedDeploy.Status.ReadyReplicas).To(gomega.BeNumerically(">=", count),
279280
fmt.Sprintf("Deployment only has %d ready replicas, want at least %d", fetchedDeploy.Status.ReadyReplicas, count))
280281
}, testConfig.ModelReadyTimeout, testConfig.Interval).Should(gomega.Succeed())
@@ -289,6 +290,7 @@ func checkDeploymentStatus(ctx context.Context, cli client.Client, deploy *appsv
289290
found := 0
290291
for _, want := range conditions {
291292
for _, c := range fetchedDeploy.Status.Conditions {
293+
fmt.Println(c)
292294
if c.Type == want.Type && c.Status == want.Status {
293295
found += 1
294296
}

0 commit comments

Comments
 (0)