
Commit df147db

add e2e test for epp metrics (#938)
* add e2e test for epp metrics

  Signed-off-by: Hang Yin <[email protected]>

* fix linting

---------

Signed-off-by: Hang Yin <[email protected]>
1 parent 708882c commit df147db

File tree

4 files changed: +243 -16 lines changed

  test/e2e/epp/e2e_suite_test.go
  test/e2e/epp/e2e_test.go
  test/testdata/metrics-rbac.yaml
  test/utils/utils.go

test/e2e/epp/e2e_suite_test.go

Lines changed: 30 additions & 0 deletions
@@ -18,6 +18,7 @@ package epp
 
 import (
     "context"
+    "errors"
     "fmt"
     "os"
     "strings"
@@ -70,6 +71,8 @@ const (
     envoyPort = "8081"
     // inferExtName is the name of the inference extension test resources.
     inferExtName = "vllm-llama3-8b-instruct-epp"
+    // metricsReaderSecretName is the name of the metrics reader secret, which stores the service account token used to read EPP metrics.
+    metricsReaderSecretName = "inference-gateway-sa-metrics-reader-secret"
     // clientManifest is the manifest for the client test resources.
     clientManifest = "../../testdata/client.yaml"
     // modelServerSecretManifest is the manifest for the model server secret resource.
@@ -82,6 +85,8 @@ const (
     inferExtManifest = "../../testdata/inferencepool-e2e.yaml"
     // envoyManifest is the manifest for the envoy proxy test resources.
     envoyManifest = "../../testdata/envoy.yaml"
+    // metricsRbacManifest is the manifest for the RBAC resources used for testing metrics.
+    metricsRbacManifest = "../../testdata/metrics-rbac.yaml"
     // modelServerManifestFilepathEnvVar is the env var that holds the absolute path to the manifest for the model server test resource.
     modelServerManifestFilepathEnvVar = "MANIFEST_PATH"
 )
@@ -133,6 +138,7 @@ func setupInfra() {
     createInferExt(cli, inferExtManifest)
     createClient(cli, clientManifest)
     createEnvoy(cli, envoyManifest)
+    createMetricsRbac(cli, metricsRbacManifest)
     // Run this step last, as it requires additional time for the model server to become ready.
     createModelServer(cli, modelServerManifestArray, modelServerManifestPath)
 }
@@ -259,6 +265,30 @@ func createClient(k8sClient client.Client, filePath string) {
     testutils.PodReady(ctx, k8sClient, pod, readyTimeout, interval)
 }
 
+// createMetricsRbac creates the metrics RBAC resources from the manifest file.
+func createMetricsRbac(k8sClient client.Client, filePath string) {
+    inManifests := readYaml(filePath)
+    ginkgo.By("Replacing placeholder namespace with E2E_NS environment variable")
+    outManifests := []string{}
+    for _, m := range inManifests {
+        outManifests = append(outManifests, strings.ReplaceAll(m, "$E2E_NS", nsName))
+    }
+    ginkgo.By("Creating RBAC resources for scraping metrics from manifest: " + filePath)
+    createObjsFromYaml(k8sClient, outManifests)
+
+    // Wait for the service account token to exist.
+    testutils.EventuallyExists(ctx, func() error {
+        token, err := getMetricsReaderToken(k8sClient)
+        if err != nil {
+            return err
+        }
+        if len(token) == 0 {
+            return errors.New("failed to get metrics reader token")
+        }
+        return nil
+    }, existsTimeout, interval)
+}
+
 // createModelServer creates the model server resources used for testing from the given filePaths.
 func createModelServer(k8sClient client.Client, modelServerManifestArray []string, deployPath string) {
     ginkgo.By("Creating model server resources from manifest: " + deployPath)

test/e2e/epp/e2e_test.go

Lines changed: 154 additions & 16 deletions
@@ -17,6 +17,8 @@ limitations under the License.
 package epp
 
 import (
+    "encoding/json"
+    "errors"
     "fmt"
     "strconv"
     "strings"
@@ -26,9 +28,12 @@ import (
     "github.com/google/go-cmp/cmp/cmpopts"
     "github.com/onsi/ginkgo/v2"
     "github.com/onsi/gomega"
+    corev1 "k8s.io/api/core/v1"
+    k8serrors "k8s.io/apimachinery/pkg/api/errors"
     "k8s.io/apimachinery/pkg/types"
     "k8s.io/utils/ptr"
-    "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+    client "sigs.k8s.io/controller-runtime/pkg/client"
+    v1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
     testutils "sigs.k8s.io/gateway-api-inference-extension/test/utils"
 )
 
@@ -51,38 +56,57 @@ var _ = ginkgo.Describe("InferencePool", func() {
     ginkgo.AfterEach(func() {
         ginkgo.By("Deleting the InferenceModel test resource.")
         cleanupInferModelResources()
+        gomega.Eventually(func() error {
+            err := cli.Get(ctx, types.NamespacedName{Namespace: infModel.Namespace, Name: infModel.Name}, infModel)
+            if err == nil {
+                return errors.New("InferenceModel resource still exists")
+            }
+            if !k8serrors.IsNotFound(err) {
+                return nil
+            }
+            return nil
+        }, existsTimeout, interval).Should(gomega.Succeed())
     })
 
     ginkgo.When("The Inference Extension is running", func() {
         ginkgo.It("Should route traffic to target model servers", func() {
             for _, t := range []struct {
                 api              string
-                promptOrMessages string
+                promptOrMessages any
             }{
                 {
                     api:              "/completions",
                     promptOrMessages: "Write as if you were a critic: San Francisco",
                 },
                 {
-                    api:              "/chat/completions",
-                    promptOrMessages: `[{"role": "user", "content": "Write as if you were a critic: San Francisco"}]`,
+                    api: "/chat/completions",
+                    promptOrMessages: []map[string]any{
+                        {
+                            "role":    "user",
+                            "content": "Write as if you were a critic: San Francisco",
+                        },
+                    },
                 },
                 {
                     api: "/chat/completions",
-                    promptOrMessages: `[{"role": "user", "content": "Write as if you were a critic: San Francisco"},` +
-                        `{"role": "assistant", "content": "Okay, let's see..."},` +
-                        `{"role": "user", "content": "Now summarize your thoughts."}]`,
+                    promptOrMessages: []map[string]any{
+                        {
+                            "role":    "user",
+                            "content": "Write as if you were a critic: San Francisco",
+                        },
+                        {"role": "assistant", "content": "Okay, let's see..."},
+                        {"role": "user", "content": "Now summarize your thoughts."},
+                    },
                 },
             } {
-                ginkgo.By("Verifying connectivity through the inference extension with " +
-                    t.api + " api and prompt/messages: " + t.promptOrMessages)
+                ginkgo.By(fmt.Sprintf("Verifying connectivity through the inference extension with %s api and prompt/messages: %v", t.api, t.promptOrMessages))
 
                 // Ensure the expected responses include the inferencemodel target model names.
                 var expected []string
                 for _, m := range infModel.Spec.TargetModels {
                     expected = append(expected, m.Name)
                 }
-                curlCmd := getCurlCommand(envoyName, nsName, envoyPort, modelName, curlTimeout, t.api, t.promptOrMessages)
+                curlCmd := getCurlCommand(envoyName, nsName, envoyPort, modelName, curlTimeout, t.api, t.promptOrMessages, false)
 
                 actual := make(map[string]int)
                 gomega.Eventually(func() error {
@@ -106,11 +130,103 @@ var _ = ginkgo.Describe("InferencePool", func() {
                     if !cmp.Equal(got, expected, cmpopts.SortSlices(func(a, b string) bool { return a < b })) {
                         return fmt.Errorf("actual (%v) != expected (%v); resp=%q", got, expected, resp)
                     }
-
                     return nil
                 }, readyTimeout, curlInterval).Should(gomega.Succeed())
             }
         })
+
+        ginkgo.It("Should expose EPP metrics after generating traffic", func() {
+            // Define the metrics we expect to see.
+            expectedMetrics := []string{
+                "inference_model_request_total",
+                "inference_model_request_error_total",
+                "inference_model_request_duration_seconds",
+                // TODO: normalized_time_per_output_token_seconds is not actually recorded yet
+                // "normalized_time_per_output_token_seconds",
+                "inference_model_request_sizes",
+                "inference_model_response_sizes",
+                "inference_model_input_tokens",
+                "inference_model_output_tokens",
+                "inference_pool_average_kv_cache_utilization",
+                "inference_pool_average_queue_size",
+                "inference_pool_per_pod_queue_size",
+                "inference_model_running_requests",
+                "inference_pool_ready_pods",
+                "inference_extension_info",
+            }
+
+            // Generate traffic by sending requests through the inference extension.
+            ginkgo.By("Generating traffic through the inference extension")
+            curlCmd := getCurlCommand(envoyName, nsName, envoyPort, modelName, curlTimeout, "/completions", "Write as if you were a critic: San Francisco", true)
+
+            // Run the curl command multiple times to generate some metrics data.
+            for i := 0; i < 5; i++ {
+                _, err := testutils.ExecCommandInPod(ctx, cfg, scheme, kubeCli, nsName, "curl", "curl", curlCmd)
+                gomega.Expect(err).NotTo(gomega.HaveOccurred())
+            }
+
+            // Modify the curl command to generate some error metrics.
+            curlCmd[len(curlCmd)-1] = "invalid input"
+            for i := 0; i < 5; i++ {
+                _, err := testutils.ExecCommandInPod(ctx, cfg, scheme, kubeCli, nsName, "curl", "curl", curlCmd)
+                gomega.Expect(err).NotTo(gomega.HaveOccurred())
+            }
+
+            // Now scrape metrics from the EPP endpoint via the curl pod.
+            ginkgo.By("Scraping metrics from the EPP endpoint")
+
+            // Get the Pod IP instead of the Service.
+            podList := &corev1.PodList{}
+            err := cli.List(ctx, podList, client.InNamespace(nsName), client.MatchingLabels{"app": inferExtName})
+            gomega.Expect(err).NotTo(gomega.HaveOccurred())
+            gomega.Expect(podList.Items).NotTo(gomega.BeEmpty())
+            podIP := podList.Items[0].Status.PodIP
+            gomega.Expect(podIP).NotTo(gomega.BeEmpty())
+
+            // Get the authorization token for reading metrics.
+            token := ""
+            gomega.Eventually(func() error {
+                token, err = getMetricsReaderToken(cli)
+                if err != nil {
+                    return err
+                }
+                if token == "" {
+                    return errors.New("token not found")
+                }
+                return nil
+            }, existsTimeout, interval).Should(gomega.Succeed())
+
+            // Construct the metrics scraping curl command using the Pod IP.
+            metricScrapeCmd := []string{
+                "curl",
+                "-i",
+                "--max-time",
+                strconv.Itoa((int)(curlTimeout.Seconds())),
+                "-H",
+                "Authorization: Bearer " + token,
+                fmt.Sprintf("http://%s:%d/metrics", podIP, 9090),
+            }
+
+            ginkgo.By("Verifying that all expected metrics are present.")
+            gomega.Eventually(func() error {
+                // Execute the metrics scrape command inside the curl pod.
+                resp, err := testutils.ExecCommandInPod(ctx, cfg, scheme, kubeCli, nsName, "curl", "curl", metricScrapeCmd)
+                if err != nil {
+                    return err
+                }
+                // Verify that we got a 200 OK response.
+                if !strings.Contains(resp, "200 OK") {
+                    return fmt.Errorf("did not get 200 OK: %s", resp)
+                }
+                // Check if all expected metrics are present in the metrics output.
+                for _, metric := range expectedMetrics {
+                    if !strings.Contains(resp, metric) {
+                        return fmt.Errorf("expected metric %s not found in metrics output", metric)
+                    }
+                }
+                return nil
+            }, readyTimeout, curlInterval).Should(gomega.Succeed())
+        })
     })
 })
 
@@ -130,16 +246,38 @@ func newInferenceModel(ns string) *v1alpha2.InferenceModel {
         Obj()
 }
 
+func getMetricsReaderToken(k8sClient client.Client) (string, error) {
+    secret := &corev1.Secret{}
+    err := k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: metricsReaderSecretName}, secret)
+    if err != nil {
+        return "", err
+    }
+    return string(secret.Data["token"]), nil
+}
+
 // getCurlCommand returns the command, as a slice of strings, for curl'ing
 // the test model server at the given name, namespace, port, and model name.
-func getCurlCommand(name, ns, port, model string, timeout time.Duration, api string, promptOrMessages string) []string {
-    var body string
+func getCurlCommand(name, ns, port, model string, timeout time.Duration, api string, promptOrMessages any, streaming bool) []string {
+    body := map[string]any{
+        "model":       model,
+        "max_tokens":  100,
+        "temperature": 0,
+    }
+    body["model"] = model
     switch api {
     case "/completions":
-        body = fmt.Sprintf(`{"model": "%s", "prompt": "%s", "max_tokens": 100, "temperature": 0}`, model, promptOrMessages)
+        body["prompt"] = promptOrMessages
     case "/chat/completions":
-        body = fmt.Sprintf(`{"model": "%s", "messages": %s, "max_tokens": 100, "temperature": 0}`, model, promptOrMessages)
+        body["messages"] = promptOrMessages
+    }
+    if streaming {
+        body["stream"] = true
+        body["stream_options"] = map[string]any{
+            "include_usage": true,
+        }
     }
+    b, err := json.Marshal(body)
+    gomega.Expect(err).NotTo(gomega.HaveOccurred())
     return []string{
         "curl",
         "-i",
@@ -149,6 +287,6 @@ func getCurlCommand(name, ns, port, model string, timeout time.Duration, api str
         "-H",
         "Content-Type: application/json",
         "-d",
-        body,
+        string(b),
     }
 }
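For reference, the scrape that the new test automates can also be reproduced by hand. This is a minimal sketch, not part of the change: it assumes the e2e resources are already deployed in the suite's namespace, that the client pod and container are both named curl (as the ExecCommandInPod calls suggest), and it reuses the secret name, the app=vllm-llama3-8b-instruct-epp label, and the 9090 metrics port that appear in this diff.

# Namespace the suite deployed into (assumed to be exported by the caller).
NS="$E2E_NS"

# Token stored by the metrics reader Secret under the "token" key (base64-encoded in the API object).
TOKEN=$(kubectl get secret inference-gateway-sa-metrics-reader-secret -n "$NS" -o jsonpath='{.data.token}' | base64 -d)

# EPP pod IP, using the same label selector as the test.
POD_IP=$(kubectl get pod -n "$NS" -l app=vllm-llama3-8b-instruct-epp -o jsonpath='{.items[0].status.podIP}')

# Scrape /metrics from inside the cluster via the curl pod, mirroring metricScrapeCmd above.
kubectl exec -n "$NS" curl -c curl -- \
  curl -i -H "Authorization: Bearer $TOKEN" "http://$POD_IP:9090/metrics"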

test/testdata/metrics-rbac.yaml

Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: inference-gateway-metrics-reader
+rules:
+- nonResourceURLs:
+  - /metrics
+  verbs:
+  - get
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: inference-gateway-sa-metrics-reader
+  namespace: $E2E_NS
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: inference-gateway-sa-metrics-reader-role-binding
+subjects:
+- kind: ServiceAccount
+  name: inference-gateway-sa-metrics-reader
+  namespace: $E2E_NS
+roleRef:
+  kind: ClusterRole
+  name: inference-gateway-metrics-reader
+  apiGroup: rbac.authorization.k8s.io
+---
+apiVersion: v1
+kind: Secret
+metadata:
+  name: inference-gateway-sa-metrics-reader-secret
+  namespace: $E2E_NS
+  annotations:
+    kubernetes.io/service-account.name: inference-gateway-sa-metrics-reader
+type: kubernetes.io/service-account-token
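Outside the Go suite, the same objects can be rendered and applied by hand; the suite itself performs the $E2E_NS substitution in createMetricsRbac rather than shelling out. A minimal sketch, assuming E2E_NS is exported and envsubst (from GNU gettext) is available:

# Substitute the namespace placeholder and apply the RBAC manifest.
export E2E_NS=my-e2e-ns   # hypothetical namespace, for illustration only
envsubst < test/testdata/metrics-rbac.yaml | kubectl apply -f -

# The token controller populates the service-account-token Secret asynchronously;
# check that the "token" key is non-empty before trying to scrape metrics.
kubectl get secret inference-gateway-sa-metrics-reader-secret -n "$E2E_NS" \
  -o jsonpath='{.data.token}' | base64 -d | head -c 20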

test/utils/utils.go

Lines changed: 22 additions & 0 deletions
@@ -59,6 +59,24 @@ func DeleteClusterResources(ctx context.Context, cli client.Client) error {
     if err != nil && !apierrors.IsNotFound(err) {
         return err
     }
+    metricsReaderBinding := &rbacv1.ClusterRoleBinding{
+        ObjectMeta: metav1.ObjectMeta{
+            Name: "inference-gateway-sa-metrics-reader-role-binding",
+        },
+    }
+    err = cli.Delete(ctx, metricsReaderBinding, client.PropagationPolicy(metav1.DeletePropagationForeground))
+    if err != nil && !apierrors.IsNotFound(err) {
+        return err
+    }
+    metricsReaderRole := &rbacv1.ClusterRole{
+        ObjectMeta: metav1.ObjectMeta{
+            Name: "inference-gateway-metrics-reader",
+        },
+    }
+    err = cli.Delete(ctx, metricsReaderRole, client.PropagationPolicy(metav1.DeletePropagationForeground))
+    if err != nil && !apierrors.IsNotFound(err) {
+        return err
+    }
     model := &apiextv1.CustomResourceDefinition{
         ObjectMeta: metav1.ObjectMeta{
             Name: "inferencemodels.inference.networking.x-k8s.io",
@@ -106,6 +124,10 @@ func DeleteNamespacedResources(ctx context.Context, cli client.Client, ns string
     if err != nil && !apierrors.IsNotFound(err) {
         return err
     }
+    err = cli.DeleteAllOf(ctx, &corev1.ServiceAccount{}, client.InNamespace(ns), client.PropagationPolicy(metav1.DeletePropagationForeground))
+    if err != nil && !apierrors.IsNotFound(err) {
+        return err
+    }
     err = cli.DeleteAllOf(ctx, &v1alpha2.InferencePool{}, client.InNamespace(ns), client.PropagationPolicy(metav1.DeletePropagationForeground))
     if err != nil && !apierrors.IsNotFound(err) {
         return err