Skip to content

Commit ede62e6

Browse files
authored
add e2e test case for chat completions (#868)
Signed-off-by: Hang Yin <[email protected]>
1 parent 8c460a6 commit ede62e6

File tree

1 file changed

+72
-43
lines changed

1 file changed

+72
-43
lines changed

test/e2e/epp/e2e_test.go

Lines changed: 72 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,19 @@ import (
3333
)
3434

3535
var _ = ginkgo.Describe("InferencePool", func() {
36+
var infModel *v1alpha2.InferenceModel
3637
ginkgo.BeforeEach(func() {
3738
ginkgo.By("Waiting for the namespace to exist.")
3839
namespaceExists(cli, nsName)
40+
41+
ginkgo.By("Creating an InferenceModel resource")
42+
infModel = newInferenceModel(nsName)
43+
gomega.Expect(cli.Create(ctx, infModel)).To(gomega.Succeed())
44+
45+
ginkgo.By("Ensuring the InferenceModel resource exists in the namespace")
46+
gomega.Eventually(func() error {
47+
return cli.Get(ctx, types.NamespacedName{Namespace: infModel.Namespace, Name: infModel.Name}, infModel)
48+
}, existsTimeout, interval).Should(gomega.Succeed())
3949
})
4050

4151
ginkgo.AfterEach(func() {
@@ -45,49 +55,61 @@ var _ = ginkgo.Describe("InferencePool", func() {
4555

4656
ginkgo.When("The Inference Extension is running", func() {
4757
ginkgo.It("Should route traffic to target model servers", func() {
48-
ginkgo.By("Creating an InferenceModel resource")
49-
infModel := newInferenceModel(nsName)
50-
gomega.Expect(cli.Create(ctx, infModel)).To(gomega.Succeed())
51-
52-
ginkgo.By("Ensuring the InferenceModel resource exists in the namespace")
53-
gomega.Eventually(func() error {
54-
return cli.Get(ctx, types.NamespacedName{Namespace: infModel.Namespace, Name: infModel.Name}, infModel)
55-
}, existsTimeout, interval).Should(gomega.Succeed())
56-
57-
ginkgo.By("Verifying connectivity through the inference extension")
58-
curlCmd := getCurlCommand(envoyName, nsName, envoyPort, modelName, curlTimeout)
59-
60-
// Ensure the expected responses include the inferencemodel target model names.
61-
var expected []string
62-
for _, m := range infModel.Spec.TargetModels {
63-
expected = append(expected, m.Name)
64-
}
65-
actual := make(map[string]int)
66-
gomega.Eventually(func() error {
67-
resp, err := testutils.ExecCommandInPod(ctx, cfg, scheme, kubeCli, nsName, "curl", "curl", curlCmd)
68-
if err != nil {
69-
return err
70-
}
71-
if !strings.Contains(resp, "200 OK") {
72-
return fmt.Errorf("did not get 200 OK: %s", resp)
73-
}
74-
for _, m := range expected {
75-
if strings.Contains(resp, m) {
76-
actual[m] = 0
77-
}
78-
}
79-
var got []string
80-
for m := range actual {
81-
got = append(got, m)
82-
}
83-
// Compare ignoring order
84-
if !cmp.Equal(got, expected, cmpopts.SortSlices(func(a, b string) bool { return a < b })) {
85-
return fmt.Errorf("actual (%v) != expected (%v); resp=%q", got, expected, resp)
58+
for _, t := range []struct {
59+
api string
60+
promptOrMessages string
61+
}{
62+
{
63+
api: "/completions",
64+
promptOrMessages: "Write as if you were a critic: San Francisco",
65+
},
66+
{
67+
api: "/chat/completions",
68+
promptOrMessages: `[{"role": "user", "content": "Write as if you were a critic: San Francisco"}]`,
69+
},
70+
{
71+
api: "/chat/completions",
72+
promptOrMessages: `[{"role": "user", "content": "Write as if you were a critic: San Francisco"},` +
73+
`{"role": "assistant", "content": "Okay, let's see..."},` +
74+
`{"role": "user", "content": "Now summarize your thoughts."}]`,
75+
},
76+
} {
77+
ginkgo.By("Verifying connectivity through the inference extension with " +
78+
t.api + " api and prompt/messages: " + t.promptOrMessages)
79+
80+
// Ensure the expected responses include the inferencemodel target model names.
81+
var expected []string
82+
for _, m := range infModel.Spec.TargetModels {
83+
expected = append(expected, m.Name)
8684
}
85+
curlCmd := getCurlCommand(envoyName, nsName, envoyPort, modelName, curlTimeout, t.api, t.promptOrMessages)
8786

88-
return nil
89-
}, readyTimeout, curlInterval).Should(gomega.Succeed())
87+
actual := make(map[string]int)
88+
gomega.Eventually(func() error {
89+
resp, err := testutils.ExecCommandInPod(ctx, cfg, scheme, kubeCli, nsName, "curl", "curl", curlCmd)
90+
if err != nil {
91+
return err
92+
}
93+
if !strings.Contains(resp, "200 OK") {
94+
return fmt.Errorf("did not get 200 OK: %s", resp)
95+
}
96+
for _, m := range expected {
97+
if strings.Contains(resp, m) {
98+
actual[m] = 0
99+
}
100+
}
101+
var got []string
102+
for m := range actual {
103+
got = append(got, m)
104+
}
105+
// Compare ignoring order
106+
if !cmp.Equal(got, expected, cmpopts.SortSlices(func(a, b string) bool { return a < b })) {
107+
return fmt.Errorf("actual (%v) != expected (%v); resp=%q", got, expected, resp)
108+
}
90109

110+
return nil
111+
}, readyTimeout, curlInterval).Should(gomega.Succeed())
112+
}
91113
})
92114
})
93115
})
@@ -110,16 +132,23 @@ func newInferenceModel(ns string) *v1alpha2.InferenceModel {
110132

111133
// getCurlCommand returns the command, as a slice of strings, for curl'ing
// the test model server at the given name, namespace, port, and model name.
//
// timeout is truncated to whole seconds and passed to curl's --max-time.
// api is the endpoint path appended to "/v1" and selects the request body:
//
//   - "/completions": promptOrMessages is plain text and is sent as the
//     JSON string value of "prompt" (escaped here).
//   - "/chat/completions": promptOrMessages must already be a valid JSON
//     value (an array of message objects) and is embedded verbatim as
//     "messages".
//
// Any other api value produces an empty request body.
func getCurlCommand(name, ns, port, model string, timeout time.Duration, api string, promptOrMessages string) []string {
	var body string
	switch api {
	case "/completions":
		// Both values are free-form text, so they are escaped rather than
		// spliced between quotes; a quote or backslash would otherwise
		// yield malformed JSON.
		body = fmt.Sprintf(`{"model": %s, "prompt": %s, "max_tokens": 100, "temperature": 0}`,
			jsonQuote(model), jsonQuote(promptOrMessages))
	case "/chat/completions":
		// promptOrMessages is pre-built JSON here and must not be re-quoted.
		body = fmt.Sprintf(`{"model": %s, "messages": %s, "max_tokens": 100, "temperature": 0}`,
			jsonQuote(model), promptOrMessages)
	}
	return []string{
		"curl",
		"-i",
		"--max-time",
		strconv.Itoa(int(timeout.Seconds())),
		fmt.Sprintf("%s.%s.svc:%s/v1%s", name, ns, port, api),
		"-H",
		"Content-Type: application/json",
		"-d",
		body,
	}
}

// jsonQuote returns s as a double-quoted JSON string literal, escaping
// quotes, backslashes, and control characters below U+0020. It is written
// with fmt and strings only so that it needs no additional imports.
func jsonQuote(s string) string {
	var b strings.Builder
	b.Grow(len(s) + 2)
	b.WriteByte('"')
	for _, r := range s {
		switch {
		case r == '"' || r == '\\':
			b.WriteByte('\\')
			b.WriteRune(r)
		case r < 0x20:
			fmt.Fprintf(&b, `\u%04x`, r)
		default:
			b.WriteRune(r)
		}
	}
	b.WriteByte('"')
	return b.String()
}

0 commit comments

Comments
 (0)