@@ -33,9 +33,19 @@ import (
33
33
)
34
34
35
35
var _ = ginkgo .Describe ("InferencePool" , func () {
36
+ var infModel * v1alpha2.InferenceModel
36
37
ginkgo .BeforeEach (func () {
37
38
ginkgo .By ("Waiting for the namespace to exist." )
38
39
namespaceExists (cli , nsName )
40
+
41
+ ginkgo .By ("Creating an InferenceModel resource" )
42
+ infModel = newInferenceModel (nsName )
43
+ gomega .Expect (cli .Create (ctx , infModel )).To (gomega .Succeed ())
44
+
45
+ ginkgo .By ("Ensuring the InferenceModel resource exists in the namespace" )
46
+ gomega .Eventually (func () error {
47
+ return cli .Get (ctx , types.NamespacedName {Namespace : infModel .Namespace , Name : infModel .Name }, infModel )
48
+ }, existsTimeout , interval ).Should (gomega .Succeed ())
39
49
})
40
50
41
51
ginkgo .AfterEach (func () {
@@ -45,49 +55,61 @@ var _ = ginkgo.Describe("InferencePool", func() {
45
55
46
56
ginkgo .When ("The Inference Extension is running" , func () {
47
57
ginkgo .It ("Should route traffic to target model servers" , func () {
48
- ginkgo .By ("Creating an InferenceModel resource" )
49
- infModel := newInferenceModel (nsName )
50
- gomega .Expect (cli .Create (ctx , infModel )).To (gomega .Succeed ())
51
-
52
- ginkgo .By ("Ensuring the InferenceModel resource exists in the namespace" )
53
- gomega .Eventually (func () error {
54
- return cli .Get (ctx , types.NamespacedName {Namespace : infModel .Namespace , Name : infModel .Name }, infModel )
55
- }, existsTimeout , interval ).Should (gomega .Succeed ())
56
-
57
- ginkgo .By ("Verifying connectivity through the inference extension" )
58
- curlCmd := getCurlCommand (envoyName , nsName , envoyPort , modelName , curlTimeout )
59
-
60
- // Ensure the expected responses include the inferencemodel target model names.
61
- var expected []string
62
- for _ , m := range infModel .Spec .TargetModels {
63
- expected = append (expected , m .Name )
64
- }
65
- actual := make (map [string ]int )
66
- gomega .Eventually (func () error {
67
- resp , err := testutils .ExecCommandInPod (ctx , cfg , scheme , kubeCli , nsName , "curl" , "curl" , curlCmd )
68
- if err != nil {
69
- return err
70
- }
71
- if ! strings .Contains (resp , "200 OK" ) {
72
- return fmt .Errorf ("did not get 200 OK: %s" , resp )
73
- }
74
- for _ , m := range expected {
75
- if strings .Contains (resp , m ) {
76
- actual [m ] = 0
77
- }
78
- }
79
- var got []string
80
- for m := range actual {
81
- got = append (got , m )
82
- }
83
- // Compare ignoring order
84
- if ! cmp .Equal (got , expected , cmpopts .SortSlices (func (a , b string ) bool { return a < b })) {
85
- return fmt .Errorf ("actual (%v) != expected (%v); resp=%q" , got , expected , resp )
58
+ for _ , t := range []struct {
59
+ api string
60
+ promptOrMessages string
61
+ }{
62
+ {
63
+ api : "/completions" ,
64
+ promptOrMessages : "Write as if you were a critic: San Francisco" ,
65
+ },
66
+ {
67
+ api : "/chat/completions" ,
68
+ promptOrMessages : `[{"role": "user", "content": "Write as if you were a critic: San Francisco"}]` ,
69
+ },
70
+ {
71
+ api : "/chat/completions" ,
72
+ promptOrMessages : `[{"role": "user", "content": "Write as if you were a critic: San Francisco"},` +
73
+ `{"role": "assistant", "content": "Okay, let's see..."},` +
74
+ `{"role": "user", "content": "Now summarize your thoughts."}]` ,
75
+ },
76
+ } {
77
+ ginkgo .By ("Verifying connectivity through the inference extension with " +
78
+ t .api + " api and prompt/messages: " + t .promptOrMessages )
79
+
80
+ // Ensure the expected responses include the inferencemodel target model names.
81
+ var expected []string
82
+ for _ , m := range infModel .Spec .TargetModels {
83
+ expected = append (expected , m .Name )
86
84
}
85
+ curlCmd := getCurlCommand (envoyName , nsName , envoyPort , modelName , curlTimeout , t .api , t .promptOrMessages )
87
86
88
- return nil
89
- }, readyTimeout , curlInterval ).Should (gomega .Succeed ())
87
+ actual := make (map [string ]int )
88
+ gomega .Eventually (func () error {
89
+ resp , err := testutils .ExecCommandInPod (ctx , cfg , scheme , kubeCli , nsName , "curl" , "curl" , curlCmd )
90
+ if err != nil {
91
+ return err
92
+ }
93
+ if ! strings .Contains (resp , "200 OK" ) {
94
+ return fmt .Errorf ("did not get 200 OK: %s" , resp )
95
+ }
96
+ for _ , m := range expected {
97
+ if strings .Contains (resp , m ) {
98
+ actual [m ] = 0
99
+ }
100
+ }
101
+ var got []string
102
+ for m := range actual {
103
+ got = append (got , m )
104
+ }
105
+ // Compare ignoring order
106
+ if ! cmp .Equal (got , expected , cmpopts .SortSlices (func (a , b string ) bool { return a < b })) {
107
+ return fmt .Errorf ("actual (%v) != expected (%v); resp=%q" , got , expected , resp )
108
+ }
90
109
110
+ return nil
111
+ }, readyTimeout , curlInterval ).Should (gomega .Succeed ())
112
+ }
91
113
})
92
114
})
93
115
})
@@ -110,16 +132,23 @@ func newInferenceModel(ns string) *v1alpha2.InferenceModel {
110
132
111
133
// getCurlCommand returns the command, as a slice of strings, for curl'ing
// the test model server at the given name, namespace, port, and model name.
//
// api is appended to "/v1" to form the request path and also selects the
// shape of the JSON request body:
//   - "/completions": promptOrMessages is a plain-text prompt; it is quoted
//     and escaped before being embedded as the "prompt" string.
//   - "/chat/completions": promptOrMessages must already be a JSON array of
//     chat messages; it is embedded verbatim as the "messages" value.
//
// Any other api value leaves the request body empty; the URL is still built
// from the given api, so callers should pass only the two values above.
//
// timeout is truncated to whole seconds for curl's --max-time flag.
func getCurlCommand(name, ns, port, model string, timeout time.Duration, api string, promptOrMessages string) []string {
	var body string
	switch api {
	case "/completions":
		// %q escapes embedded quotes and backslashes so the body stays valid
		// JSON for ordinary text prompts (it is not a full JSON encoder for
		// arbitrary control characters).
		body = fmt.Sprintf(`{"model": %q, "prompt": %q, "max_tokens": 100, "temperature": 0}`, model, promptOrMessages)
	case "/chat/completions":
		// The messages array is caller-supplied JSON and must not be re-quoted.
		body = fmt.Sprintf(`{"model": %q, "messages": %s, "max_tokens": 100, "temperature": 0}`, model, promptOrMessages)
	}

	return []string{
		"curl",
		"-i",
		"--max-time",
		strconv.Itoa(int(timeout.Seconds())),
		// No trailing whitespace: the URL is passed to curl as a single argv entry.
		fmt.Sprintf("%s.%s.svc:%s/v1%s", name, ns, port, api),
		"-H",
		"Content-Type: application/json",
		"-d",
		body,
	}
}
0 commit comments