@@ -72,19 +72,17 @@ def test_help_version_arguments_output_and_exit(
7272 [
7373 (["--concurrency" , "3" ], {"concurrency" : 3 }),
7474 (
75- ["--endpoint-type" , "completions" , "--service-kind" , "openai" ],
75+ (["--endpoint-type" , "completions" ],
7676 {"endpoint" : "v1/completions" },
7777 ),
7878 (
79- ["--endpoint-type" , "chat" , "--service-kind" , "openai" ],
79+ ["--endpoint-type" , "chat" ],
8080 {"endpoint" : "v1/chat/completions" },
8181 ),
8282 (
8383 [
8484 "--endpoint-type" ,
8585 "chat" ,
86- "--service-kind" ,
87- "openai" ,
8886 "--endpoint" ,
8987 "custom/address" ,
9088 ],
@@ -94,8 +92,6 @@ def test_help_version_arguments_output_and_exit(
9492 [
9593 "--endpoint-type" ,
9694 "chat" ,
97- "--service-kind" ,
98- "openai" ,
9995 "--endpoint" ,
10096 " /custom/address" ,
10197 ],
@@ -105,8 +101,6 @@ def test_help_version_arguments_output_and_exit(
105101 [
106102 "--endpoint-type" ,
107103 "completions" ,
108- "--service-kind" ,
109- "openai" ,
110104 "--endpoint" ,
111105 "custom/address" ,
112106 ],
@@ -159,9 +153,9 @@ def test_help_version_arguments_output_and_exit(
159153 ),
160154 (["--random-seed" , "8" ], {"random_seed" : 8 }),
161155 (["--request-rate" , "9.0" ], {"request_rate" : 9.0 }),
162- (["--service-kind " , "triton " ], {"service_kind" : "triton" }),
156+ (["--endpoint-type" , "kserve" ], {"service_kind" : "triton" }),
163157 (
164- ["--service-kind" , "openai" , "-- endpoint-type" , "chat" ],
158+ ["--endpoint-type" , "chat" ],
165159 {"service_kind" : "openai" , "endpoint" : "v1/chat/completions" },
166160 ),
167161 (["--stability-percentage" , "99.5" ], {"stability_percentage" : 99.5 }),
@@ -209,25 +203,25 @@ def test_file_flags_parsed(self, monkeypatch, mocker):
209203 "arg, expected_path" ,
210204 [
211205 (
212- ["--service-kind" , "openai" , "-- endpoint-type" , "chat" ],
206+ ["--endpoint-type" , "chat" ],
213207 "artifacts/test_model-openai-chat-concurrency1" ,
214208 ),
215209 (
216- ["--service-kind" , "openai" , "-- endpoint-type" , "completions" ],
210+ ["--endpoint-type" , "completions" ],
217211 "artifacts/test_model-openai-completions-concurrency1" ,
218212 ),
219213 (
220- ["--service-kind " , "triton " , "--backend" , "tensorrtllm" ],
214+ ["--endpoint-type" , "kserve" , "--backend" , "tensorrtllm" ],
221215 "artifacts/test_model-triton-tensorrtllm-concurrency1" ,
222216 ),
223217 (
224- ["--service-kind " , "triton " , "--backend" , "vllm" ],
218+ ["--endpoint-type" , "kserve" , "--backend" , "vllm" ],
225219 "artifacts/test_model-triton-vllm-concurrency1" ,
226220 ),
227221 (
228222 [
229- "--service-kind " ,
230- "triton " ,
223+ "--endpoint-type" ,
224+ "kserve" ,
231225 "--backend" ,
232226 "vllm" ,
233227 "--concurrency" ,
@@ -263,8 +257,6 @@ def test_default_profile_export_filepath(
263257 [
264258 "--model" ,
265259 "hello/world/test_model" ,
266- "--service-kind" ,
267- "openai" ,
268260 "--endpoint-type" ,
269261 "chat" ,
270262 ],
@@ -351,22 +343,6 @@ def test_unrecognized_arg(self, monkeypatch, capsys):
351343 @pytest .mark .parametrize (
352344 "args, expected_output" ,
353345 [
354- (
355- ["genai-perf" , "-m" , "test_model" , "--service-kind" , "openai" ],
356- "The --endpoint-type option is required when using the 'openai' service-kind." ,
357- ),
358- (
359- [
360- "genai-perf" ,
361- "-m" ,
362- "test_model" ,
363- "--service-kind" ,
364- "openai" ,
365- "--endpoint" ,
366- "custom/address" ,
367- ],
368- "The --endpoint-type option is required when using the 'openai' service-kind." ,
369- ),
370346 (
371347 ["genai-perf" , "-m" , "test_model" , "--output-tokens-stddev" , "5" ],
372348 "The --output-tokens-mean option is required when using --output-tokens-stddev." ,
@@ -394,8 +370,6 @@ def test_unrecognized_arg(self, monkeypatch, capsys):
394370 "genai-perf" ,
395371 "-m" ,
396372 "test_model" ,
397- "--service-kind" ,
398- "openai" ,
399373 "--endpoint-type" ,
400374 "chat" ,
401375 "--output-tokens-mean" ,
@@ -420,17 +394,15 @@ def test_conditional_errors(self, args, expected_output, monkeypatch, capsys):
420394 "args, expected_format" ,
421395 [
422396 (
423- ["--service-kind" , "openai" , "-- endpoint-type" , "chat" ],
397+ ["--endpoint-type" , "chat" ],
424398 OutputFormat .OPENAI_CHAT_COMPLETIONS ,
425399 ),
426400 (
427- ["--service-kind" , "openai" , "-- endpoint-type" , "completions" ],
401+ ["--endpoint-type" , "completions" ],
428402 OutputFormat .OPENAI_COMPLETIONS ,
429403 ),
430404 (
431405 [
432- "--service-kind" ,
433- "openai" ,
434406 "--endpoint-type" ,
435407 "completions" ,
436408 "--endpoint" ,
@@ -439,10 +411,10 @@ def test_conditional_errors(self, args, expected_output, monkeypatch, capsys):
439411 OutputFormat .OPENAI_COMPLETIONS ,
440412 ),
441413 (
442- ["--service-kind " , "triton " , "--backend" , "tensorrtllm" ],
414+ (["--endpoint-type" , "kserve" , "--backend" , "tensorrtllm" ],
443415 OutputFormat .TENSORRTLLM ,
444416 ),
445- (["--service-kind " , "triton " , "--backend" , "vllm" ], OutputFormat .VLLM ),
417+ (["--endpoint-type" , "kserve" , "--backend" , "vllm" ], OutputFormat .VLLM ),
446418 ],
447419 )
448420 def test_inferred_output_format (self , monkeypatch , args , expected_format ):