Skip to content

Commit bfc1c7a

Browse files
committed
updating tests
1 parent 5503c85 commit bfc1c7a

File tree

2 files changed

+17
-44
lines changed

2 files changed

+17
-44
lines changed

src/c++/perf_analyzer/genai-perf/genai_perf/parser.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,7 @@
5555
"chat": "v1/chat/completions",
5656
"completions": "v1/completions",
5757
"generate": "v2/models/{MODEL_NAME}/generate",
58+
"kserve": "v2/models/{MODEL_NAME}/infer",
5859
}
5960

6061

@@ -380,10 +381,10 @@ def _add_endpoint_args(parser):
380381
endpoint_group.add_argument(
381382
"--backend",
382383
type=str,
383-
choices=utils.get_enum_names(OutputFormat)[2:],
384+
choices=["tensorrtllm", "vllm"],
384385
default="tensorrtllm",
385386
required=False,
386-
help=f'When using the "triton" service-kind, '
387+
help=f'When using the "kserve" endpoint type, '
387388
"this is the backend of the model. "
388389
"For the TENSORRT-LLM backend, you currently must set "
389390
"'exclude_input_in_output' to true in the model config to "

src/c++/perf_analyzer/genai-perf/tests/test_cli.py

Lines changed: 14 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -80,19 +80,17 @@ def test_help_version_arguments_output_and_exit(
8080
),
8181
(["--concurrency", "3"], {"concurrency": 3}),
8282
(
83-
["--endpoint-type", "completions", "--service-kind", "openai"],
83+
["--endpoint-type", "completions"],
8484
{"endpoint": "v1/completions"},
8585
),
8686
(
87-
["--endpoint-type", "chat", "--service-kind", "openai"],
87+
["--endpoint-type", "chat"],
8888
{"endpoint": "v1/chat/completions"},
8989
),
9090
(
9191
[
9292
"--endpoint-type",
9393
"chat",
94-
"--service-kind",
95-
"openai",
9694
"--endpoint",
9795
"custom/address",
9896
],
@@ -102,8 +100,6 @@ def test_help_version_arguments_output_and_exit(
102100
[
103101
"--endpoint-type",
104102
"chat",
105-
"--service-kind",
106-
"openai",
107103
"--endpoint",
108104
" /custom/address",
109105
],
@@ -113,8 +109,6 @@ def test_help_version_arguments_output_and_exit(
113109
[
114110
"--endpoint-type",
115111
"completions",
116-
"--service-kind",
117-
"openai",
118112
"--endpoint",
119113
"custom/address",
120114
],
@@ -164,9 +158,9 @@ def test_help_version_arguments_output_and_exit(
164158
(["--random-seed", "8"], {"random_seed": 8}),
165159
(["--request-rate", "9.0"], {"request_rate": 9.0}),
166160
(["-s", "99.5"], {"stability_percentage": 99.5}),
167-
(["--service-kind", "triton"], {"service_kind": "triton"}),
161+
(["--endpoint-type", "kserve"], {"service_kind": "triton"}),
168162
(
169-
["--service-kind", "openai", "--endpoint-type", "chat"],
163+
["--endpoint-type", "chat"],
170164
{"service_kind": "openai", "endpoint": "v1/chat/completions"},
171165
),
172166
(["--stability-percentage", "99.5"], {"stability_percentage": 99.5}),
@@ -263,25 +257,25 @@ def test_file_flags_parsed(self, monkeypatch, mocker):
263257
"arg, expected_path",
264258
[
265259
(
266-
["--service-kind", "openai", "--endpoint-type", "chat"],
260+
["--endpoint-type", "chat"],
267261
"artifacts/test_model-openai-chat-concurrency1",
268262
),
269263
(
270-
["--service-kind", "openai", "--endpoint-type", "completions"],
264+
["--endpoint-type", "completions"],
271265
"artifacts/test_model-openai-completions-concurrency1",
272266
),
273267
(
274-
["--service-kind", "triton", "--backend", "tensorrtllm"],
268+
["--endpoint-type", "kserve", "--backend", "tensorrtllm"],
275269
"artifacts/test_model-triton-tensorrtllm-concurrency1",
276270
),
277271
(
278-
["--service-kind", "triton", "--backend", "vllm"],
272+
["--endpoint-type", "kserve", "--backend", "vllm"],
279273
"artifacts/test_model-triton-vllm-concurrency1",
280274
),
281275
(
282276
[
283-
"--service-kind",
284-
"triton",
277+
"--endpoint-type",
278+
"kserve",
285279
"--backend",
286280
"vllm",
287281
"--concurrency",
@@ -318,8 +312,6 @@ def test_default_profile_export_filepath(
318312
[
319313
"--model",
320314
"hello/world/test_model",
321-
"--service-kind",
322-
"openai",
323315
"--endpoint-type",
324316
"chat",
325317
],
@@ -407,22 +399,6 @@ def test_unrecognized_arg(self, monkeypatch, capsys):
407399
@pytest.mark.parametrize(
408400
"args, expected_output",
409401
[
410-
(
411-
["genai-perf", "-m", "test_model", "--service-kind", "openai"],
412-
"The --endpoint-type option is required when using the 'openai' service-kind.",
413-
),
414-
(
415-
[
416-
"genai-perf",
417-
"-m",
418-
"test_model",
419-
"--service-kind",
420-
"openai",
421-
"--endpoint",
422-
"custom/address",
423-
],
424-
"The --endpoint-type option is required when using the 'openai' service-kind.",
425-
),
426402
(
427403
["genai-perf", "-m", "test_model", "--output-tokens-stddev", "5"],
428404
"The --output-tokens-mean option is required when using --output-tokens-stddev.",
@@ -450,8 +426,6 @@ def test_unrecognized_arg(self, monkeypatch, capsys):
450426
"genai-perf",
451427
"-m",
452428
"test_model",
453-
"--service-kind",
454-
"openai",
455429
"--endpoint-type",
456430
"chat",
457431
"--output-tokens-mean",
@@ -476,17 +450,15 @@ def test_conditional_errors(self, args, expected_output, monkeypatch, capsys):
476450
"args, expected_format",
477451
[
478452
(
479-
["--service-kind", "openai", "--endpoint-type", "chat"],
453+
["--endpoint-type", "chat"],
480454
OutputFormat.OPENAI_CHAT_COMPLETIONS,
481455
),
482456
(
483-
["--service-kind", "openai", "--endpoint-type", "completions"],
457+
["--endpoint-type", "completions"],
484458
OutputFormat.OPENAI_COMPLETIONS,
485459
),
486460
(
487461
[
488-
"--service-kind",
489-
"openai",
490462
"--endpoint-type",
491463
"completions",
492464
"--endpoint",
@@ -495,10 +467,10 @@ def test_conditional_errors(self, args, expected_output, monkeypatch, capsys):
495467
OutputFormat.OPENAI_COMPLETIONS,
496468
),
497469
(
498-
["--service-kind", "triton", "--backend", "tensorrtllm"],
470+
["--endpoint-type", "kserve", "--backend", "tensorrtllm"],
499471
OutputFormat.TENSORRTLLM,
500472
),
501-
(["--service-kind", "triton", "--backend", "vllm"], OutputFormat.VLLM),
473+
(["--endpoint-type", "kserve", "--backend", "vllm"], OutputFormat.VLLM),
502474
],
503475
)
504476
def test_inferred_output_format(self, monkeypatch, args, expected_format):

0 commit comments

Comments (0)