
Commit 91bceb8

Commit message: updating tests
1 parent: 2aee1fe

2 files changed: +17, -44 lines


src/c++/perf_analyzer/genai-perf/genai_perf/parser.py
Lines changed: 3 additions & 2 deletions

@@ -50,6 +50,7 @@
     "chat": "v1/chat/completions",
     "completions": "v1/completions",
     "generate": "v2/models/{MODEL_NAME}/generate",
+    "kserve": "v2/models/{MODEL_NAME}/infer",
 }


@@ -348,10 +349,10 @@ def _add_endpoint_args(parser):
     endpoint_group.add_argument(
         "--backend",
         type=str,
-        choices=utils.get_enum_names(OutputFormat)[2:],
+        choices=["tensorrtllm", "vllm"],
         default="tensorrtllm",
         required=False,
-        help=f'When using the "triton" service-kind, '
+        help=f'When using the "kserve" endpoint type, '
         "this is the backend of the model. "
         "For the TENSORRT-LLM backend, you currently must set "
         "'exclude_input_in_output' to true in the model config to "

src/c++/perf_analyzer/genai-perf/tests/test_cli.py
Lines changed: 14 additions & 42 deletions

@@ -72,19 +72,17 @@ def test_help_version_arguments_output_and_exit(
         [
             (["--concurrency", "3"], {"concurrency": 3}),
             (
-                ["--endpoint-type", "completions", "--service-kind", "openai"],
+                ["--endpoint-type", "completions"],
                 {"endpoint": "v1/completions"},
             ),
             (
-                ["--endpoint-type", "chat", "--service-kind", "openai"],
+                ["--endpoint-type", "chat"],
                 {"endpoint": "v1/chat/completions"},
             ),
             (
                 [
                     "--endpoint-type",
                     "chat",
-                    "--service-kind",
-                    "openai",
                     "--endpoint",
                     "custom/address",
                 ],
@@ -94,8 +92,6 @@ def test_help_version_arguments_output_and_exit(
                 [
                     "--endpoint-type",
                     "chat",
-                    "--service-kind",
-                    "openai",
                     "--endpoint",
                     " /custom/address",
                 ],
@@ -105,8 +101,6 @@ def test_help_version_arguments_output_and_exit(
                 [
                     "--endpoint-type",
                     "completions",
-                    "--service-kind",
-                    "openai",
                     "--endpoint",
                     "custom/address",
                 ],
@@ -159,9 +153,9 @@ def test_help_version_arguments_output_and_exit(
             ),
             (["--random-seed", "8"], {"random_seed": 8}),
             (["--request-rate", "9.0"], {"request_rate": 9.0}),
-            (["--service-kind", "triton"], {"service_kind": "triton"}),
+            (["--endpoint-type", "kserve"], {"service_kind": "triton"}),
             (
-                ["--service-kind", "openai", "--endpoint-type", "chat"],
+                ["--endpoint-type", "chat"],
                 {"service_kind": "openai", "endpoint": "v1/chat/completions"},
             ),
             (["--stability-percentage", "99.5"], {"stability_percentage": 99.5}),
@@ -209,25 +203,25 @@ def test_file_flags_parsed(self, monkeypatch, mocker):
         "arg, expected_path",
         [
             (
-                ["--service-kind", "openai", "--endpoint-type", "chat"],
+                ["--endpoint-type", "chat"],
                 "artifacts/test_model-openai-chat-concurrency1",
             ),
             (
-                ["--service-kind", "openai", "--endpoint-type", "completions"],
+                ["--endpoint-type", "completions"],
                 "artifacts/test_model-openai-completions-concurrency1",
             ),
             (
-                ["--service-kind", "triton", "--backend", "tensorrtllm"],
+                ["--endpoint-type", "kserve", "--backend", "tensorrtllm"],
                 "artifacts/test_model-triton-tensorrtllm-concurrency1",
             ),
             (
-                ["--service-kind", "triton", "--backend", "vllm"],
+                ["--endpoint-type", "kserve", "--backend", "vllm"],
                 "artifacts/test_model-triton-vllm-concurrency1",
             ),
             (
                 [
-                    "--service-kind",
-                    "triton",
+                    "--endpoint-type",
+                    "kserve",
                     "--backend",
                     "vllm",
                     "--concurrency",
@@ -263,8 +257,6 @@ def test_default_profile_export_filepath(
             [
                 "--model",
                 "hello/world/test_model",
-                "--service-kind",
-                "openai",
                 "--endpoint-type",
                 "chat",
             ],
@@ -351,22 +343,6 @@ def test_unrecognized_arg(self, monkeypatch, capsys):
     @pytest.mark.parametrize(
         "args, expected_output",
         [
-            (
-                ["genai-perf", "-m", "test_model", "--service-kind", "openai"],
-                "The --endpoint-type option is required when using the 'openai' service-kind.",
-            ),
-            (
-                [
-                    "genai-perf",
-                    "-m",
-                    "test_model",
-                    "--service-kind",
-                    "openai",
-                    "--endpoint",
-                    "custom/address",
-                ],
-                "The --endpoint-type option is required when using the 'openai' service-kind.",
-            ),
             (
                 ["genai-perf", "-m", "test_model", "--output-tokens-stddev", "5"],
                 "The --output-tokens-mean option is required when using --output-tokens-stddev.",
@@ -394,8 +370,6 @@ def test_unrecognized_arg(self, monkeypatch, capsys):
                     "genai-perf",
                     "-m",
                     "test_model",
-                    "--service-kind",
-                    "openai",
                     "--endpoint-type",
                     "chat",
                     "--output-tokens-mean",
@@ -420,17 +394,15 @@ def test_conditional_errors(self, args, expected_output, monkeypatch, capsys):
         "args, expected_format",
         [
             (
-                ["--service-kind", "openai", "--endpoint-type", "chat"],
+                ["--endpoint-type", "chat"],
                 OutputFormat.OPENAI_CHAT_COMPLETIONS,
             ),
             (
-                ["--service-kind", "openai", "--endpoint-type", "completions"],
+                ["--endpoint-type", "completions"],
                 OutputFormat.OPENAI_COMPLETIONS,
             ),
             (
                 [
-                    "--service-kind",
-                    "openai",
                     "--endpoint-type",
                     "completions",
                     "--endpoint",
@@ -439,10 +411,10 @@ def test_conditional_errors(self, args, expected_output, monkeypatch, capsys):
                 OutputFormat.OPENAI_COMPLETIONS,
             ),
             (
-                ["--service-kind", "triton", "--backend", "tensorrtllm"],
+                ["--endpoint-type", "kserve", "--backend", "tensorrtllm"],
                 OutputFormat.TENSORRTLLM,
             ),
-            (["--service-kind", "triton", "--backend", "vllm"], OutputFormat.VLLM),
+            (["--endpoint-type", "kserve", "--backend", "vllm"], OutputFormat.VLLM),
         ],
     )
     def test_inferred_output_format(self, monkeypatch, args, expected_format):
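Taken together, the test updates show the migration path for callers: drop --service-kind and select the Triton path with --endpoint-type kserve instead. An illustrative before/after in the argv-list style the tests themselves use (flags taken from the cases above; the exact command is illustrative, not prescribed by the commit):

# Before this commit: service kind passed explicitly.
["genai-perf", "-m", "test_model", "--service-kind", "triton", "--backend", "vllm"]

# After this commit: service kind is inferred from the endpoint type.
["genai-perf", "-m", "test_model", "--endpoint-type", "kserve", "--backend", "vllm"]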
