Commit 5503c85

update with precommit run
1 parent 56e662c commit 5503c85

5 files changed: +40 -44 lines changed


src/c++/perf_analyzer/client_backend/openai/openai_client.cc

Lines changed: 5 additions & 8 deletions
@@ -115,15 +115,14 @@ ChatCompletionClient::ResponseHeaderHandler(
 hdr.find("text/event-stream") != std::string::npos) {
 request->is_stream_ = true;
 }
-
+
 return byte_size;
 }

 size_t
 ChatCompletionClient::ResponseHandler(
 void* contents, size_t size, size_t nmemb, void* userp)
 {
-
 // [TODO TMA-1666] verify if the SSE responses received are complete, or the
 // response need to be stitched first. To verify, print out the received
 // responses from SendResponse() to make sure the OpenAI server doesn't chunk
@@ -161,7 +160,7 @@ ChatCompletionClient::ResponseHandler(
 // RECV_END so that we always have the time of the last.
 request->timer_.CaptureTimestamp(
 triton::client::RequestTimers::Kind::RECV_END);
-
+
 return result_bytes;
 }

@@ -172,8 +171,6 @@ ChatCompletionClient::AsyncInfer(
 std::string& serialized_request_body, const std::string& request_id,
 const Headers& headers)
 {
-
-
 if (callback == nullptr) {
 return Error(
 "Callback function must be provided along with AsyncInfer() call.");
@@ -189,7 +186,7 @@ ChatCompletionClient::AsyncInfer(
 // will only send the first final response
 //
 // if (!request->is_stream_) {
-//
+//
 request->SendResponse(true /* is_final */, false /* is_null */);
 // }
 };
@@ -202,7 +199,7 @@ ChatCompletionClient::AsyncInfer(
 request->AddInput(
 reinterpret_cast<uint8_t*>(serialized_request_body.data()),
 serialized_request_body.size());
-
+
 CURL* multi_easy_handle = curl_easy_init();
 Error err = PreRunProcessing(multi_easy_handle, raw_request, headers);
 if (!err.IsOk()) {
@@ -243,7 +240,7 @@ ChatCompletionClient::PreRunProcessing(

 // response data handled by ResponseHandler()
 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, ResponseHandler);
-curl_easy_setopt(curl, CURLOPT_WRITEDATA, request);
+curl_easy_setopt(curl, CURLOPT_WRITEDATA, request);

 const curl_off_t post_byte_size = request->total_input_byte_size_;
 curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE_LARGE, post_byte_size);

src/c++/perf_analyzer/client_backend/openai/openai_client.h

Lines changed: 1 addition & 1 deletion
@@ -173,7 +173,7 @@ class ChatCompletionClient : public HttpClient {
 void* contents, size_t size, size_t nmemb, void* userp);
 static size_t ResponseHeaderHandler(
 void* contents, size_t size, size_t nmemb, void* userp);
-
+
 Error UpdateInferStat(const triton::client::RequestTimers& timer);
 InferStat infer_stat_;
 };

src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py

Lines changed: 24 additions & 30 deletions
@@ -468,24 +468,22 @@ def _convert_generic_json_to_openai_chat_completions_format(

 @classmethod
 def _convert_generic_json_to_generate_format(
-cls,
-dataset_json: Dict,
-add_model_name: bool,
-add_stream: bool,
-extra_inputs: Dict,
-output_tokens_mean: int,
-output_tokens_stddev: int,
-output_tokens_deterministic: bool,
-model_name: str = "",
+cls,
+dataset_json: Dict,
+add_model_name: bool,
+add_stream: bool,
+extra_inputs: Dict,
+output_tokens_mean: int,
+output_tokens_stddev: int,
+output_tokens_deterministic: bool,
+model_name: str = "",
 ) -> Dict:
-
 (
 system_role_headers,
 user_role_headers,
 text_input_headers,
 ) = cls._determine_json_feature_roles(dataset_json)

-
 pa_json = cls._populate_triton_generate_output_json(
 dataset_json,
 system_role_headers,
@@ -502,7 +500,6 @@ def _convert_generic_json_to_generate_format(

 return pa_json

-
 @classmethod
 def _convert_generic_json_to_openai_completions_format(
 cls,
@@ -701,26 +698,25 @@ def _populate_openai_chat_completions_output_json(
 )

 return pa_json
-
+
 @classmethod
 def _populate_triton_generate_output_json(
-cls,
-dataset: Dict,
-system_role_headers: List[str],
-user_role_headers: List[str],
-text_input_headers: List[str],
-add_model_name: bool,
-add_stream: bool,
-extra_inputs: Dict,
-output_tokens_mean: int,
-output_tokens_stddev: int,
-output_tokens_deterministic: bool,
-model_name: str = "",
+cls,
+dataset: Dict,
+system_role_headers: List[str],
+user_role_headers: List[str],
+text_input_headers: List[str],
+add_model_name: bool,
+add_stream: bool,
+extra_inputs: Dict,
+output_tokens_mean: int,
+output_tokens_stddev: int,
+output_tokens_deterministic: bool,
+model_name: str = "",
 ) -> Dict:
+pa_json: dict = {"data": [{"payload": [{}]} for _ in dataset["rows"]]}

-pa_json = {"data":[{"payload":[{}]} for _ in dataset["rows"]]}
-
-for index, entry in enumerate(dataset["rows"]):
+for index, entry in enumerate(dataset["rows"]):
 for header, content in entry.items():
 new_text_input = cls._create_new_text_input(
 header,
@@ -745,8 +741,6 @@ def _populate_triton_generate_output_json(

 return pa_json

-
-
 @classmethod
 def _populate_openai_completions_output_json(
 cls,
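
Note on the hunk above: besides the signature reflow, the precommit pass typed and moved the `pa_json` initializer so that one empty payload dict is pre-allocated per dataset row before the row loop fills it in. The following is a minimal standalone sketch of that shape only; the sample dataset and the way each payload slot is filled are illustrative assumptions, since the real loop routes values through `_create_new_text_input()` and role-specific headers.

    # Sketch only, not the genai-perf implementation.
    from typing import Dict, List

    dataset: Dict[str, List[Dict[str, str]]] = {
        "rows": [
            {"text_input": "What is Triton?"},
            {"text_input": "Summarize this file."},
        ]
    }

    # Same skeleton as the new initializer in the diff above:
    # one payload dict per dataset row.
    pa_json: dict = {"data": [{"payload": [{}]} for _ in dataset["rows"]]}

    for index, entry in enumerate(dataset["rows"]):
        for header, content in entry.items():
            # Hypothetical fill; the real code builds the text input via
            # cls._create_new_text_input() and role-specific handling.
            pa_json["data"][index]["payload"][0][header] = content

    print(pa_json["data"][0]["payload"][0])  # {'text_input': 'What is Triton?'}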

src/c++/perf_analyzer/genai-perf/genai_perf/llm_metrics.py

Lines changed: 0 additions & 1 deletion
@@ -711,7 +711,6 @@ def _run_tokenizer(self, output_texts: List[str]) -> List[List[int]]:
 return [out[1:] for out in encodings.data["input_ids"]]

 def _extract_generate_text_output(self, response: str) -> str:
-
 response = remove_sse_prefix(response)

 if response == "":

src/c++/perf_analyzer/genai-perf/genai_perf/parser.py

Lines changed: 10 additions & 4 deletions
@@ -51,7 +51,11 @@

 logger = logging.getLogger(__name__)

-_endpoint_type_map = {"chat": "v1/chat/completions", "completions": "v1/completions", "generate":"v2/models/{MODEL_NAME}/generate"}
+_endpoint_type_map = {
+"chat": "v1/chat/completions",
+"completions": "v1/completions",
+"generate": "v2/models/{MODEL_NAME}/generate",
+}


 def _check_model_args(
@@ -115,8 +119,10 @@ def _check_conditional_args(
 if args.endpoint is not None:
 args.endpoint = args.endpoint.lstrip(" /")
 else:
-args.endpoint = _endpoint_type_map[args.endpoint_type].format(MODEL_NAME=args.model)
-
+args.endpoint = _endpoint_type_map[args.endpoint_type].format(
+MODEL_NAME=args.model
+)
+
 # Output token distribution checks
 if args.output_tokens_mean == LlmInputs.DEFAULT_OUTPUT_TOKENS_MEAN:
 if args.output_tokens_stddev != LlmInputs.DEFAULT_OUTPUT_TOKENS_STDDEV:
@@ -399,7 +405,7 @@ def _add_endpoint_args(parser):
 required=False,
 help=f"The endpoint-type for requests. Inputs will be formatted according to endpoint-type.",
 )
-
+
 endpoint_group.add_argument(
 "--streaming",
 action="store_true",
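
For context on the parser.py hunks above: when `--endpoint` is not supplied, the endpoint is derived from `_endpoint_type_map`, and for the `generate` type the `{MODEL_NAME}` placeholder is filled from `--model`. Below is a simplified, self-contained sketch of that resolution; the argparse wiring is illustrative only and not the actual genai-perf CLI definition.

    # Sketch of the default-endpoint resolution shown in the diff above.
    import argparse

    _endpoint_type_map = {
        "chat": "v1/chat/completions",
        "completions": "v1/completions",
        "generate": "v2/models/{MODEL_NAME}/generate",
    }

    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default="my_model")
    parser.add_argument("--endpoint-type", default="generate")
    parser.add_argument("--endpoint", default=None)
    args = parser.parse_args([])

    if args.endpoint is not None:
        args.endpoint = args.endpoint.lstrip(" /")
    else:
        # {MODEL_NAME} only appears in the "generate" template; str.format()
        # leaves the other templates unchanged.
        args.endpoint = _endpoint_type_map[args.endpoint_type].format(
            MODEL_NAME=args.model
        )

    print(args.endpoint)  # v2/models/my_model/generate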
