
Commit 2aee1fe

update with precommit run
1 parent 13a710a commit 2aee1fe

5 files changed: +40 -44 lines changed


src/c++/perf_analyzer/client_backend/openai/openai_client.cc

Lines changed: 5 additions & 8 deletions

@@ -115,15 +115,14 @@ ChatCompletionClient::ResponseHeaderHandler(
       hdr.find("text/event-stream") != std::string::npos) {
     request->is_stream_ = true;
   }
-
+
   return byte_size;
 }
 
 size_t
 ChatCompletionClient::ResponseHandler(
     void* contents, size_t size, size_t nmemb, void* userp)
 {
-
   // [TODO TMA-1666] verify if the SSE responses received are complete, or the
   // response need to be stitched first. To verify, print out the received
   // responses from SendResponse() to make sure the OpenAI server doesn't chunk
@@ -161,7 +160,7 @@ ChatCompletionClient::ResponseHandler(
   // RECV_END so that we always have the time of the last.
   request->timer_.CaptureTimestamp(
       triton::client::RequestTimers::Kind::RECV_END);
-
+
   return result_bytes;
 }
 
@@ -172,8 +171,6 @@ ChatCompletionClient::AsyncInfer(
     std::string& serialized_request_body, const std::string& request_id,
     const Headers& headers)
 {
-
-
   if (callback == nullptr) {
     return Error(
         "Callback function must be provided along with AsyncInfer() call.");
@@ -189,7 +186,7 @@ ChatCompletionClient::AsyncInfer(
     // will only send the first final response
     //
     // if (!request->is_stream_) {
-    //
+    //
     request->SendResponse(true /* is_final */, false /* is_null */);
     // }
   };
@@ -202,7 +199,7 @@ ChatCompletionClient::AsyncInfer(
   request->AddInput(
       reinterpret_cast<uint8_t*>(serialized_request_body.data()),
      serialized_request_body.size());
-
+
  CURL* multi_easy_handle = curl_easy_init();
  Error err = PreRunProcessing(multi_easy_handle, raw_request, headers);
  if (!err.IsOk()) {
@@ -243,7 +240,7 @@ ChatCompletionClient::PreRunProcessing(
 
  // response data handled by ResponseHandler()
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, ResponseHandler);
- curl_easy_setopt(curl, CURLOPT_WRITEDATA, request);
+ curl_easy_setopt(curl, CURLOPT_WRITEDATA, request);
 
  const curl_off_t post_byte_size = request->total_input_byte_size_;
  curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE_LARGE, post_byte_size);
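
The handlers touched above implement the streaming path: ResponseHeaderHandler flags the request as streaming when the server answers with Content-Type: text/event-stream, and ResponseHandler then receives the Server-Sent Events body (see the TODO about stitching). A minimal Python sketch of the same detect-then-parse flow, for orientation only; the URL, model name, payload, and use of the requests library are assumptions made for the example, not part of this libcurl-based client:

import json
import requests  # assumption: the sketch uses requests, the real client uses libcurl

# Placeholder endpoint and payload, not taken from this commit.
url = "http://localhost:8000/v1/chat/completions"
payload = {
    "model": "my_model",
    "stream": True,
    "messages": [{"role": "user", "content": "hello"}],
}

with requests.post(url, json=payload, stream=True) as resp:
    # Same check ResponseHeaderHandler performs on the Content-Type header.
    if "text/event-stream" in resp.headers.get("Content-Type", ""):
        for line in resp.iter_lines(decode_unicode=True):
            if not line:
                continue
            # Each SSE event arrives as a "data: ..." line; "[DONE]" ends the stream.
            body = line[len("data: "):] if line.startswith("data: ") else line
            if body.strip() == "[DONE]":
                break
            print(json.loads(body))
    else:
        # Non-streaming responses come back as a single JSON document.
        print(resp.json())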

src/c++/perf_analyzer/client_backend/openai/openai_client.h

Lines changed: 1 addition & 1 deletion

@@ -173,7 +173,7 @@ class ChatCompletionClient : public HttpClient {
       void* contents, size_t size, size_t nmemb, void* userp);
   static size_t ResponseHeaderHandler(
       void* contents, size_t size, size_t nmemb, void* userp);
-
+
   Error UpdateInferStat(const triton::client::RequestTimers& timer);
   InferStat infer_stat_;
 };

src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py

Lines changed: 24 additions & 30 deletions

@@ -454,24 +454,22 @@ def _convert_generic_json_to_openai_chat_completions_format(
 
     @classmethod
     def _convert_generic_json_to_generate_format(
-        cls,
-        dataset_json: Dict,
-        add_model_name: bool,
-        add_stream: bool,
-        extra_inputs: Dict,
-        output_tokens_mean: int,
-        output_tokens_stddev: int,
-        output_tokens_deterministic: bool,
-        model_name: str = "",
+        cls,
+        dataset_json: Dict,
+        add_model_name: bool,
+        add_stream: bool,
+        extra_inputs: Dict,
+        output_tokens_mean: int,
+        output_tokens_stddev: int,
+        output_tokens_deterministic: bool,
+        model_name: str = "",
     ) -> Dict:
-
         (
             system_role_headers,
             user_role_headers,
             text_input_headers,
         ) = cls._determine_json_feature_roles(dataset_json)
 
-
         pa_json = cls._populate_triton_generate_output_json(
             dataset_json,
             system_role_headers,
@@ -488,7 +486,6 @@ def _convert_generic_json_to_generate_format(
 
         return pa_json
 
-
     @classmethod
     def _convert_generic_json_to_openai_completions_format(
         cls,
@@ -666,26 +663,25 @@ def _populate_openai_chat_completions_output_json(
         )
 
         return pa_json
-
+
     @classmethod
     def _populate_triton_generate_output_json(
-        cls,
-        dataset: Dict,
-        system_role_headers: List[str],
-        user_role_headers: List[str],
-        text_input_headers: List[str],
-        add_model_name: bool,
-        add_stream: bool,
-        extra_inputs: Dict,
-        output_tokens_mean: int,
-        output_tokens_stddev: int,
-        output_tokens_deterministic: bool,
-        model_name: str = "",
+        cls,
+        dataset: Dict,
+        system_role_headers: List[str],
+        user_role_headers: List[str],
+        text_input_headers: List[str],
+        add_model_name: bool,
+        add_stream: bool,
+        extra_inputs: Dict,
+        output_tokens_mean: int,
+        output_tokens_stddev: int,
+        output_tokens_deterministic: bool,
+        model_name: str = "",
     ) -> Dict:
+        pa_json: dict = {"data": [{"payload": [{}]} for _ in dataset["rows"]]}
 
-        pa_json = {"data":[{"payload":[{}]} for _ in dataset["rows"]]}
-
-        for index, entry in enumerate(dataset["rows"]):
+        for index, entry in enumerate(dataset["rows"]):
             for header, content in entry.items():
                 new_text_input = cls._create_new_text_input(
                     header,
@@ -710,8 +706,6 @@ def _populate_triton_generate_output_json(
 
         return pa_json
 
-
-
    @classmethod
    def _populate_openai_completions_output_json(
        cls,
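
For reference, _populate_triton_generate_output_json builds one payload object per dataset row, wrapped in the {"data": [{"payload": [...]}]} shape that perf_analyzer consumes; the type-annotated initializer above comes straight from this diff. A standalone sketch of that shape follows, where the helper name build_generate_payloads, the payload field names ("text_input", "stream"), and the row-concatenation logic are illustrative assumptions, not the actual class method:

from typing import Dict, List

def build_generate_payloads(rows: List[Dict[str, str]], add_stream: bool) -> Dict:
    # One empty payload per dataset row, same wrapper shape as in the diff.
    pa_json: dict = {"data": [{"payload": [{}]} for _ in rows]}
    for index, entry in enumerate(rows):
        payload = pa_json["data"][index]["payload"][0]
        # Concatenate every column of the row into the prompt text (illustrative).
        payload["text_input"] = " ".join(str(v) for v in entry.values())
        if add_stream:
            payload["stream"] = True
    return pa_json

print(build_generate_payloads([{"text_input": "hello"}], add_stream=True))
# {'data': [{'payload': [{'text_input': 'hello', 'stream': True}]}]}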

src/c++/perf_analyzer/genai-perf/genai_perf/llm_metrics.py

Lines changed: 0 additions & 1 deletion

@@ -682,7 +682,6 @@ def _run_tokenizer(self, output_texts: List[str]) -> List[List[int]]:
         return [out[1:] for out in encodings.data["input_ids"]]
 
     def _extract_generate_text_output(self, response: str) -> str:
-
         response = remove_sse_prefix(response)
 
         if response == "":
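
_extract_generate_text_output strips the SSE framing via remove_sse_prefix before checking for an empty body. That helper is defined elsewhere in genai_perf; the sketch below only illustrates its assumed behavior of dropping the standard "data:" prefix from an event line, and is not the library implementation:

def remove_sse_prefix(response: str) -> str:
    # Assumed behavior: strip the leading SSE "data:" marker, leaving the body.
    prefix = "data:"
    response = response.strip()
    if response.startswith(prefix):
        return response[len(prefix):].strip()
    return response

assert remove_sse_prefix('data: {"text_output": "hi"}') == '{"text_output": "hi"}'
assert remove_sse_prefix("plain text") == "plain text"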

src/c++/perf_analyzer/genai-perf/genai_perf/parser.py

Lines changed: 10 additions & 4 deletions

@@ -46,7 +46,11 @@
 
 logger = logging.getLogger(__name__)
 
-_endpoint_type_map = {"chat": "v1/chat/completions", "completions": "v1/completions", "generate":"v2/models/{MODEL_NAME}/generate"}
+_endpoint_type_map = {
+    "chat": "v1/chat/completions",
+    "completions": "v1/completions",
+    "generate": "v2/models/{MODEL_NAME}/generate",
+}
 
 
 def _check_model_args(
@@ -96,8 +100,10 @@ def _check_conditional_args(
     if args.endpoint is not None:
         args.endpoint = args.endpoint.lstrip(" /")
     else:
-        args.endpoint = _endpoint_type_map[args.endpoint_type].format(MODEL_NAME=args.model)
-
+        args.endpoint = _endpoint_type_map[args.endpoint_type].format(
+            MODEL_NAME=args.model
+        )
+
     # Output token distribution checks
     if args.output_tokens_mean == LlmInputs.DEFAULT_OUTPUT_TOKENS_MEAN:
         if args.output_tokens_stddev != LlmInputs.DEFAULT_OUTPUT_TOKENS_STDDEV:
@@ -367,7 +373,7 @@ def _add_endpoint_args(parser):
         required=False,
         help=f"The endpoint-type for requests. Inputs will be formatted according to endpoint-type.",
     )
-
+
     endpoint_group.add_argument(
         "--streaming",
         action="store_true",
