@@ -189,6 +189,119 @@ def test_llm_inputs_with_defaults(self, default_configured_url):
189189 dataset_json = LlmInputs ._convert_input_url_dataset_to_generic_json (
190190 dataset = dataset
191191 )
192+ pa_json = LlmInputs ._convert_generic_json_to_output_format (
193+ output_format = OutputFormat .OPENAI_CHAT_COMPLETIONS ,
194+ generic_dataset = dataset_json ,
195+ add_model_name = False ,
196+ add_stream = False ,
197+ extra_inputs = {},
198+ output_tokens_mean = LlmInputs .DEFAULT_OUTPUT_TOKENS_MEAN ,
199+ output_tokens_stddev = LlmInputs .DEFAULT_OUTPUT_TOKENS_STDDEV ,
200+ output_tokens_deterministic = False ,
201+ )
202+
203+ assert pa_json is not None
204+ assert len (pa_json ["data" ]) == LlmInputs .DEFAULT_LENGTH
205+
@pytest.mark.xfail(
    raises=GenAIPerfException,
    reason="Download from huggingface may fail due to server issues",
)
def test_create_openai_llm_inputs_cnn_dailymail(self):
    """
    Build OpenAI chat-completions inputs from the CNN_DAILY_MAIL dataset.

    The test is marked xfail for GenAIPerfException because the dataset
    download may fail for reasons outside the test's control.
    """
    request = {
        "input_type": PromptSource.DATASET,
        "dataset_name": CNN_DAILY_MAIL,
        "output_format": OutputFormat.OPENAI_CHAT_COMPLETIONS,
    }
    generated = LlmInputs.create_llm_inputs(**request)

    # Delete the input-data file before asserting, so the cleanup still
    # happens when one of the assertions below fails.
    os.remove(DEFAULT_INPUT_DATA_JSON)

    assert generated is not None
    entries = generated["data"]
    assert len(entries) == LlmInputs.DEFAULT_LENGTH
224+
def test_write_to_file(self):
    """
    Check that the JSON stored at DEFAULT_INPUT_DATA_JSON equals the
    object returned by create_llm_inputs.
    """
    returned = LlmInputs.create_llm_inputs(
        input_type=PromptSource.DATASET,
        dataset_name=OPEN_ORCA,
        output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS,
        model_name="open_orca",
        add_model_name=True,
        add_stream=True,
    )

    # The file is removed whether or not reading/parsing succeeds.
    try:
        with open(DEFAULT_INPUT_DATA_JSON, "r") as handle:
            on_disk = json.load(handle)
    finally:
        os.remove(DEFAULT_INPUT_DATA_JSON)

    assert returned == on_disk
244+
def test_create_openai_to_vllm(self):
    """
    Generate inputs from OPEN_ORCA in the VLLM output format and check
    that the default number of entries is produced.
    """
    request = {
        "input_type": PromptSource.DATASET,
        "output_format": OutputFormat.VLLM,
        "dataset_name": OPEN_ORCA,
        "add_model_name": False,
        "add_stream": True,
    }
    generated = LlmInputs.create_llm_inputs(**request)

    # Delete the input-data file before asserting, so the cleanup still
    # happens when one of the assertions below fails.
    os.remove(DEFAULT_INPUT_DATA_JSON)

    assert generated is not None
    entries = generated["data"]
    assert len(entries) == LlmInputs.DEFAULT_LENGTH
261+
def test_create_openai_to_completions(self):
    """
    Generate inputs from OPEN_ORCA in the OPENAI_COMPLETIONS output
    format and check both the entry count and the prompt type.
    """
    request = {
        "input_type": PromptSource.DATASET,
        "output_format": OutputFormat.OPENAI_COMPLETIONS,
        "dataset_name": OPEN_ORCA,
        "add_model_name": False,
        "add_stream": True,
    }
    generated = LlmInputs.create_llm_inputs(**request)

    # Delete the input-data file before asserting, so the cleanup still
    # happens when one of the assertions below fails.
    os.remove(DEFAULT_INPUT_DATA_JSON)

    assert generated is not None
    entries = generated["data"]
    assert len(entries) == LlmInputs.DEFAULT_LENGTH

    # The NIM legacy completion endpoint accepts a single string only,
    # not an array of strings, so the prompt must be a str, not a list.
    first_prompt = entries[0]["payload"][0]["prompt"]
    assert isinstance(first_prompt, str)
282+
def test_create_openai_to_trtllm(self):
    """
    Generate inputs from OPEN_ORCA in the TENSORRTLLM output format and
    check that the default number of entries is produced.
    """
    request = {
        "input_type": PromptSource.DATASET,
        "output_format": OutputFormat.TENSORRTLLM,
        "dataset_name": OPEN_ORCA,
        "add_model_name": False,
        "add_stream": True,
    }
    generated = LlmInputs.create_llm_inputs(**request)

    # Delete the input-data file before asserting, so the cleanup still
    # happens when one of the assertions below fails.
    os.remove(DEFAULT_INPUT_DATA_JSON)

    assert generated is not None
    entries = generated["data"]
    assert len(entries) == LlmInputs.DEFAULT_LENGTH
299+
300+ def test_random_synthetic_no_stddev (self , default_tokenizer ):
301+ """
302+ Test that we can produce an exact number of random synthetic tokens
303+ """
304+ random .seed (1 )
192305
193306 assert dataset_json is not None
194307 assert len (dataset_json ["rows" ]) == TEST_LENGTH
0 commit comments