@@ -167,9 +167,10 @@ async def test_single_completion(server, client: openai.AsyncOpenAI,
 
     assert completion.id is not None
     assert completion.choices is not None and len(completion.choices) == 1
-    assert completion.choices[0].text is not None and len(
-        completion.choices[0].text) >= 5
-    assert completion.choices[0].finish_reason == "length"
+
+    choice = completion.choices[0]
+    assert len(choice.text) >= 5
+    assert choice.finish_reason == "length"
     assert completion.usage == openai.types.CompletionUsage(
         completion_tokens=5, prompt_tokens=6, total_tokens=11)
 
@@ -180,8 +181,7 @@ async def test_single_completion(server, client: openai.AsyncOpenAI,
         max_tokens=5,
         temperature=0.0,
     )
-    assert completion.choices[0].text is not None and len(
-        completion.choices[0].text) >= 5
+    assert len(completion.choices[0].text) >= 5
 
 
 @pytest.mark.asyncio
@@ -206,9 +206,9 @@ async def test_no_logprobs(server, client: openai.AsyncOpenAI,
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
-    # first test base model, then test loras
+    # just test 1 lora hereafter
     "model_name",
-    [MODEL_NAME, "zephyr-lora", "zephyr-lora2"],
+    [MODEL_NAME, "zephyr-lora"],
 )
 async def test_zero_logprobs(server, client: openai.AsyncOpenAI,
                              model_name: str):
@@ -291,55 +291,7 @@ async def test_too_many_completion_logprobs(server, client: openai.AsyncOpenAI,
         max_tokens=5,
         temperature=0.0,
     )
-    completion = completion.choices[0].text
-    assert completion is not None and len(completion) >= 0
-
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize(
-    # just test 1 lora hereafter
-    "model_name",
-    [MODEL_NAME, "zephyr-lora"],
-)
-async def test_single_chat_session(server, client: openai.AsyncOpenAI,
-                                   model_name: str):
-    messages = [{
-        "role": "system",
-        "content": "you are a helpful assistant"
-    }, {
-        "role": "user",
-        "content": "what is 1+1?"
-    }]
-
-    # test single completion
-    chat_completion = await client.chat.completions.create(model=model_name,
-                                                           messages=messages,
-                                                           max_tokens=10,
-                                                           logprobs=True,
-                                                           top_logprobs=5)
-    assert chat_completion.id is not None
-    assert chat_completion.choices is not None and len(
-        chat_completion.choices) == 1
-    assert chat_completion.choices[0].message is not None
-    assert chat_completion.choices[0].logprobs is not None
-    assert chat_completion.choices[0].logprobs.content[
-        0].top_logprobs is not None
-    assert len(
-        chat_completion.choices[0].logprobs.content[0].top_logprobs) == 5
-    message = chat_completion.choices[0].message
-    assert message.content is not None and len(message.content) >= 10
-    assert message.role == "assistant"
-    messages.append({"role": "assistant", "content": message.content})
-
-    # test multi-turn dialogue
-    messages.append({"role": "user", "content": "express your result in json"})
-    chat_completion = await client.chat.completions.create(
-        model=model_name,
-        messages=messages,
-        max_tokens=10,
-    )
-    message = chat_completion.choices[0].message
-    assert message.content is not None and len(message.content) >= 0
+    assert len(completion.choices[0].text) >= 0
 
 
 @pytest.mark.asyncio
@@ -394,7 +346,7 @@ async def test_zero_logprobs_chat(server, client: openai.AsyncOpenAI,
     choice = chat_completion.choices[0]
     assert choice.logprobs is not None
     assert choice.logprobs.content is not None
-    assert len(choice.logprobs.content[0].top_logprobs) <= 1
+    assert len(choice.logprobs.content[0].top_logprobs) == 0
 
 
 @pytest.mark.asyncio
@@ -422,11 +374,14 @@ async def test_some_logprobs_chat(server, client: openai.AsyncOpenAI,
     choice = chat_completion.choices[0]
     assert choice.logprobs is not None
     assert choice.logprobs.content is not None
-    assert len(choice.logprobs.content[0].top_logprobs) <= 6
+    assert len(choice.logprobs.content[0].top_logprobs) == 5
 
 
 @pytest.mark.asyncio
-@pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.parametrize(
+    "model_name",
+    [MODEL_NAME, "zephyr-lora"],
+)
 async def test_too_many_chat_logprobs(server, client: openai.AsyncOpenAI,
                                       model_name: str):
     messages = [{
@@ -467,7 +422,51 @@ async def test_too_many_chat_logprobs(server, client: openai.AsyncOpenAI,
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
-    # just test 1 lora hereafter
+    "model_name",
+    [MODEL_NAME, "zephyr-lora"],
+)
+async def test_single_chat_session(server, client: openai.AsyncOpenAI,
+                                   model_name: str):
+    messages = [{
+        "role": "system",
+        "content": "you are a helpful assistant"
+    }, {
+        "role": "user",
+        "content": "what is 1+1?"
+    }]
+
+    # test single completion
+    chat_completion = await client.chat.completions.create(model=model_name,
+                                                           messages=messages,
+                                                           max_tokens=10,
+                                                           logprobs=True,
+                                                           top_logprobs=5)
+    assert chat_completion.id is not None
+    assert len(chat_completion.choices) == 1
+
+    choice = chat_completion.choices[0]
+    assert choice.finish_reason == "length"
+    assert chat_completion.usage == openai.types.CompletionUsage(
+        completion_tokens=10, prompt_tokens=37, total_tokens=47)
+
+    message = choice.message
+    assert message.content is not None and len(message.content) >= 10
+    assert message.role == "assistant"
+    messages.append({"role": "assistant", "content": message.content})
+
+    # test multi-turn dialogue
+    messages.append({"role": "user", "content": "express your result in json"})
+    chat_completion = await client.chat.completions.create(
+        model=model_name,
+        messages=messages,
+        max_tokens=10,
+    )
+    message = chat_completion.choices[0].message
+    assert message.content is not None and len(message.content) >= 0
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
     "model_name",
     [MODEL_NAME, "zephyr-lora"],
 )
@@ -753,8 +752,7 @@ async def test_logits_bias(server, client: openai.AsyncOpenAI):
         logit_bias={str(token_id): 100},
         seed=42,
     )
-    assert completion.choices[0].text is not None and len(
-        completion.choices[0].text) >= 5
+    assert len(completion.choices[0].text) >= 5
     response_tokens = tokenizer(completion.choices[0].text,
                                 add_special_tokens=False)["input_ids"]
     expected_tokens = tokenizer(tokenizer.decode([token_id] * 5),
@@ -801,9 +799,8 @@ async def test_guided_json_completion(server, client: openai.AsyncOpenAI,
                         guided_decoding_backend=guided_decoding_backend))
 
     assert completion.id is not None
-    assert completion.choices is not None and len(completion.choices) == 3
+    assert len(completion.choices) == 3
     for i in range(3):
-        assert completion.choices[i].text is not None
         output_json = json.loads(completion.choices[i].text)
         jsonschema.validate(instance=output_json, schema=TEST_SCHEMA)
 
@@ -870,9 +867,8 @@ async def test_guided_regex_completion(server, client: openai.AsyncOpenAI,
                         guided_decoding_backend=guided_decoding_backend))
 
     assert completion.id is not None
-    assert completion.choices is not None and len(completion.choices) == 3
+    assert len(completion.choices) == 3
     for i in range(3):
-        assert completion.choices[i].text is not None
         assert re.fullmatch(TEST_REGEX, completion.choices[i].text) is not None
 
 
@@ -929,7 +925,7 @@ async def test_guided_choice_completion(server, client: openai.AsyncOpenAI,
                         guided_decoding_backend=guided_decoding_backend))
 
     assert completion.id is not None
-    assert completion.choices is not None and len(completion.choices) == 2
+    assert len(completion.choices) == 2
     for i in range(2):
         assert completion.choices[i].text in TEST_CHOICE
 
@@ -1031,12 +1027,14 @@ async def test_guided_choice_chat_logprobs(server, client: openai.AsyncOpenAI,
         top_logprobs=5,
         extra_body=dict(guided_choice=TEST_CHOICE,
                         guided_decoding_backend=guided_decoding_backend))
+
+    assert chat_completion.choices[0].logprobs is not None
+    assert chat_completion.choices[0].logprobs.content is not None
     top_logprobs = chat_completion.choices[0].logprobs.content[0].top_logprobs
 
     # -9999.0 is the minimum logprob returned by OpenAI
-    assert all(
-        isinstance(token.logprob, float) and token.logprob >= -9999.0
-        for token in top_logprobs)
+    for item in top_logprobs:
+        assert item.logprob >= -9999.0, f"Failed (top_logprobs={top_logprobs})"
 
 
 @pytest.mark.asyncio
@@ -1238,6 +1236,8 @@ async def test_response_format_json_object(server, client: openai.AsyncOpenAI):
         response_format={"type": "json_object"})
 
     content = resp.choices[0].message.content
+    assert content is not None
+
     loaded = json.loads(content)
     assert loaded == {"result": 2}, loaded
 
@@ -1365,8 +1365,7 @@ async def test_echo_logprob_completion(server, client: openai.AsyncOpenAI,
 
     prompt_text = tokenizer.decode(prompt) if isinstance(prompt,
                                                           list) else prompt
-    assert (completion.choices[0].text is not None
-            and re.search(r"^" + prompt_text, completion.choices[0].text))
+    assert re.search(r"^" + prompt_text, completion.choices[0].text)
     logprobs = completion.choices[0].logprobs
     assert logprobs is not None
     assert len(logprobs.text_offset) > 5
@@ -1407,32 +1406,32 @@ async def test_long_seed(server, client: openai.AsyncOpenAI):
 )
 async def test_single_embedding(embedding_server, client: openai.AsyncOpenAI,
                                 model_name: str):
-    input = [
+    input_texts = [
         "The chef prepared a delicious meal.",
     ]
 
     # test single embedding
     embeddings = await client.embeddings.create(
         model=model_name,
-        input=input,
+        input=input_texts,
         encoding_format="float",
     )
     assert embeddings.id is not None
-    assert embeddings.data is not None and len(embeddings.data) == 1
+    assert len(embeddings.data) == 1
     assert len(embeddings.data[0].embedding) == 4096
     assert embeddings.usage.completion_tokens == 0
     assert embeddings.usage.prompt_tokens == 9
     assert embeddings.usage.total_tokens == 9
 
     # test using token IDs
-    input = [1, 1, 1, 1, 1]
+    input_tokens = [1, 1, 1, 1, 1]
     embeddings = await client.embeddings.create(
         model=model_name,
-        input=input,
+        input=input_tokens,
         encoding_format="float",
     )
     assert embeddings.id is not None
-    assert embeddings.data is not None and len(embeddings.data) == 1
+    assert len(embeddings.data) == 1
     assert len(embeddings.data[0].embedding) == 4096
     assert embeddings.usage.completion_tokens == 0
     assert embeddings.usage.prompt_tokens == 5
@@ -1447,29 +1446,29 @@ async def test_single_embedding(embedding_server, client: openai.AsyncOpenAI,
 async def test_batch_embedding(embedding_server, client: openai.AsyncOpenAI,
                                model_name: str):
     # test List[str]
-    inputs = [
+    input_texts = [
         "The cat sat on the mat.", "A feline was resting on a rug.",
         "Stars twinkle brightly in the night sky."
     ]
     embeddings = await client.embeddings.create(
         model=model_name,
-        input=inputs,
+        input=input_texts,
         encoding_format="float",
     )
     assert embeddings.id is not None
-    assert embeddings.data is not None and len(embeddings.data) == 3
+    assert len(embeddings.data) == 3
     assert len(embeddings.data[0].embedding) == 4096
 
     # test List[List[int]]
-    inputs = [[4, 5, 7, 9, 20], [15, 29, 499], [24, 24, 24, 24, 24],
-              [25, 32, 64, 77]]
+    input_tokens = [[4, 5, 7, 9, 20], [15, 29, 499], [24, 24, 24, 24, 24],
+                    [25, 32, 64, 77]]
     embeddings = await client.embeddings.create(
         model=model_name,
-        input=inputs,
+        input=input_tokens,
         encoding_format="float",
     )
     assert embeddings.id is not None
-    assert embeddings.data is not None and len(embeddings.data) == 4
+    assert len(embeddings.data) == 4
     assert len(embeddings.data[0].embedding) == 4096
     assert embeddings.usage.completion_tokens == 0
     assert embeddings.usage.prompt_tokens == 17