@@ -184,6 +184,26 @@ async def test_single_completion(server, client: openai.AsyncOpenAI,
         completion.choices[0].text) >= 5
 
 
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    # first test base model, then test loras
+    "model_name",
+    [MODEL_NAME, "zephyr-lora", "zephyr-lora2"],
+)
+async def test_no_logprobs(server, client: openai.AsyncOpenAI,
+                           model_name: str):
+    # test using token IDs
+    completion = await client.completions.create(
+        model=MODEL_NAME,
+        prompt=[0, 0, 0, 0, 0],
+        max_tokens=5,
+        temperature=0.0,
+        logprobs=None,
+    )
+    choice = completion.choices[0]
+    assert choice.logprobs is None
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
     # first test base model, then test loras
@@ -203,7 +223,72 @@ async def test_zero_logprobs(server, client: openai.AsyncOpenAI,
     choice = completion.choices[0]
     assert choice.logprobs is not None
     assert choice.logprobs.token_logprobs is not None
-    assert choice.logprobs.top_logprobs is None
+    assert choice.logprobs.top_logprobs is not None
+    assert len(choice.logprobs.top_logprobs[0]) <= 1
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_name",
+    [MODEL_NAME, "zephyr-lora"],
+)
+async def test_some_logprobs(server, client: openai.AsyncOpenAI,
+                             model_name: str):
+    # test using token IDs
+    completion = await client.completions.create(
+        model=MODEL_NAME,
+        prompt=[0, 0, 0, 0, 0],
+        max_tokens=5,
+        temperature=0.0,
+        logprobs=5,
+    )
+    choice = completion.choices[0]
+    assert choice.logprobs is not None
+    assert choice.logprobs.token_logprobs is not None
+    assert choice.logprobs.top_logprobs is not None
+    assert len(choice.logprobs.top_logprobs[0]) <= 6
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_name",
+    [MODEL_NAME, "zephyr-lora"],
+)
+async def test_too_many_completion_logprobs(server, client: openai.AsyncOpenAI,
+                                            model_name: str):
+
+    with pytest.raises(
+            (openai.BadRequestError, openai.APIError)):  # test using token IDs
+        await client.completions.create(
+            model=MODEL_NAME,
+            prompt=[0, 0, 0, 0, 0],
+            max_tokens=5,
+            temperature=0.0,
+            logprobs=6,
+        )
+        ...
+    with pytest.raises(
+            (openai.BadRequestError, openai.APIError)):  # test using token IDs
+        stream = await client.completions.create(
+            model=MODEL_NAME,
+            prompt=[0, 0, 0, 0, 0],
+            max_tokens=5,
+            temperature=0.0,
+            logprobs=6,
+            stream=True,
+        )
+        async for chunk in stream:
+            ...
+
+    # the server should still work afterwards
+    completion = await client.completions.create(
+        model=model_name,
+        prompt=[0, 0, 0, 0, 0],
+        max_tokens=5,
+        temperature=0.0,
+    )
+    completion = completion.choices[0].text
+    assert completion is not None and len(completion) >= 0
 
 
 @pytest.mark.asyncio
@@ -233,8 +318,10 @@ async def test_single_chat_session(server, client: openai.AsyncOpenAI,
         chat_completion.choices) == 1
     assert chat_completion.choices[0].message is not None
     assert chat_completion.choices[0].logprobs is not None
-    assert chat_completion.choices[0].logprobs.top_logprobs is not None
-    assert len(chat_completion.choices[0].logprobs.top_logprobs[0]) == 5
+    assert chat_completion.choices[0].logprobs.content[
+        0].top_logprobs is not None
+    assert len(
+        chat_completion.choices[0].logprobs.content[0].top_logprobs) == 5
     message = chat_completion.choices[0].message
     assert message.content is not None and len(message.content) >= 10
     assert message.role == "assistant"
@@ -251,10 +338,93 @@ async def test_single_chat_session(server, client: openai.AsyncOpenAI,
     assert message.content is not None and len(message.content) >= 0
 
 
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    # first test base model, then test loras
+    "model_name",
+    [MODEL_NAME, "zephyr-lora", "zephyr-lora2"],
+)
+async def test_no_logprobs_chat(server, client: openai.AsyncOpenAI,
+                                model_name: str):
+    messages = [{
+        "role": "system",
+        "content": "you are a helpful assistant"
+    }, {
+        "role": "user",
+        "content": "what is 1+1?"
+    }]
+
+    chat_completion = await client.chat.completions.create(model=model_name,
+                                                           messages=messages,
+                                                           max_tokens=5,
+                                                           temperature=0.0,
+                                                           logprobs=False)
+
+    choice = chat_completion.choices[0]
+    assert choice.logprobs is None
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    # just test 1 lora hereafter
+    "model_name",
+    [MODEL_NAME, "zephyr-lora"],
+)
+async def test_zero_logprobs_chat(server, client: openai.AsyncOpenAI,
+                                  model_name: str):
+    messages = [{
+        "role": "system",
+        "content": "you are a helpful assistant"
+    }, {
+        "role": "user",
+        "content": "what is 1+1?"
+    }]
+
+    chat_completion = await client.chat.completions.create(model=model_name,
+                                                           messages=messages,
+                                                           max_tokens=5,
+                                                           temperature=0.0,
+                                                           logprobs=True,
+                                                           top_logprobs=0)
+
+    choice = chat_completion.choices[0]
+    assert choice.logprobs is not None
+    assert choice.logprobs.content is not None
+    assert len(choice.logprobs.content[0].top_logprobs) <= 1
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_name",
+    [MODEL_NAME, "zephyr-lora"],
+)
+async def test_some_logprobs_chat(server, client: openai.AsyncOpenAI,
+                                  model_name: str):
+    messages = [{
+        "role": "system",
+        "content": "you are a helpful assistant"
+    }, {
+        "role": "user",
+        "content": "what is 1+1?"
+    }]
+
+    chat_completion = await client.chat.completions.create(model=model_name,
+                                                           messages=messages,
+                                                           max_tokens=5,
+                                                           temperature=0.0,
+                                                           logprobs=True,
+                                                           top_logprobs=5)
+
+    choice = chat_completion.choices[0]
+    assert choice.logprobs is not None
+    assert choice.logprobs.content is not None
+    assert len(choice.logprobs.content[0].top_logprobs) <= 6
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
-async def test_too_many_logprobs(server, client: openai.AsyncOpenAI,
-                                 model_name: str):
+async def test_too_many_chat_logprobs(server, client: openai.AsyncOpenAI,
+                                      model_name: str):
     messages = [{
         "role": "system",
         "content": "you are a helpful assistant"
@@ -263,13 +433,13 @@ async def test_too_many_logprobs(server, client: openai.AsyncOpenAI,
         "content": "what is 1+1?"
     }]
 
-    # Default max_logprobs is 5, so this should raise an error
+    # Default max_logprobs is 20, so this should raise an error
     with pytest.raises((openai.BadRequestError, openai.APIError)):
         stream = await client.chat.completions.create(model=model_name,
                                                       messages=messages,
                                                       max_tokens=10,
                                                       logprobs=True,
-                                                      top_logprobs=10,
+                                                      top_logprobs=21,
                                                       stream=True)
         async for chunk in stream:
             ...
@@ -279,25 +449,9 @@ async def test_too_many_logprobs(server, client: openai.AsyncOpenAI,
                                              messages=messages,
                                              max_tokens=10,
                                              logprobs=True,
-                                             top_logprobs=10,
+                                             top_logprobs=30,
                                              stream=False)
 
-    with pytest.raises((openai.BadRequestError, openai.APIError)):
-        stream = await client.completions.create(model=model_name,
-                                                 prompt="Test",
-                                                 max_tokens=10,
-                                                 logprobs=10,
-                                                 stream=True)
-        async for chunk in stream:
-            ...
-
-    with pytest.raises(openai.BadRequestError):
-        await client.completions.create(model=model_name,
-                                        prompt="Test",
-                                        max_tokens=10,
-                                        logprobs=10,
-                                        stream=False)
-
     # the server should still work afterwards
     chat_completion = await client.chat.completions.create(model=model_name,
                                                            messages=messages,
@@ -744,13 +898,12 @@ async def test_guided_choice_chat_logprobs(server, client: openai.AsyncOpenAI,
         top_logprobs=5,
         extra_body=dict(guided_choice=TEST_CHOICE,
                         guided_decoding_backend=guided_decoding_backend))
-    top_logprobs = chat_completion.choices[0].logprobs.top_logprobs
+    top_logprobs = chat_completion.choices[0].logprobs.content[0].top_logprobs
 
     # -9999.0 is the minimum logprob returned by OpenAI
     assert all(
-        isinstance(logprob, float) and logprob >= -9999.0
-        for token_dict in top_logprobs
-        for token, logprob in token_dict.items())
+        isinstance(token.logprob, float) and token.logprob >= -9999.0
+        for token in top_logprobs)
 
 
 @pytest.mark.asyncio
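
For reference, a minimal client-side sketch of the chat logprobs shape these updated assertions rely on (logprobs.content[0].top_logprobs rather than the old top_logprobs list of dicts). It assumes an OpenAI-compatible server is already running; the base URL, API key, and model name below are placeholder values and are not part of this diff.

import asyncio

import openai

# Placeholder endpoint and model, for illustration only; substitute the
# values of the server actually under test.
BASE_URL = "http://localhost:8000/v1"
MODEL = "HuggingFaceH4/zephyr-7b-beta"


async def main() -> None:
    client = openai.AsyncOpenAI(base_url=BASE_URL, api_key="EMPTY")
    chat_completion = await client.chat.completions.create(
        model=MODEL,
        messages=[{"role": "user", "content": "what is 1+1?"}],
        max_tokens=5,
        temperature=0.0,
        logprobs=True,
        top_logprobs=5,
    )
    # choices[0].logprobs.content holds one entry per generated token; each
    # entry carries its own token and logprob plus a top_logprobs list of
    # alternatives, which is what the tests index as
    # logprobs.content[0].top_logprobs.
    for token_info in chat_completion.choices[0].logprobs.content:
        print(token_info.token, token_info.logprob)
        for alternative in token_info.top_logprobs:
            print("   ", alternative.token, alternative.logprob)


if __name__ == "__main__":
    asyncio.run(main())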