Commit 640052b

[Bugfix][Frontend] Cleanup "fix chat logprobs" (#5026)
1 parent 351d5e7 commit 640052b

File tree

6 files changed (+122, -123 lines)
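
Most of the changed lines follow one pattern: assertions of the form `x is not None and len(x) >= n` become direct length checks, since the OpenAI Python client returns typed (Pydantic) response objects whose `choices` and `text` fields cannot be None. Below is a minimal sketch of the pattern, assuming a running vLLM OpenAI-compatible server and a `client` fixture like the ones in these tests; the model name is only an illustrative assumption. Fields that genuinely are Optional, such as `message.content`, keep their explicit None checks in the diff below.

import openai
import pytest

MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"  # assumed; any served model works


@pytest.mark.asyncio
async def test_completion_assertions(client: openai.AsyncOpenAI):
    completion = await client.completions.create(
        model=MODEL_NAME,
        prompt="Hello, my name is",
        max_tokens=5,
        temperature=0.0,
    )

    # Before: assert completion.choices is not None and len(completion.choices) == 1
    # After: the typed response object already guarantees the field is present.
    assert len(completion.choices) == 1
    assert len(completion.choices[0].text) >= 5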

tests/async_engine/test_openapi_server_ray.py

Lines changed: 11 additions & 14 deletions
@@ -55,9 +55,8 @@ async def test_single_completion(server, client: openai.AsyncOpenAI):
         temperature=0.0)
 
     assert completion.id is not None
-    assert completion.choices is not None and len(completion.choices) == 1
-    assert completion.choices[0].text is not None and len(
-        completion.choices[0].text) >= 5
+    assert len(completion.choices) == 1
+    assert len(completion.choices[0].text) >= 5
     assert completion.choices[0].finish_reason == "length"
     assert completion.usage == openai.types.CompletionUsage(
         completion_tokens=5, prompt_tokens=6, total_tokens=11)
@@ -69,8 +68,7 @@ async def test_single_completion(server, client: openai.AsyncOpenAI):
         max_tokens=5,
         temperature=0.0,
     )
-    assert completion.choices[0].text is not None and len(
-        completion.choices[0].text) >= 5
+    assert len(completion.choices[0].text) >= 5
 
 
 @pytest.mark.asyncio
@@ -90,15 +88,14 @@ async def test_single_chat_session(server, client: openai.AsyncOpenAI):
         logprobs=True,
         top_logprobs=5)
     assert chat_completion.id is not None
-    assert chat_completion.choices is not None and len(
-        chat_completion.choices) == 1
-    assert chat_completion.choices[0].message is not None
-    assert chat_completion.choices[0].logprobs is not None
-    assert chat_completion.choices[0].logprobs.content[
-        0].top_logprobs is not None
-    assert len(
-        chat_completion.choices[0].logprobs.content[0].top_logprobs) == 5
-    message = chat_completion.choices[0].message
+    assert len(chat_completion.choices) == 1
+
+    choice = chat_completion.choices[0]
+    assert choice.finish_reason == "length"
+    assert chat_completion.usage == openai.types.CompletionUsage(
+        completion_tokens=10, prompt_tokens=13, total_tokens=23)
+
+    message = choice.message
     assert message.content is not None and len(message.content) >= 10
     assert message.role == "assistant"
     messages.append({"role": "assistant", "content": message.content})

tests/entrypoints/test_openai_server.py

Lines changed: 84 additions & 85 deletions
@@ -167,9 +167,10 @@ async def test_single_completion(server, client: openai.AsyncOpenAI,
 
     assert completion.id is not None
     assert completion.choices is not None and len(completion.choices) == 1
-    assert completion.choices[0].text is not None and len(
-        completion.choices[0].text) >= 5
-    assert completion.choices[0].finish_reason == "length"
+
+    choice = completion.choices[0]
+    assert len(choice.text) >= 5
+    assert choice.finish_reason == "length"
     assert completion.usage == openai.types.CompletionUsage(
         completion_tokens=5, prompt_tokens=6, total_tokens=11)
 
@@ -180,8 +181,7 @@ async def test_single_completion(server, client: openai.AsyncOpenAI,
         max_tokens=5,
         temperature=0.0,
     )
-    assert completion.choices[0].text is not None and len(
-        completion.choices[0].text) >= 5
+    assert len(completion.choices[0].text) >= 5
 
 
 @pytest.mark.asyncio
@@ -206,9 +206,9 @@ async def test_no_logprobs(server, client: openai.AsyncOpenAI,
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
-    # first test base model, then test loras
+    # just test 1 lora hereafter
    "model_name",
-    [MODEL_NAME, "zephyr-lora", "zephyr-lora2"],
+    [MODEL_NAME, "zephyr-lora"],
 )
 async def test_zero_logprobs(server, client: openai.AsyncOpenAI,
                              model_name: str):
@@ -291,55 +291,7 @@ async def test_too_many_completion_logprobs(server, client: openai.AsyncOpenAI,
         max_tokens=5,
         temperature=0.0,
     )
-    completion = completion.choices[0].text
-    assert completion is not None and len(completion) >= 0
-
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize(
-    # just test 1 lora hereafter
-    "model_name",
-    [MODEL_NAME, "zephyr-lora"],
-)
-async def test_single_chat_session(server, client: openai.AsyncOpenAI,
-                                   model_name: str):
-    messages = [{
-        "role": "system",
-        "content": "you are a helpful assistant"
-    }, {
-        "role": "user",
-        "content": "what is 1+1?"
-    }]
-
-    # test single completion
-    chat_completion = await client.chat.completions.create(model=model_name,
-                                                           messages=messages,
-                                                           max_tokens=10,
-                                                           logprobs=True,
-                                                           top_logprobs=5)
-    assert chat_completion.id is not None
-    assert chat_completion.choices is not None and len(
-        chat_completion.choices) == 1
-    assert chat_completion.choices[0].message is not None
-    assert chat_completion.choices[0].logprobs is not None
-    assert chat_completion.choices[0].logprobs.content[
-        0].top_logprobs is not None
-    assert len(
-        chat_completion.choices[0].logprobs.content[0].top_logprobs) == 5
-    message = chat_completion.choices[0].message
-    assert message.content is not None and len(message.content) >= 10
-    assert message.role == "assistant"
-    messages.append({"role": "assistant", "content": message.content})
-
-    # test multi-turn dialogue
-    messages.append({"role": "user", "content": "express your result in json"})
-    chat_completion = await client.chat.completions.create(
-        model=model_name,
-        messages=messages,
-        max_tokens=10,
-    )
-    message = chat_completion.choices[0].message
-    assert message.content is not None and len(message.content) >= 0
+    assert len(completion.choices[0].text) >= 0
 
 
 @pytest.mark.asyncio
@@ -394,7 +346,7 @@ async def test_zero_logprobs_chat(server, client: openai.AsyncOpenAI,
     choice = chat_completion.choices[0]
     assert choice.logprobs is not None
     assert choice.logprobs.content is not None
-    assert len(choice.logprobs.content[0].top_logprobs) <= 1
+    assert len(choice.logprobs.content[0].top_logprobs) == 0
 
 
 @pytest.mark.asyncio
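
The hunk above tightens the zero-logprobs expectation from "at most 1" to "exactly 0". A minimal sketch of what that means at the API level, written in the same style as these tests (it assumes the `server`/`client`/`model_name` fixtures and is not runnable on its own): with `logprobs=True` and `top_logprobs=0`, each returned token still carries its own logprob, but its list of alternative candidates is empty.

import openai
import pytest


@pytest.mark.asyncio
async def test_zero_top_logprobs_sketch(client: openai.AsyncOpenAI,
                                        model_name: str):
    chat_completion = await client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": "what is 1+1?"}],
        max_tokens=5,
        temperature=0.0,
        logprobs=True,
        top_logprobs=0,
    )
    choice = chat_completion.choices[0]
    assert choice.logprobs is not None
    assert choice.logprobs.content is not None
    for token_logprob in choice.logprobs.content:
        assert isinstance(token_logprob.logprob, float)  # per-token logprob present
        assert len(token_logprob.top_logprobs) == 0      # no alternatives requested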
@@ -422,11 +374,14 @@ async def test_some_logprobs_chat(server, client: openai.AsyncOpenAI,
     choice = chat_completion.choices[0]
     assert choice.logprobs is not None
     assert choice.logprobs.content is not None
-    assert len(choice.logprobs.content[0].top_logprobs) <= 6
+    assert len(choice.logprobs.content[0].top_logprobs) == 5
 
 
 @pytest.mark.asyncio
-@pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.parametrize(
+    "model_name",
+    [MODEL_NAME, "zephyr-lora"],
+)
 async def test_too_many_chat_logprobs(server, client: openai.AsyncOpenAI,
                                       model_name: str):
     messages = [{
@@ -467,7 +422,51 @@ async def test_too_many_chat_logprobs(server, client: openai.AsyncOpenAI,
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
-    # just test 1 lora hereafter
+    "model_name",
+    [MODEL_NAME, "zephyr-lora"],
+)
+async def test_single_chat_session(server, client: openai.AsyncOpenAI,
+                                   model_name: str):
+    messages = [{
+        "role": "system",
+        "content": "you are a helpful assistant"
+    }, {
+        "role": "user",
+        "content": "what is 1+1?"
+    }]
+
+    # test single completion
+    chat_completion = await client.chat.completions.create(model=model_name,
+                                                           messages=messages,
+                                                           max_tokens=10,
+                                                           logprobs=True,
+                                                           top_logprobs=5)
+    assert chat_completion.id is not None
+    assert len(chat_completion.choices) == 1
+
+    choice = chat_completion.choices[0]
+    assert choice.finish_reason == "length"
+    assert chat_completion.usage == openai.types.CompletionUsage(
+        completion_tokens=10, prompt_tokens=37, total_tokens=47)
+
+    message = choice.message
+    assert message.content is not None and len(message.content) >= 10
+    assert message.role == "assistant"
+    messages.append({"role": "assistant", "content": message.content})
+
+    # test multi-turn dialogue
+    messages.append({"role": "user", "content": "express your result in json"})
+    chat_completion = await client.chat.completions.create(
+        model=model_name,
+        messages=messages,
+        max_tokens=10,
+    )
+    message = chat_completion.choices[0].message
+    assert message.content is not None and len(message.content) >= 0
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
     "model_name",
     [MODEL_NAME, "zephyr-lora"],
 )
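
The re-added test_single_chat_session above now pins exact usage numbers (completion_tokens=10, prompt_tokens=37, total_tokens=47). As a hedged aside, not part of this commit: prompt_tokens should correspond to the served model's chat template applied to the request messages, so a value like 37 is template-specific and can be sanity-checked offline (the model name below is only an assumption for illustration).

from transformers import AutoTokenizer

MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"  # assumption for illustration

messages = [
    {"role": "system", "content": "you are a helpful assistant"},
    {"role": "user", "content": "what is 1+1?"},
]

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
prompt_ids = tokenizer.apply_chat_template(messages,
                                           tokenize=True,
                                           add_generation_prompt=True)
# Expected to line up with usage.prompt_tokens reported by the server.
print(len(prompt_ids))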
@@ -753,8 +752,7 @@ async def test_logits_bias(server, client: openai.AsyncOpenAI):
         logit_bias={str(token_id): 100},
         seed=42,
     )
-    assert completion.choices[0].text is not None and len(
-        completion.choices[0].text) >= 5
+    assert len(completion.choices[0].text) >= 5
     response_tokens = tokenizer(completion.choices[0].text,
                                 add_special_tokens=False)["input_ids"]
     expected_tokens = tokenizer(tokenizer.decode([token_id] * 5),
@@ -801,9 +799,8 @@ async def test_guided_json_completion(server, client: openai.AsyncOpenAI,
                         guided_decoding_backend=guided_decoding_backend))
 
     assert completion.id is not None
-    assert completion.choices is not None and len(completion.choices) == 3
+    assert len(completion.choices) == 3
     for i in range(3):
-        assert completion.choices[i].text is not None
         output_json = json.loads(completion.choices[i].text)
         jsonschema.validate(instance=output_json, schema=TEST_SCHEMA)
 
@@ -870,9 +867,8 @@ async def test_guided_regex_completion(server, client: openai.AsyncOpenAI,
                         guided_decoding_backend=guided_decoding_backend))
 
     assert completion.id is not None
-    assert completion.choices is not None and len(completion.choices) == 3
+    assert len(completion.choices) == 3
     for i in range(3):
-        assert completion.choices[i].text is not None
         assert re.fullmatch(TEST_REGEX, completion.choices[i].text) is not None
 
 
@@ -929,7 +925,7 @@ async def test_guided_choice_completion(server, client: openai.AsyncOpenAI,
                         guided_decoding_backend=guided_decoding_backend))
 
     assert completion.id is not None
-    assert completion.choices is not None and len(completion.choices) == 2
+    assert len(completion.choices) == 2
     for i in range(2):
         assert completion.choices[i].text in TEST_CHOICE
 
@@ -1031,12 +1027,14 @@ async def test_guided_choice_chat_logprobs(server, client: openai.AsyncOpenAI,
         top_logprobs=5,
         extra_body=dict(guided_choice=TEST_CHOICE,
                         guided_decoding_backend=guided_decoding_backend))
+
+    assert chat_completion.choices[0].logprobs is not None
+    assert chat_completion.choices[0].logprobs.content is not None
     top_logprobs = chat_completion.choices[0].logprobs.content[0].top_logprobs
 
     # -9999.0 is the minimum logprob returned by OpenAI
-    assert all(
-        isinstance(token.logprob, float) and token.logprob >= -9999.0
-        for token in top_logprobs)
+    for item in top_logprobs:
+        assert item.logprob >= -9999.0, f"Failed (top_logprobs={top_logprobs})"
 
 
 @pytest.mark.asyncio
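
The logprobs check above also changes shape: a single `assert all(...)` becomes a per-item loop with a failure message. A small illustration of the motivation, with made-up values rather than real test output: when the aggregate form fails, the report only says the expression was falsy, while the per-item form names the offending values.

top_logprobs = [-0.1, -3.5, -9.2]  # hypothetical values within the bound

# Aggregate form: a failure only reports that all(...) returned False.
assert all(lp >= -9999.0 for lp in top_logprobs)

# Per-item form: a failure message includes the actual values being checked.
for lp in top_logprobs:
    assert lp >= -9999.0, f"Failed (top_logprobs={top_logprobs})"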
@@ -1238,6 +1236,8 @@ async def test_response_format_json_object(server, client: openai.AsyncOpenAI):
         response_format={"type": "json_object"})
 
     content = resp.choices[0].message.content
+    assert content is not None
+
     loaded = json.loads(content)
     assert loaded == {"result": 2}, loaded
 
@@ -1365,8 +1365,7 @@ async def test_echo_logprob_completion(server, client: openai.AsyncOpenAI,
 
     prompt_text = tokenizer.decode(prompt) if isinstance(prompt,
                                                          list) else prompt
-    assert (completion.choices[0].text is not None
-            and re.search(r"^" + prompt_text, completion.choices[0].text))
+    assert re.search(r"^" + prompt_text, completion.choices[0].text)
     logprobs = completion.choices[0].logprobs
     assert logprobs is not None
     assert len(logprobs.text_offset) > 5
@@ -1407,32 +1406,32 @@ async def test_long_seed(server, client: openai.AsyncOpenAI):
 )
 async def test_single_embedding(embedding_server, client: openai.AsyncOpenAI,
                                 model_name: str):
-    input = [
+    input_texts = [
         "The chef prepared a delicious meal.",
     ]
 
     # test single embedding
     embeddings = await client.embeddings.create(
         model=model_name,
-        input=input,
+        input=input_texts,
         encoding_format="float",
     )
     assert embeddings.id is not None
-    assert embeddings.data is not None and len(embeddings.data) == 1
+    assert len(embeddings.data) == 1
     assert len(embeddings.data[0].embedding) == 4096
     assert embeddings.usage.completion_tokens == 0
     assert embeddings.usage.prompt_tokens == 9
     assert embeddings.usage.total_tokens == 9
 
     # test using token IDs
-    input = [1, 1, 1, 1, 1]
+    input_tokens = [1, 1, 1, 1, 1]
     embeddings = await client.embeddings.create(
         model=model_name,
-        input=input,
+        input=input_tokens,
         encoding_format="float",
     )
     assert embeddings.id is not None
-    assert embeddings.data is not None and len(embeddings.data) == 1
+    assert len(embeddings.data) == 1
     assert len(embeddings.data[0].embedding) == 4096
     assert embeddings.usage.completion_tokens == 0
     assert embeddings.usage.prompt_tokens == 5
@@ -1447,29 +1446,29 @@ async def test_single_embedding(embedding_server, client: openai.AsyncOpenAI,
 async def test_batch_embedding(embedding_server, client: openai.AsyncOpenAI,
                                model_name: str):
     # test List[str]
-    inputs = [
+    input_texts = [
         "The cat sat on the mat.", "A feline was resting on a rug.",
         "Stars twinkle brightly in the night sky."
     ]
     embeddings = await client.embeddings.create(
         model=model_name,
-        input=inputs,
+        input=input_texts,
         encoding_format="float",
     )
     assert embeddings.id is not None
-    assert embeddings.data is not None and len(embeddings.data) == 3
+    assert len(embeddings.data) == 3
     assert len(embeddings.data[0].embedding) == 4096
 
     # test List[List[int]]
-    inputs = [[4, 5, 7, 9, 20], [15, 29, 499], [24, 24, 24, 24, 24],
-              [25, 32, 64, 77]]
+    input_tokens = [[4, 5, 7, 9, 20], [15, 29, 499], [24, 24, 24, 24, 24],
+                    [25, 32, 64, 77]]
     embeddings = await client.embeddings.create(
         model=model_name,
-        input=inputs,
+        input=input_tokens,
         encoding_format="float",
     )
     assert embeddings.id is not None
-    assert embeddings.data is not None and len(embeddings.data) == 4
+    assert len(embeddings.data) == 4
     assert len(embeddings.data[0].embedding) == 4096
     assert embeddings.usage.completion_tokens == 0
     assert embeddings.usage.prompt_tokens == 17
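
The embedding tests above also rename the request variables from `input`/`inputs` to `input_texts`/`input_tokens`. Besides saying whether a payload holds strings or token IDs, this stops the test body from rebinding Python's built-in `input`; a tiny, purely illustrative snippet (not from the diff):

def build_requests():
    # Distinct names make the payload type obvious and leave input() untouched.
    input_texts = ["The chef prepared a delicious meal."]  # List[str] request
    input_tokens = [1, 1, 1, 1, 1]                         # List[int] request
    return input_texts, input_tokens


texts, tokens = build_requests()
assert callable(input)  # the builtin is still the builtin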

tests/tensorizer_loader/test_tensorizer.py

Lines changed: 2 additions & 3 deletions
@@ -209,9 +209,8 @@ def test_openai_apiserver_with_tensorizer(vllm_runner, tmp_path):
         temperature=0.0)
 
     assert completion.id is not None
-    assert completion.choices is not None and len(completion.choices) == 1
-    assert completion.choices[0].text is not None and len(
-        completion.choices[0].text) >= 5
+    assert len(completion.choices) == 1
+    assert len(completion.choices[0].text) >= 5
     assert completion.choices[0].finish_reason == "length"
     assert completion.usage == openai.types.CompletionUsage(
         completion_tokens=5, prompt_tokens=6, total_tokens=11)
