Skip to content

Commit d0e9a70

Browse files
[CI] add CI logprobs case (#3189)
* [ci] add CI case * [ci] add CI case * [ci] add CI case * [ci] add CI case --------- Co-authored-by: ZhangYulongg <[email protected]>
1 parent 7126784 commit d0e9a70

File tree

2 files changed

+223
-0
lines changed

2 files changed

+223
-0
lines changed

test/ce/server/test_logprobs.py

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
import json
2+
3+
from core import TEMPLATE, URL, build_request_payload, send_request
4+
5+
6+
def test_unstream_with_logprobs():
    """
    Check the token log-probability data of a non-streaming completion.

    Sends a non-streaming chat request capped at 3 tokens and compares the
    generated text, the first token's logprob entry, its top candidate and
    the usage accounting against fixed baseline values.
    """
    # NOTE(review): the request does not set "logprobs" itself; presumably
    # TEMPLATE enables it -- confirm against core.TEMPLATE.
    data = {
        "stream": False,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "牛顿的三大运动定律是什么?"},
        ],
        "max_tokens": 3,
    }

    # Build the payload and send the request.
    payload = build_request_payload(TEMPLATE, data)
    response = send_request(URL, payload)

    # Decode the body once and reuse it for both the log output and the checks.
    resp_json = response.json()
    print(json.dumps(resp_json, indent=2, ensure_ascii=False))

    choice = resp_json["choices"][0]
    first_token = choice["logprobs"]["content"][0]

    # Verify the generated text and the probability information.
    assert choice["message"]["content"] == "牛顿的"
    assert first_token["token"] == "牛顿"
    # NOTE(review): exact float equality pins the baseline bit-for-bit; this
    # assumes the server output is fully deterministic -- confirm.
    assert first_token["logprob"] == -0.031025361269712448
    assert first_token["top_logprobs"][0] == {
        "token": "牛顿",
        "logprob": -0.031025361269712448,
        "bytes": [231, 137, 155, 233, 161, 191],
        "top_logprobs": None,
    }
    assert resp_json["usage"] == {
        "prompt_tokens": 22,
        "total_tokens": 25,
        "completion_tokens": 3,
        "prompt_tokens_details": {"cached_tokens": 0},
    }
41+
42+
43+
def test_unstream_without_logprobs():
    """
    Check a non-streaming completion with logprobs explicitly disabled.

    Sends the request with ``logprobs=False`` / ``top_logprobs=None`` and
    asserts that the choice's ``logprobs`` entry is ``None`` (the key is
    expected to be present), along with the baseline text and usage.
    """
    data = {
        "stream": False,
        "logprobs": False,
        "top_logprobs": None,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "牛顿的三大运动定律是什么?"},
        ],
        "max_tokens": 3,
    }

    # Build the payload and send the request.
    payload = build_request_payload(TEMPLATE, data)
    response = send_request(URL, payload)

    # Decode the body once and reuse it for both the log output and the checks.
    resp_json = response.json()
    print(json.dumps(resp_json, indent=2, ensure_ascii=False))

    choice = resp_json["choices"][0]

    # Verify the generated text and that no logprobs were returned.
    assert choice["message"]["content"] == "牛顿的"
    assert choice["logprobs"] is None
    assert resp_json["usage"] == {
        "prompt_tokens": 22,
        "total_tokens": 25,
        "completion_tokens": 3,
        "prompt_tokens_details": {"cached_tokens": 0},
    }
73+
74+
75+
def test_stream_with_logprobs():
    """
    Check the first token's log-probability data in a streaming completion.

    Reads the event stream until the first chunk whose delta carries
    non-empty content, then compares that chunk's token, logprob and top
    candidate against fixed baseline values.
    """
    # NOTE(review): the request does not set "logprobs" itself; presumably
    # TEMPLATE enables it -- confirm against core.TEMPLATE.
    data = {
        "stream": True,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "牛顿的三大运动定律是什么?"},
        ],
        "max_tokens": 3,
    }

    payload = build_request_payload(TEMPLATE, data)
    response = send_request(URL, payload)

    # Find the first streamed chunk that carries content.
    result_chunk = None
    for line in response.iter_lines():
        if not line:
            continue
        decoded = line.decode("utf-8").removeprefix("data: ")
        if decoded == "[DONE]":
            break

        chunk = json.loads(decoded)
        # Skip chunks without a usable choice/delta instead of crashing on
        # them; only a chunk with non-empty content is of interest.
        choices = chunk.get("choices") or []
        if not choices:
            continue
        delta = choices[0].get("delta") or {}
        if delta.get("content"):
            result_chunk = chunk
            print(json.dumps(result_chunk, indent=2, ensure_ascii=False))
            break

    # Fail with a clear message rather than a KeyError on an empty dict.
    assert result_chunk is not None, "stream ended without any chunk carrying delta content"

    choice = result_chunk["choices"][0]
    first_token = choice["logprobs"]["content"][0]

    # Verify the probability fields.
    assert choice["delta"]["content"] == "牛顿"
    assert first_token["token"] == "牛顿"
    # NOTE(review): exact float equality pins the baseline bit-for-bit; this
    # assumes the server output is fully deterministic -- confirm.
    assert first_token["logprob"] == -0.031025361269712448
    assert first_token["top_logprobs"][0] == {
        "token": "牛顿",
        "logprob": -0.031025361269712448,
        "bytes": [231, 137, 155, 233, 161, 191],
    }
116+
117+
118+
def test_stream_without_logprobs():
    """
    Check a streaming completion with logprobs explicitly disabled.

    Reads the event stream until the first chunk whose delta carries
    non-empty content and asserts that this chunk's ``logprobs`` entry is
    ``None`` (the key is expected to be present).
    """
    data = {
        "stream": True,
        "logprobs": False,
        "top_logprobs": None,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "牛顿的三大运动定律是什么?"},
        ],
        "max_tokens": 3,
    }

    payload = build_request_payload(TEMPLATE, data)
    response = send_request(URL, payload)

    # Find the first streamed chunk that carries content.
    result_chunk = None
    for line in response.iter_lines():
        if not line:
            continue
        decoded = line.decode("utf-8").removeprefix("data: ")
        if decoded == "[DONE]":
            break

        chunk = json.loads(decoded)
        # Skip chunks without a usable choice/delta instead of crashing on
        # them; only a chunk with non-empty content is of interest.
        choices = chunk.get("choices") or []
        if not choices:
            continue
        delta = choices[0].get("delta") or {}
        if delta.get("content"):
            result_chunk = chunk
            print(json.dumps(result_chunk, indent=2, ensure_ascii=False))
            break

    # Fail with a clear message rather than a KeyError on an empty dict.
    assert result_chunk is not None, "stream ended without any chunk carrying delta content"

    choice = result_chunk["choices"][0]

    # Verify the content and that no logprobs were returned.
    assert choice["delta"]["content"] == "牛顿"
    assert choice["logprobs"] is None
155+
156+
157+
if __name__ == "__main__":
    # Direct-run entry point: execute every logprobs case in declaration
    # order without going through pytest; the first failure stops the run.
    for case in (
        test_unstream_with_logprobs,
        test_unstream_without_logprobs,
        test_stream_with_logprobs,
        test_stream_without_logprobs,
    ):
        case()

test/ce/server/test_stream.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import json
2+
3+
from core import TEMPLATE, URL, build_request_payload, send_request
4+
5+
6+
def test_stream_and_non_stream():
    """
    Check that streaming and non-streaming modes return the same content.

    The same request is sent twice, first with ``stream=True`` and then with
    ``stream=False``; the concatenation of the streamed delta contents must
    equal the message content of the non-streaming response.
    """
    # NOTE(review): equality of the two outputs presumes deterministic
    # decoding (e.g. fixed sampling settings in TEMPLATE) -- confirm.
    data = {
        "stream": True,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "牛顿的三大运动定律是什么?"},
        ],
        "max_tokens": 100,
    }

    # Build the payload and send the streaming request.
    payload = build_request_payload(TEMPLATE, data)
    response = send_request(URL, payload)

    # Parse the stream line by line, collecting every piece of delta content.
    pieces = []
    for line in response.iter_lines():
        if not line:
            continue

        decoded = line.decode("utf-8")
        if decoded.startswith("data: "):
            decoded = decoded[len("data: "):]

        if decoded == "[DONE]":
            break

        chunk = json.loads(decoded)
        # A chunk may carry an empty "choices" list or a delta whose content
        # is missing/None; neither contributes text, so skip them rather than
        # hitting IndexError or making str.join fail on None.
        choices = chunk.get("choices") or []
        if not choices:
            continue
        content = (choices[0].get("delta") or {}).get("content")
        if content:
            pieces.append(content)

    # Assemble the model's final streamed output.
    final_content = "".join(pieces)
    print(final_content)

    # Switch to stream=False and send a regular request with the same input.
    data["stream"] = False
    payload = build_request_payload(TEMPLATE, data)
    response = send_request(URL, payload)

    # Decode the body once; print it formatted, then use it for the check.
    response_json = response.json()
    print(json.dumps(response_json, indent=2, ensure_ascii=False))

    # Both modes must produce identical content.
    assert final_content == response_json["choices"][0]["message"]["content"]
59+
60+
61+
if __name__ == "__main__":
    # Direct-run entry point: execute the single consistency case without
    # going through pytest.
    test_stream_and_non_stream()

0 commit comments

Comments
 (0)