[CI] add CI logprobs case (#3189)

plusNew001 · ZhangYulongg · web-flow · commit d0e9a703800b · 2025-08-08T15:47:55.000+08:00
* [ci] add CI case

* [ci] add CI case

* [ci] add CI case

* [ci] add CI case

---------

Co-authored-by: ZhangYulongg <1272816783@qq.com>
diff --git a/test/ce/server/test_logprobs.py b/test/ce/server/test_logprobs.py
@@ -0,0 +1,161 @@
+import json
+
+from core import TEMPLATE, URL, build_request_payload, send_request
+
+
+def test_unstream_with_logprobs():
+    """
+    Verify the token probabilities returned by a non-streaming request with logprobs on.
+
+    The request data sets no logprobs option, so it presumably comes from TEMPLATE (confirm).
+    """
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "牛顿的三大运动定律是什么？"},
+    ]
+    data = {"stream": False, "messages": messages, "max_tokens": 3}
+
+    # Build the payload and send the request.
+    response = send_request(URL, build_request_payload(TEMPLATE, data))
+    print(json.dumps(response.json(), indent=2, ensure_ascii=False))
+    resp_json = response.json()
+    choice = resp_json["choices"][0]
+
+    # The generated text must equal the expected output.
+    assert choice["message"]["content"] == "牛顿的"
+    # So must the probability data of the first generated token.
+    first_token = choice["logprobs"]["content"][0]
+    assert first_token["token"] == "牛顿"
+    assert first_token["logprob"] == -0.031025361269712448
+    assert first_token["top_logprobs"][0] == {
+        "token": "牛顿",
+        "logprob": -0.031025361269712448,
+        "bytes": [231, 137, 155, 233, 161, 191],
+        "top_logprobs": None,
+    }
+
+    # Token usage must match the expected counts exactly.
+    expected_usage = {"prompt_tokens": 22, "total_tokens": 25, "completion_tokens": 3}
+    expected_usage["prompt_tokens_details"] = {"cached_tokens": 0}
+    assert resp_json["usage"] == expected_usage
+
+
+def test_unstream_without_logprobs():
+    """
+    Verify that a non-streaming response has a null logprobs field once logprobs are disabled.
+    """
+    conversation = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "牛顿的三大运动定律是什么？"},
+    ]
+    # Explicitly switch both probability options off for this request.
+    data = dict(stream=False, logprobs=False, top_logprobs=None, messages=conversation, max_tokens=3)
+
+    # Build the payload and send the request.
+    request_body = build_request_payload(TEMPLATE, data)
+    response = send_request(URL, request_body)
+    print(json.dumps(response.json(), indent=2, ensure_ascii=False))
+    resp_json = response.json()
+    first_choice = resp_json["choices"][0]
+
+    # The generated text must match and the logprobs field must be null.
+    assert first_choice["message"]["content"] == "牛顿的"
+    assert first_choice["logprobs"] is None
+
+    # Token usage must match the expected counts exactly.
+    expected_usage = {
+        "prompt_tokens": 22,
+        "total_tokens": 25,
+        "completion_tokens": 3,
+        "prompt_tokens_details": {"cached_tokens": 0},
+    }
+    assert resp_json["usage"] == expected_usage
+
+
+def test_stream_with_logprobs():
+    """
+    Verify the first token's probability data in a streaming response with logprobs on.
+
+    The request data sets no logprobs option, so it presumably comes from TEMPLATE (confirm).
+    """
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "牛顿的三大运动定律是什么？"},
+    ]
+    data = {"stream": True, "messages": messages, "max_tokens": 3}
+    response = send_request(URL, build_request_payload(TEMPLATE, data))
+
+    # Find the first streamed chunk whose delta carries non-empty content.
+    result_chunk = None
+    for line in response.iter_lines():
+        if not line:
+            continue
+        decoded = line.decode("utf-8").removeprefix("data: ")
+        if decoded == "[DONE]":
+            break
+        chunk = json.loads(decoded)
+        # Skip chunks with no choices (missing key or empty list): they carry no delta.
+        choices = chunk.get("choices")
+        if choices and (choices[0].get("delta") or {}).get("content"):
+            result_chunk = chunk
+            print(json.dumps(result_chunk, indent=2, ensure_ascii=False))
+            break
+
+    # Fail clearly if no content chunk arrived, then check the first token's probability fields.
+    assert result_chunk is not None, "stream ended without any content chunk"
+    choice = result_chunk["choices"][0]
+    assert choice["delta"]["content"] == "牛顿"
+    first_token = choice["logprobs"]["content"][0]
+    assert first_token["token"] == "牛顿"
+    assert first_token["logprob"] == -0.031025361269712448
+    assert first_token["top_logprobs"][0] == {
+        "token": "牛顿",
+        "logprob": -0.031025361269712448,
+        "bytes": [231, 137, 155, 233, 161, 191],
+    }
+
+
+def test_stream_without_logprobs():
+    """
+    Verify that the first content chunk of a streaming response has a null logprobs field.
+    """
+    data = {
+        "stream": True,
+        "logprobs": False,
+        "top_logprobs": None,
+        "messages": [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "牛顿的三大运动定律是什么？"},
+        ],
+        "max_tokens": 3,
+    }
+    response = send_request(URL, build_request_payload(TEMPLATE, data))
+
+    # Find the first streamed chunk whose delta carries non-empty content.
+    result_chunk = None
+    for line in response.iter_lines():
+        if not line:
+            continue
+        decoded = line.decode("utf-8").removeprefix("data: ")
+        if decoded == "[DONE]":
+            break
+        chunk = json.loads(decoded)
+        # Skip chunks with no choices (missing key or empty list): they carry no delta.
+        choices = chunk.get("choices")
+        if choices and (choices[0].get("delta") or {}).get("content"):
+            result_chunk = chunk
+            print(json.dumps(result_chunk, indent=2, ensure_ascii=False))
+            break
+
+    # Fail clearly if no content chunk arrived, then check that logprobs is null.
+    assert result_chunk is not None, "stream ended without any content chunk"
+    choice = result_chunk["choices"][0]
+    assert choice["delta"]["content"] == "牛顿"
+    assert choice["logprobs"] is None
+
+
+if __name__ == "__main__":  # script entry point: run every case in order
+    _cases = (test_unstream_with_logprobs, test_unstream_without_logprobs)
+    _cases += (test_stream_with_logprobs, test_stream_without_logprobs)
+    for _case in _cases:
+        _case()
diff --git a/test/ce/server/test_stream.py b/test/ce/server/test_stream.py
@@ -0,0 +1,62 @@
+import json
+
+from core import TEMPLATE, URL, build_request_payload, send_request
+
+
+def test_stream_and_non_stream():
+    """
+    Verify that streaming and non-streaming requests return the same content.
+
+    Sends the request with stream=True, then stream=False, and compares the joined deltas.
+    """
+    data = {
+        "stream": True,
+        "messages": [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "牛顿的三大运动定律是什么？"},
+        ],
+        "max_tokens": 100,
+    }
+
+    # Send the streaming request.
+    payload = build_request_payload(TEMPLATE, data)
+    response = send_request(URL, payload)
+
+    # Parse the streamed response line by line until the [DONE] sentinel.
+    resp_chunks = []
+    for line in response.iter_lines():
+        if not line:
+            continue
+        decoded = line.decode("utf-8")
+        if decoded.startswith("data: "):
+            decoded = decoded[len("data: ") :]
+        if decoded == "[DONE]":
+            break
+        resp_chunks.append(json.loads(decoded))
+
+    # Concatenate the text deltas. Chunks with no choices (missing key or empty list)
+    # and deltas whose content is missing or null are skipped rather than crashing.
+    pieces = []
+    for chunk in resp_chunks:
+        choices = chunk.get("choices")
+        content = (choices[0].get("delta") or {}).get("content") if choices else None
+        if content:
+            pieces.append(content)
+    final_content = "".join(pieces)
+    print(final_content)
+
+    # Repeat the request with stream=False.
+    data["stream"] = False
+    payload = build_request_payload(TEMPLATE, data)
+    response = send_request(URL, payload)
+
+    # Print the full response, pretty-printed.
+    response_json = response.json()
+    print(json.dumps(response_json, indent=2, ensure_ascii=False))
+
+    # Both modes must produce the same content.
+    assert final_content == response_json["choices"][0]["message"]["content"]
+
+
+if __name__ == "__main__":  # allow running this case directly as a script
+    test_stream_and_non_stream()