Commit b2261a1

Fix broken AsyncInferenceClient on [DONE] signal (#2458)
* fix trailing newlines
* fix cassettes
* quality
1 parent f2fbf5b commit b2261a1

4 files changed: +34 −43 lines changed

src/huggingface_hub/inference/_common.py

Lines changed: 3 additions & 3 deletions

@@ -296,7 +296,7 @@ def _format_text_generation_stream_output(
     if not byte_payload.startswith(b"data:"):
         return None  # empty line

-    if byte_payload == b"data: [DONE]":
+    if byte_payload.strip() == b"data: [DONE]":
         raise StopIteration("[DONE] signal received.")

     # Decode payload
@@ -344,7 +344,7 @@ def _format_chat_completion_stream_output(
     if not byte_payload.startswith(b"data:"):
         return None  # empty line

-    if byte_payload == b"data: [DONE]":
+    if byte_payload.strip() == b"data: [DONE]":
         raise StopIteration("[DONE] signal received.")

     # Decode payload
@@ -355,7 +355,7 @@ def _format_chat_completion_stream_output(

 async def _async_yield_from(client: "ClientSession", response: "ClientResponse") -> AsyncIterable[bytes]:
     async for byte_payload in response.content:
-        yield byte_payload
+        yield byte_payload.strip()
     await client.close()
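
Context for the one-line fixes above: the sync client consumes `requests`' `iter_lines()`, which strips line endings, while `_async_yield_from` iterates `aiohttp`'s `response.content`, which yields each SSE line with its trailing `\n`. The old exact equality check therefore never matched `[DONE]` on the async path and the sentinel chunk fell through to the JSON parser. A minimal illustrative sketch (not part of the commit; variable names are made up):

# Why the exact comparison missed the stop signal in the async client.
sync_line = b"data: [DONE]"     # as yielded by requests.iter_lines()
async_line = b"data: [DONE]\n"  # as yielded by aiohttp's response.content

assert sync_line == b"data: [DONE]"           # old check: fine for the sync client
assert async_line != b"data: [DONE]"          # old check: silently missed for async
assert async_line.strip() == b"data: [DONE]"  # new check: robust to trailing whitespace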

tests/cassettes/test_async_chat_completion_with_stream.yaml

Lines changed: 22 additions & 31 deletions

@@ -3,80 +3,71 @@ interactions:
     body: null
     headers:
       user-agent:
-      - unknown/None; hf_hub/0.22.0.dev0; python/3.10.12; torch/2.2.0; tensorflow/2.15.0.post1;
+      - unknown/None; hf_hub/0.25.0.dev0; python/3.10.12; torch/2.3.1; tensorflow/2.17.0;
         fastcore/1.5.23
     method: POST
     uri: https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta/v1/chat/completions
   response:
     body:
-      string: 'data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"Deep"},"logprobs":null,"finish_reason":null}]}
+      string: 'data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"Deep"},"logprobs":null,"finish_reason":null}]}


-        data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
-        Learning"},"logprobs":null,"finish_reason":null}]}
+        data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
+        learning"},"logprobs":null,"finish_reason":null}]}


-        data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
+        data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
         is"},"logprobs":null,"finish_reason":null}]}


-        data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
+        data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
         a"},"logprobs":null,"finish_reason":null}]}


-        data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
+        data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
         sub"},"logprobs":null,"finish_reason":null}]}


-        data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"field"},"logprobs":null,"finish_reason":null}]}
+        data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"field"},"logprobs":null,"finish_reason":null}]}


-        data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
+        data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
         of"},"logprobs":null,"finish_reason":null}]}


-        data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
-        Machine"},"logprobs":null,"finish_reason":null}]}
+        data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
+        machine"},"logprobs":null,"finish_reason":null}]}


-        data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
-        Learning"},"logprobs":null,"finish_reason":null}]}
+        data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
+        learning"},"logprobs":null,"finish_reason":null}]}


-        data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
-        that"},"logprobs":null,"finish_reason":null}]}
+        data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
+        that"},"logprobs":null,"finish_reason":"length"}]}
+

-        data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"length"}]}
         '
     headers:
       Access-Control-Allow-Credentials:
       - 'true'
-      Access-Control-Allow-Origin:
-      - '*'
-      Cache-Control:
-      - no-cache
       Connection:
       - keep-alive
+      Content-Length:
+      - '2526'
       Content-Type:
       - text/event-stream
       Date:
-      - Thu, 14 Mar 2024 18:05:08 GMT
-      Transfer-Encoding:
-      - chunked
+      - Mon, 19 Aug 2024 13:01:29 GMT
       Vary:
-      - origin, Origin, Access-Control-Request-Method, Access-Control-Request-Headers
-      x-accel-buffering:
-      - 'no'
-      x-compute-characters:
-      - '103'
+      - Origin, Access-Control-Request-Method, Access-Control-Request-Headers
       x-compute-type:
-      - 1-a10-g
+      - cache
       x-request-id:
-      - idvh81inTm9FBUT-za5t7
+      - w9oS4KSPCoEAOi6QV7-cX
       x-sha:
       - b70e0c9a2d9e14bd1e812d3c398e5f313e93b473
     status:
       code: 200
       message: OK
-    url: https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta/v1/chat/completions
 version: 1

tests/test_inference_async_client.py

Lines changed: 3 additions & 5 deletions

@@ -212,18 +212,16 @@ async def test_async_chat_completion_with_stream() -> None:

     all_items = [item async for item in output]
     generated_text = ""
-    for item in all_items[:-1]:  # all but last item
+    for item in all_items:
         assert isinstance(item, ChatCompletionStreamOutput)
         assert len(item.choices) == 1
         generated_text += item.choices[0].delta.content
     last_item = all_items[-1]

-    assert generated_text == "Deep Learning is a subfield of Machine Learning that"
+    assert generated_text == "Deep learning is a subfield of machine learning that"

-    # Last item has a finish reason but no role/content delta
+    # Last item has a finish reason
     assert last_item.choices[0].finish_reason == "length"
-    assert last_item.choices[0].delta.role is None
-    assert last_item.choices[0].delta.content is None


 @pytest.mark.vcr

tests/test_inference_client.py

Lines changed: 6 additions & 4 deletions

@@ -974,13 +974,14 @@ def test_chat_completion_base_url_works_with_v1(base_url: str):
     assert post_mock.call_args_list[0].kwargs["model"] == "http://0.0.0.0:8080/v1/chat/completions"


-def test_stream_text_generation_response():
+@pytest.mark.parametrize("stop_signal", [b"data: [DONE]", b"data: [DONE]\n", b"data: [DONE] "])
+def test_stream_text_generation_response(stop_signal: bytes):
     data = [
         b'data: {"index":1,"token":{"id":4560,"text":" trying","logprob":-2.078125,"special":false},"generated_text":null,"details":null}',
         b"",  # Empty line is skipped
         b"\n",  # Newline is skipped
         b'data: {"index":2,"token":{"id":311,"text":" to","logprob":-0.026245117,"special":false},"generated_text":" trying to","details":null}',
-        b"data: [DONE]",  # Stop signal
+        stop_signal,  # Stop signal
         # Won't parse after
         b'data: {"index":2,"token":{"id":311,"text":" to","logprob":-0.026245117,"special":false},"generated_text":" trying to","details":null}',
     ]
@@ -989,13 +990,14 @@ def test_stream_text_generation_response():
     assert output == [" trying", " to"]


-def test_stream_chat_completion_response():
+@pytest.mark.parametrize("stop_signal", [b"data: [DONE]", b"data: [DONE]\n", b"data: [DONE] "])
+def test_stream_chat_completion_response(stop_signal: bytes):
     data = [
         b'data: {"object":"chat.completion.chunk","id":"","created":1721737661,"model":"","system_fingerprint":"2.1.2-dev0-sha-5fca30e","choices":[{"index":0,"delta":{"role":"assistant","content":"Both"},"logprobs":null,"finish_reason":null}]}',
         b"",  # Empty line is skipped
         b"\n",  # Newline is skipped
         b'data: {"object":"chat.completion.chunk","id":"","created":1721737661,"model":"","system_fingerprint":"2.1.2-dev0-sha-5fca30e","choices":[{"index":0,"delta":{"role":"assistant","content":" Rust"},"logprobs":null,"finish_reason":null}]}',
-        b"data: [DONE]",  # Stop signal
+        stop_signal,  # Stop signal
         # Won't parse after
         b'data: {"index":2,"token":{"id":311,"text":" to","logprob":-0.026245117,"special":false},"generated_text":" trying to","details":null}',
     ]
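
The parametrized stop signals exercise the same tolerance as the `_common.py` change: the stream loop skips blank lines, stops at the first `[DONE]` regardless of trailing whitespace, and never parses anything after it. A condensed, illustrative sketch (simplified helper, not the library's actual parsing code):

import json

def iter_stream(lines):
    # Simplified, hypothetical version of the parsing the tests above exercise.
    for byte_payload in lines:
        if not byte_payload.startswith(b"data:"):
            continue  # empty line / bare newline is skipped
        if byte_payload.strip() == b"data: [DONE]":
            return  # stop signal: later payloads are never parsed
        yield json.loads(byte_payload.decode("utf-8").lstrip("data:"))

lines = [
    b'data: {"token": {"text": " trying"}}',
    b"\n",                                # skipped
    b"data: [DONE]\n",                    # stop signal with trailing newline (async case)
    b'data: {"token": {"text": " to"}}',  # never reached
]
assert [chunk["token"]["text"] for chunk in iter_stream(lines)] == [" trying"]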
