
Commit f8be0c5

fix issue: pipelines that do not support streaming fail when called in streaming mode (#84)
1 parent 4208e4f commit f8be0c5

2 files changed: 4 additions, 4 deletions

llmserve/backend/llm/engines/generic.py

Lines changed: 2 additions & 2 deletions
@@ -376,7 +376,7 @@ async def launch_engine(
         )
 
         self.base_worker_group = worker_group
-        self.can_stream = await asyncio.gather(*[worker_group[0].can_stream.remote()])
+        self.can_stream = ray.get(worker_group[0].can_stream.remote())
         return worker_group
 
     async def predict(
@@ -503,5 +503,5 @@ async def stream(
             f"Pipeline {self.args.model_config.initialization.pipeline} does not support streaming. Ignoring queue."
         )
         yield await self.predict(
-            prompts, timeout_s=timeout_s, start_timestamp=start_timestamp
+            prompts, timeout_s=timeout_s, start_timestamp=start_timestamp, lock=lock
         )
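
For context, here is a minimal, self-contained sketch of the pattern this change establishes: the engine resolves the worker's can_stream flag with ray.get (so it holds a bool rather than the one-element list that awaiting asyncio.gather produced), and the non-streaming fallback inside stream forwards the caller's lock on to predict. The Worker and Engine classes below are hypothetical stand-ins, not llmserve's actual implementations.

import asyncio
import ray


@ray.remote
class Worker:
    """Hypothetical stand-in for a prediction worker actor."""

    def can_stream(self) -> bool:
        # This pipeline does not support token streaming.
        return False

    def predict(self, prompts):
        return [f"echo: {p}" for p in prompts]


class Engine:
    """Hypothetical stand-in for the generic engine."""

    def __init__(self, worker):
        self.worker = worker
        # ray.get returns the bool itself; awaiting asyncio.gather on the
        # ObjectRef (the old code) yields a one-element list instead.
        self.can_stream = ray.get(worker.can_stream.remote())

    async def predict(self, prompts, lock: asyncio.Lock):
        async with lock:  # serialize access to the shared worker group
            return await self.worker.predict.remote(prompts)

    async def stream(self, prompts, lock: asyncio.Lock):
        if not self.can_stream:
            # Non-streaming pipeline: fall back to a single predict call,
            # forwarding the caller's lock (the second hunk above).
            yield await self.predict(prompts, lock=lock)
            return
        # ... a real streaming pipeline would yield tokens here ...


async def main():
    engine = Engine(Worker.remote())
    async for out in engine.stream(["hello"], lock=asyncio.Lock()):
        print(out)


if __name__ == "__main__":
    ray.init()
    asyncio.run(main())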

llmserve/backend/llm/predictor.py

Lines changed: 2 additions & 2 deletions
@@ -174,7 +174,7 @@ async def _predict_async(
         Returns:
             A list of generated texts.
         """
-        prediction = await self.engine.predict(prompts, timeout_s=timeout_s, start_timestamp=start_timestamp, lock = self._base_worker_group_lock)
+        prediction = await self.engine.predict(prompts, timeout_s=timeout_s, start_timestamp=start_timestamp, lock=self._base_worker_group_lock)
         return prediction
 
     async def _stream_async(
@@ -197,7 +197,7 @@ async def _stream_async(
         Returns:
             A list of generated texts.
         """
-        async for s in self.engine.stream(prompts, timeout_s=timeout_s, start_timestamp=start_timestamp, lock = self._base_worker_group_lock):
+        async for s in self.engine.stream(prompts, timeout_s=timeout_s, start_timestamp=start_timestamp, lock=self._base_worker_group_lock):
            yield s
 
     # Called by Serve to check the replica's health.
