Commit d59b5d8

Update ray to 2.9.3 (#56)
* Update ray to 2.9.3
* update prompt
1 parent a636a52 commit d59b5d8

5 files changed (+11, -16 lines)

README.md

Lines changed: 3 additions & 3 deletions
@@ -65,13 +65,13 @@ pip install '.[vllm]' -i https://pypi.tuna.tsinghua.edu.cn/simple/
 Pip install Ray:
 
 ```
-pip install -U "ray[serve-grpc]==2.8.0"
+pip install -U "ray[serve-grpc]==2.9.3"
 ```
 
 Option to use another pip source for faster transfer if needed.
 
 ```
-pip install -U "ray[serve-grpc]==2.8.0" -i https://pypi.tuna.tsinghua.edu.cn/simple/
+pip install -U "ray[serve-grpc]==2.9.3" -i https://pypi.tuna.tsinghua.edu.cn/simple/
 ```
 
 > **Note:** ChatGLM2-6b requires transformers<=4.33.3, while the latest vllm requires transformers>=4.36.0.

@@ -82,7 +82,7 @@ Start cluster then:
 ray start --head --port=6379 --dashboard-host=0.0.0.0 --dashboard-port=8265
 ```
 
-See reference [here](https://docs.ray.io/en/releases-2.8.0/ray-overview/installation.html).
+See reference [here](https://docs.ray.io/en/releases-2.9.3/ray-overview/installation.html).
 
 #### Quick start
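
The same Ray pin is bumped in the README and in the base image below. Ray generally expects every node (and any connecting client) in a cluster to run the same version, so a quick check after rebuilding can save a confusing startup failure. A minimal sketch, not part of the commit:

```python
# Optional sanity check: every node and image should report the pinned version,
# otherwise `ray start` / client connections can fail with a version mismatch.
import ray

assert ray.__version__ == "2.9.3", f"unexpected Ray version: {ray.__version__}"
```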

deploy/ray/Dockerfile-base

Lines changed: 1 addition & 1 deletion
@@ -5,5 +5,5 @@ RUN sudo apt-get update && sudo apt-get install -y libaio-dev git-lfs awscli &&
 RUN conda update -n base -c defaults conda -y
 RUN conda install python=3.10 -y
 RUN pip install --upgrade pip
-RUN pip install -U "ray[serve-grpc]==2.8.0"
+RUN pip install -U "ray[serve-grpc]==2.9.3"
 RUN pip install -i https://download.pytorch.org/whl/cu118 torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2

llmserve/backend/server/app.py

Lines changed: 4 additions & 9 deletions
@@ -194,6 +194,7 @@ async def generate_text(self, prompt: Prompt):
 # priority=QueuePriority.GENERATE_TEXT,
 # start_timestamp=start_timestamp,
 )
+logger.info(f"generated text: {text}")
 # return text[0]
 return text

@@ -351,13 +352,9 @@ async def predict(self, model: str, prompt: Union[Prompt, List[Prompt]]) -> Unio
 logger.info(f"set modelID: {item}")
 logger.info(f"search model key {modelID}")
 if isinstance(prompt, Prompt):
-    results = await asyncio.gather(
-        *(await asyncio.gather(*[self._models[modelID].generate_text.remote(prompt)]))
-    )
+    results = await asyncio.gather(*[self._models[modelID].generate_text.remote(prompt)])
 elif isinstance(prompt, list):
-    results = await asyncio.gather(
-        *(await asyncio.gather(*[self._models[modelID].batch_generate_text.remote(prompt)]))
-    )
+    results = await asyncio.gather(*[self._models[modelID].batch_generate_text.remote(prompt)])
 else:
     raise Exception("Invaid prompt format.")
 logger.info(f"{results}")

@@ -418,9 +415,7 @@ async def query(self, *args) -> Dict[str, Dict[str, Any]]:
 else:
     prompts = args[0]
 logger.info(f"ExperimentalDeployment query.prompts {prompts}")
-results = await asyncio.gather(
-    *(await asyncio.gather(*[self._model.generate_text.remote(Prompt(prompt=prompts, use_prompt_format=False))]))
-)
+results = await asyncio.gather(*[(self._model.generate_text.remote(Prompt(prompt=prompts, use_prompt_format=False)))])
 logger.info(f"ExperimentalDeployment query.results {results}")
 results = results[0]
 return results.generated_text
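
The `predict` and `query` changes collapse a doubly nested await/gather into a single one: `.remote()` already returns awaitable Ray ObjectRefs, so one `asyncio.gather` over those refs yields the generation results directly. A minimal sketch of the pattern, using a hypothetical `EchoModel` actor in place of the project's model deployment:

```python
import asyncio

import ray

ray.init(ignore_reinit_error=True)


@ray.remote
class EchoModel:  # hypothetical stand-in for the deployment's model actor
    def generate_text(self, prompt: str) -> str:
        return f"echo: {prompt}"


async def main() -> None:
    model = EchoModel.remote()
    # .remote() returns ObjectRefs; one gather over them awaits the results,
    # so no second await/gather pass is needed.
    results = await asyncio.gather(
        *[model.generate_text.remote(p) for p in ["hi", "there"]]
    )
    print(results)  # ['echo: hi', 'echo: there']


asyncio.run(main())
```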

models/text-generation--Qwen--Qwen1.5-72B-Chat.yaml

Lines changed: 2 additions & 2 deletions
@@ -33,7 +33,7 @@ model_config:
 max_batch_size: 1
 batch_wait_timeout_s: 0
 generate_kwargs:
-  bos_token_id: 151643,
+  bos_token_id: 151643
   # pad_token_id: 151643,
   # eos_token_id: [151645, 151643],
   do_sample: false

@@ -42,7 +42,7 @@ model_config:
 temperature: 0.7
 top_p: 0.8
 top_k: 20
-prompt_format: "'role': 'user', 'content': {instruction}"
+prompt_format: '[{{"role": "system", "content": "You are a helpful assistant."}},{{"role": "user", "content": "{instruction}"}}]'
 # stopping_sequences: ["### Response:", "### End"]
 scaling_config:
   num_workers: 1
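
The new `prompt_format` encodes a chat-style message list rather than a bare key/value fragment. The doubled braces matter: assuming the server fills the template with Python's `str.format`, `{{` and `}}` survive as literal JSON braces while `{instruction}` is replaced by the user prompt, so the rendered string parses as a valid message list. A small sketch of that rendering (the `str.format` step is an assumption about how llmserve consumes the template):

```python
import json

prompt_format = (
    '[{{"role": "system", "content": "You are a helpful assistant."}},'
    '{{"role": "user", "content": "{instruction}"}}]'
)

# {instruction} is substituted; {{ }} become literal JSON braces.
rendered = prompt_format.format(instruction="What is Ray Serve?")
messages = json.loads(rendered)
print(messages)
# [{'role': 'system', 'content': 'You are a helpful assistant.'},
#  {'role': 'user', 'content': 'What is Ray Serve?'}]
```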

models/text-generation--gpt2.yaml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 deployment_config:
   autoscaling_config:
-    min_replicas: 0
+    min_replicas: 1
     initial_replicas: 1
     max_replicas: 8
     target_num_ongoing_requests_per_replica: 1.0
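
Raising `min_replicas` from 0 to 1 keeps at least one gpt2 replica warm, so the first request after an idle period no longer waits for a replica to be scheduled and the model to load. A rough sketch of how such settings map onto Ray Serve's autoscaling config, assuming the YAML is forwarded to `@serve.deployment`; `Gpt2Stub` is a hypothetical placeholder, not the project's deployment class:

```python
from ray import serve
from starlette.requests import Request


@serve.deployment(
    autoscaling_config={
        "min_replicas": 1,  # keep one replica warm instead of scaling to zero
        "initial_replicas": 1,
        "max_replicas": 8,
        "target_num_ongoing_requests_per_replica": 1.0,
    }
)
class Gpt2Stub:  # hypothetical stand-in for the real model deployment
    async def __call__(self, request: Request) -> str:
        return "ok"


app = Gpt2Stub.bind()
# serve.run(app)  # deploy locally after `ray start --head`
```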
