Skip to content

Commit 2eee472

Browse files
committed
docker compose 启动方式修改
1 parent 4e5ea37 commit 2eee472

File tree

5 files changed

+27
-11
lines changed

5 files changed

+27
-11
lines changed

Dockerfile.copy

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
FROM docker.1ms.run/506610466/gpt_server:latest
2+
3+
COPY ./ /gpt_server
4+
5+
WORKDIR /gpt_server
6+
7+
CMD ["/bin/bash"]

docker-compose.yml

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,23 @@ version: '3'
22
services:
33
gpt_server:
44
# 构建
5-
# build:
6-
# context: .
7-
# dockerfile: Dockerfile.copy
8-
image: docker.1ms.run/506610466/gpt_server:latest
5+
# 为什么每次构建更好?而不是直接使用 image: docker.1ms.run/506610466/gpt_server:latest
6+
# 如果使用 volumes 映射的方式,虽然启动更快,但会影响已启动容器的runtime稳定性,物理机修改的代码会在容器runtime中立马生效。
7+
build:
8+
context: .
9+
dockerfile: Dockerfile.copy
10+
# image: docker.1ms.run/506610466/gpt_server:latest
11+
image: gpt_server:latest_
912
container_name: gpt_server
10-
shm_size: '4g' # 设置共享内存为4GB
13+
shm_size: '8g' # 设置共享内存为8GB
1114
restart: always
1215
# network_mode: host
1316
ports:
1417
- 8082:8082
1518
environment:
1619
- TZ=Asia/Shanghai # 设置中国时区(compose 列表形式的 environment 需用 VAR=value)
1720
volumes:
18-
- ./gpt_server:/gpt_server/gpt_server # 将最新代码直接映射到容器中,以运行最新的代码
21+
# - ./gpt_server:/gpt_server/gpt_server # 将最新代码直接映射到容器中,以运行最新的代码
1922
- /home/dev/model/:/home/dev/model/ # 映射模型路径
2023
deploy:
2124
resources:

gpt_server/model_backend/lmdeploy_backend.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
TurbomindEngineConfig,
66
PytorchEngineConfig,
77
)
8+
from transformers import PreTrainedTokenizerBase
89
from typing import Any, Dict, AsyncGenerator
910
from lmdeploy.archs import get_task
1011
from gpt_server.model_handler.reasoning_parser import ReasoningParserManager
@@ -55,7 +56,7 @@ def is_messages_with_tool(messages: list):
5556

5657

5758
class LMDeployBackend(ModelBackend):
58-
def __init__(self, model_path) -> None:
59+
def __init__(self, model_path, tokenizer: PreTrainedTokenizerBase) -> None:
5960
backend = backend_map[os.getenv("backend")]
6061
enable_prefix_caching = bool(os.getenv("enable_prefix_caching", False))
6162
max_model_len = os.getenv("max_model_len", None)

gpt_server/model_backend/sglang_backend.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from loguru import logger
99
from PIL import Image
1010
import sglang as sgl
11+
from transformers import PreTrainedTokenizerBase
1112
from sglang.utils import convert_json_schema_to_str
1213
from sglang.srt.conversation import generate_chat_conv
1314

@@ -44,7 +45,7 @@ def _transform_messages(
4445

4546

4647
class SGLangBackend(ModelBackend):
47-
def __init__(self, model_path) -> None:
48+
def __init__(self, model_path, tokenizer: PreTrainedTokenizerBase) -> None:
4849
lora = os.getenv("lora", None)
4950
enable_prefix_caching = bool(os.getenv("enable_prefix_caching", False))
5051
max_model_len = os.getenv("max_model_len", None)

gpt_server/model_worker/base/model_worker_base.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -139,12 +139,16 @@ def load_model_tokenizer(self, model_path):
139139
from gpt_server.model_backend.sglang_backend import SGLangBackend
140140

141141
logger.info(f"{self.worker_name} 使用 SGLang 后端")
142-
self.backend = SGLangBackend(model_path=self.model_path)
142+
self.backend = SGLangBackend(
143+
model_path=self.model_path, tokenizer=self.tokenizer
144+
)
143145
elif "lmdeploy" in os.getenv("backend"):
144146
from gpt_server.model_backend.lmdeploy_backend import LMDeployBackend
145147

146148
logger.info(f"{self.worker_name} 使用 LMDeploy 后端")
147-
self.backend = LMDeployBackend(model_path=self.model_path)
149+
self.backend = LMDeployBackend(
150+
model_path=self.model_path, tokenizer=self.tokenizer
151+
)
148152

149153
elif os.getenv("backend") == "hf":
150154
from gpt_server.model_backend.hf_backend import HFBackend
@@ -255,7 +259,7 @@ def run(cls):
255259
logger.remove(0)
256260
log_level = os.getenv("log_level", "WARNING")
257261
logger.add(sys.stderr, level=log_level)
258-
262+
259263
host = args.host
260264
controller_address = args.controller_address
261265

0 commit comments

Comments
 (0)