Skip to content

Commit f78d658

Browse files
committed
Merge remote-tracking branch 'origin/main' into wzj_pd
2 parents 49d8adb + 2fde157 commit f78d658

File tree

8 files changed

29 additions and 14 deletions (lines changed)

8 files changed

29 additions and 14 deletions (lines changed)

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ RUN pip install -r /lightllm/requirements.txt --no-cache-dir --ignore-installed
4141
RUN pip install --no-cache-dir nvidia-nccl-cu12==2.25.1 # for allreduce hang issues in multinode H100
4242

4343
RUN git clone https://github.com/Dao-AILab/flash-attention.git -b v2.7.4.post1
44-
RUN cd flash-attention/hopper && NVCC_THREADS=16 python setup.py install
44+
RUN cd flash-attention/hopper && MAX_JOBS=4 NVCC_THREADS=16 python setup.py install
4545

4646
COPY . /lightllm
4747
RUN pip install -e /lightllm --no-cache-dir

lightllm/models/deepseek2/model.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -74,6 +74,7 @@ def _init_some_value(self):
7474

7575
def _init_custom(self):
7676
self._init_to_get_yarn_rotary()
77+
dist_group_manager.new_deepep_group(self.config["n_routed_experts"])
7778

7879
def _verify_params(self):
7980
return super()._verify_params()

lightllm/server/api_http.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -368,7 +368,7 @@ async def tokens(request: Request):
368368

369369
multimodal_params_dict = request_dict.get("multimodal_params", {})
370370
multimodal_params = MultimodalParams(**multimodal_params_dict)
371-
await multimodal_params.verify_and_preload()
371+
await multimodal_params.verify_and_preload(request)
372372
return JSONResponse(
373373
{
374374
"ntokens": g_objs.httpserver_manager.tokens(

lightllm/server/httpserver/manager.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -228,7 +228,7 @@ async def generate(
228228
original_multimodal_params = copy.deepcopy(multimodal_params)
229229

230230
if self.pd_mode.is_P_or_NORMAL():
231-
await multimodal_params.verify_and_preload()
231+
await multimodal_params.verify_and_preload(request)
232232

233233
# 记录请求到达的相关信息
234234
await self._log_req_header(request_headers, group_request_id)

lightllm/server/multimodal_params.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
from PIL import Image
77
from lightllm.utils.image_utils import fetch_image
88
import base64
9+
from fastapi import Request
910

1011

1112
class ImageItem:
@@ -24,11 +25,12 @@ def __init__(self, **kwargs):
2425
self._preload_data = None
2526
self.extra_params = {}
2627

27-
async def preload(self):
28+
async def preload(self, request: Request):
2829
try:
2930
if self._type == "url":
3031
timeout = int(os.getenv("REQUEST_TIMEOUT", "5"))
31-
img_data = await fetch_image(self._data, timeout=timeout)
32+
proxy = os.getenv("REQUEST_PROXY", None)
33+
img_data = await fetch_image(self._data, request, timeout=timeout, proxy=proxy)
3234
elif self._type == "base64":
3335
img_data = base64.b64decode(self._data)
3436
elif self._type == "image_size":
@@ -81,9 +83,9 @@ def __init__(
8183
self.images = [ImageItem(**i) for i in images]
8284
return
8385

84-
async def verify_and_preload(self):
86+
async def verify_and_preload(self, request: Request):
8587
for image in self.images:
86-
await image.preload()
88+
await image.preload(request)
8789
return
8890

8991
def to_dict(self):

lightllm/server/router/model_infer/mode_backend/base_backend.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -192,7 +192,6 @@ def init_model(self, kvargs):
192192
elif self.model_type == "phi3":
193193
self.model = Phi3TpPartModel(model_kvargs)
194194
elif self.model_type in ["deepseek_v2", "deepseek_v3"]:
195-
dist_group_manager.new_deepep_group(model_cfg["n_routed_experts"])
196195
self.model = Deepseek2TpPartModel(model_kvargs)
197196
elif self.model_type == "internvl_chat":
198197
llm_model_type = model_cfg.get("llm_config").get("model_type")

lightllm/server/router/model_infer/mode_backend/dp_backend/impl.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ def init_custom(self):
3131
# nan 值,避免后续构建的fake请求在计算的过程中出现计算错误。
3232
from .pre_process import padded_prepare_prefill_inputs
3333

34-
kwargs, run_reqs, padded_req_num = padded_prepare_prefill_inputs([], 1, is_multimodal=False)
34+
kwargs, run_reqs, padded_req_num = padded_prepare_prefill_inputs([], 1, is_multimodal=self.is_multimodal)
3535
self.model.forward(**kwargs)
3636
assert len(run_reqs) == 0 and padded_req_num == 1
3737
return

lightllm/utils/image_utils.py

Lines changed: 18 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,13 @@
1+
import time
12
import base64
23
import httpx
4+
import logging
35
from PIL import Image
46
from io import BytesIO
7+
from fastapi import Request
8+
from lightllm.utils.log_utils import init_logger
9+
10+
logger = init_logger(__name__)
511

612

713
def image2base64(img_str: str):
@@ -13,17 +19,24 @@ def image2base64(img_str: str):
1319
return base64.b64encode(buffer.getvalue()).decode("utf-8")
1420

1521

16-
async def fetch_image(url, timeout):
17-
async with httpx.AsyncClient() as client:
22+
async def fetch_image(url, request: Request, timeout, proxy=None):
23+
logger.info(f"Begin to download image from url: {url}")
24+
start_time = time.time()
25+
async with httpx.AsyncClient(proxy=proxy) as client:
1826
async with client.stream("GET", url, timeout=timeout) as response:
1927
response.raise_for_status()
2028
ans_bytes = []
21-
2229
async for chunk in response.aiter_bytes(chunk_size=1024 * 1024):
30+
if request is not None and await request.is_disconnected():
31+
await response.aclose()
32+
raise Exception("Request disconnected. User cancelled download.")
2333
ans_bytes.append(chunk)
2434
# 接收的数据不能大于128M
2535
if len(ans_bytes) > 128:
26-
raise Exception("image data is too big")
36+
raise Exception(f"url {url} Image data is too big")
2737

2838
content = b"".join(ans_bytes)
29-
return content
39+
end_time = time.time()
40+
cost_time = end_time - start_time
41+
logger.info(f"Download url {url} image cost time: {cost_time} seconds")
42+
return content

0 commit comments

Comments
 (0)