Commit 0ef48cb

SangChengCliujiacheng authored and committed
[0826]modify visual server
1 parent 3561a17 commit 0ef48cb

File tree

12 files changed: 226 additions, 444 deletions


lightllm/models/vit/model.py

Lines changed: 1 addition & 1 deletion
@@ -178,7 +178,7 @@ def encode(self, images: List[ImageItem]):
         for i, img in enumerate(images):
             if isinstance(img, ImageItem):
                 uuids.append(img.uuid)
-                image_data = read_shm(get_shm_name_data(img.uuid))
+                image_data = img._preload_data
                 image_data = Image.open(BytesIO(image_data))
                 t = self.load_image_func(image_data, max_num=img.extra_params["image_patch_max_num"])
                 img_tensors.append(t)
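
Note on the change above: the ViT encoder no longer fetches image bytes from shared memory by uuid; it expects the raw bytes to already be attached to the ImageItem. A minimal sketch of the consuming pattern, assuming _preload_data simply carries the encoded image bytes (the helper function below is illustrative, not part of this commit):

from io import BytesIO

from PIL import Image


def decode_preloaded(img) -> Image.Image:
    # Assumption: img._preload_data holds the raw encoded bytes (e.g. JPEG/PNG)
    # that were previously read via read_shm(get_shm_name_data(img.uuid)).
    return Image.open(BytesIO(img._preload_data))
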

lightllm/server/api_http.py

Lines changed: 10 additions & 6 deletions
@@ -42,7 +42,6 @@
 from .httpserver.manager import HttpServerManager
 from .visualserver.manager import VisualManager
 from .httpserver_for_pd_master.manager import HttpServerManagerForPDMaster
-from .httpserver_for_visual_only.manager import HttpServerManagerForVisualOnly
 from .api_lightllm import lightllm_get_score, lightllm_get_image_embedding
 from lightllm.utils.envs_utils import get_env_start_args, get_lightllm_websocket_max_message_size
 from lightllm.utils.log_utils import init_logger

@@ -70,7 +69,7 @@ class G_Objs:
     args: object = None
     g_generate_func: Callable = None
     g_generate_stream_func: Callable = None
-    httpserver_manager: Union[HttpServerManager, HttpServerManagerForPDMaster, HttpServerManagerForVisualOnly] = None
+    httpserver_manager: Union[HttpServerManager, HttpServerManagerForPDMaster, VisualManager] = None
     visual_manager: VisualManager = None
     shared_token_load: TokenLoad = None

@@ -94,11 +93,12 @@ def set_args(self, args):
             )
         elif args.run_mode == "visual_only":
             self.metric_client = MetricClient(args.metric_port)
-            self.httpserver_manager = HttpServerManagerForVisualOnly(
+            self.httpserver_manager = VisualManager(
                 args,
-                cache_port=args.cache_port,
+                next_module_port=None,
                 visual_port=args.visual_port,
-                metric_port=args.metric_port,
+                cache_port=None,
+                visual_model_rpc_ports=args.visual_model_rpc_ports,
             )
         elif args.run_mode == "llm_only":
             init_tokenizer(args)  # for openai api

@@ -372,6 +372,10 @@ async def startup_event():
     logger.info("server start up")
     loop = asyncio.get_event_loop()
     g_objs.set_args(get_env_start_args())
-    loop.create_task(g_objs.httpserver_manager.handle_loop())
+    if g_objs.args.run_mode == "visual_only":
+        await g_objs.httpserver_manager.wait_to_model_ready()
+        loop.create_task(g_objs.httpserver_manager.loop_for_fwd_visual_only())
+    else:
+        loop.create_task(g_objs.httpserver_manager.handle_loop())
     logger.info(f"server start up ok, loop use is {asyncio.get_event_loop()}")
     return
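
For reference, a self-contained sketch of the startup pattern the new visual_only branch relies on: wait until the visual model workers report ready, then schedule the forwarding loop on the running event loop. DummyVisualManager below is an illustrative stand-in, not the lightllm class:

import asyncio


class DummyVisualManager:
    async def wait_to_model_ready(self):
        # stand-in for waiting until the visual model rpc workers finish loading
        await asyncio.sleep(0.1)

    async def loop_for_fwd_visual_only(self):
        # stand-in for the loop that pulls requests and runs the ViT forward pass
        while True:
            await asyncio.sleep(1)


async def startup(run_mode: str, manager: DummyVisualManager):
    loop = asyncio.get_event_loop()
    if run_mode == "visual_only":
        await manager.wait_to_model_ready()
        loop.create_task(manager.loop_for_fwd_visual_only())
    await asyncio.sleep(0.2)  # give the forwarding loop a moment to run in this demo


asyncio.run(startup("visual_only", DummyVisualManager()))
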

lightllm/server/api_lightllm.py

Lines changed: 3 additions & 6 deletions
@@ -5,7 +5,7 @@
 from lightllm.server.core.objs.sampling_params import SamplingParams
 from .multimodal_params import MultimodalParams
 from .httpserver.manager import HttpServerManager
-from .httpserver_for_visual_only.manager import HttpServerManagerForVisualOnly
+from .visualserver.manager import VisualManager
 from fastapi.responses import JSONResponse
 import ujson as json

@@ -140,9 +140,7 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
     return StreamingResponse(stream_results(), media_type="text/event-stream", background=background_tasks)


-async def lightllm_get_image_embedding(
-    request: Request, httpserver_manager: HttpServerManagerForVisualOnly
-) -> Response:
+async def lightllm_get_image_embedding(request: Request, httpserver_manager: VisualManager) -> Response:
     request_dict = await request.json()
     # request_dict: {'parameters': {'max_new_tokens': 128},
     # 'multimodal_params': {'images': [{'type': 'base64', 'data': 'base64'}]}}

@@ -154,6 +152,5 @@ async def lightllm_get_image_embedding(
     multimodal_params = MultimodalParams(**multimodal_params_dict)

     await httpserver_manager.generate(sampling_params, multimodal_params, request=request)
-    # 5. Return JSON result
-    print("embedding OK")
+
     return JSONResponse({"message": "OK"}, status_code=200)
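
For context, a hedged example of how a client might call this handler once it is mounted. The route path and host below are assumptions for illustration; the payload shape follows the request_dict comment in the diff above:

import base64

import requests

with open("demo.jpg", "rb") as f:
    img_b64 = base64.b64encode(f.read()).decode()

payload = {
    "parameters": {"max_new_tokens": 128},
    "multimodal_params": {"images": [{"type": "base64", "data": img_b64}]},
}
# the exact route is an assumption; adjust to wherever lightllm_get_image_embedding is mounted
resp = requests.post("http://127.0.0.1:8000/get_image_embedding", json=payload)
print(resp.status_code, resp.json())  # expected: 200 {"message": "OK"}
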

lightllm/server/api_start.py

Lines changed: 14 additions & 32 deletions
@@ -548,17 +548,16 @@ def visual_only_start(args):
         return
     already_uesd_ports = args.visual_nccl_ports + [args.nccl_port, args.port]
     can_use_ports = alloc_can_use_network_port(
-        num=5 + args.visual_dp * args.visual_tp, used_nccl_ports=already_uesd_ports
+        num=4 + args.visual_dp * args.visual_tp, used_nccl_ports=already_uesd_ports
     )
     logger.info(f"alloced ports: {can_use_ports}")
     (
         router_port,
         visual_port,
         audio_port,
-        cache_port,
         metric_port,
-    ) = can_use_ports[0:5]
-    can_use_ports = can_use_ports[5:]
+    ) = can_use_ports[0:4]
+    can_use_ports = can_use_ports[4:]

     visual_model_tp_ports = []
     for _ in range(args.visual_dp):

@@ -570,7 +569,6 @@ def visual_only_start(args):
     args.router_port = router_port
     args.visual_port = visual_port
     args.audio_port = audio_port
-    args.cache_port = cache_port
     args.metric_port = metric_port
     args.visual_model_rpc_ports = visual_model_tp_ports

@@ -585,33 +583,17 @@ def visual_only_start(args):
         start_args=[(metric_port, args)],
     )

-    from .visualserver.manager import start_visual_process
-
-    process_manager.start_submodule_processes(
-        start_funcs=[
-            start_cache_manager,
-        ],
-        start_args=[(cache_port, args)],
-    )
-    process_manager.start_submodule_processes(
-        start_funcs=[
-            start_visual_process,
-        ],
-        start_args=[
-            (args, audio_port, visual_port, cache_port, visual_model_tp_ports),
-        ],
-    )
-    if args.enable_multimodal_audio:
-        from .audioserver.manager import start_audio_process
-
-        process_manager.start_submodule_processes(
-            start_funcs=[
-                start_audio_process,
-            ],
-            start_args=[
-                (args, router_port, audio_port, cache_port),
-            ],
-        )
+    # if args.enable_multimodal_audio:
+    #     from .audioserver.manager import start_audio_process
+
+    #     process_manager.start_submodule_processes(
+    #         start_funcs=[
+    #             start_audio_process,
+    #         ],
+    #         start_args=[
+    #             (args, router_port, audio_port, cache_port),
+    #         ],
+    #     )

     # start gunicorn
     command = [
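
The port bookkeeping after this change: four fixed ports (router, visual, audio, metric) plus args.visual_dp * args.visual_tp rpc ports; cache_port is no longer allocated. A small sketch of the same arithmetic, with the per-dp grouping of rpc ports written out as an assumption (the loop body is not shown in the diff above):

from typing import List, Tuple


def split_visual_only_ports(
    ports: List[int], visual_dp: int, visual_tp: int
) -> Tuple[int, int, int, int, List[List[int]]]:
    # ports is expected to hold 4 + visual_dp * visual_tp entries
    assert len(ports) == 4 + visual_dp * visual_tp
    router_port, visual_port, audio_port, metric_port = ports[0:4]
    rest = ports[4:]
    # assumed grouping: visual_tp consecutive rpc ports per data-parallel replica
    visual_model_tp_ports = [rest[i * visual_tp:(i + 1) * visual_tp] for i in range(visual_dp)]
    return router_port, visual_port, audio_port, metric_port, visual_model_tp_ports


# e.g. visual_dp=2, visual_tp=2 consumes 4 + 4 = 8 ports
print(split_visual_only_ports(list(range(10000, 10008)), visual_dp=2, visual_tp=2))
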

lightllm/server/core/objs/io_objs/group_req.py

Lines changed: 6 additions & 0 deletions
@@ -4,6 +4,12 @@
 from ..req import Req


+@dataclass
+class VisualOnlyReqIndexes:
+    group_req_id: int
+    multimodal_params: MultimodalParams
+
+
 @dataclass
 class GroupReqIndexes:
     group_req_id: int
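
A minimal usage sketch of the new dataclass, using the import paths implied by the file locations in this commit; the empty images list is only for illustration (MultimodalParams is assumed to accept an images keyword, as suggested by MultimodalParams(**multimodal_params_dict) in api_lightllm.py):

from lightllm.server.core.objs.io_objs.group_req import VisualOnlyReqIndexes
from lightllm.server.multimodal_params import MultimodalParams

indexes = VisualOnlyReqIndexes(
    group_req_id=12345,
    multimodal_params=MultimodalParams(images=[]),
)
print(indexes.group_req_id)
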

lightllm/server/core/objs/req.py

Lines changed: 27 additions & 1 deletion
@@ -153,6 +153,30 @@ def init(

         self.post_init()

+    def init_visual_only(
+        self,
+        request_id: int,
+    ):
+        # declared only to give editors better type hints
+        self.index_in_shm_mem: int = self.index_in_shm_mem
+        self.ref_count: int = self.ref_count
+
+        self.request_id = request_id
+        self.group_req_id = convert_sub_id_to_group_id(request_id)
+        self.is_paused = False
+        self.finish_status = FinishStatus()
+        self.is_aborted = False
+        self.router_aborted = False
+        self.shm_infer_released = False
+        self.shm_cur_kv_len = 0
+        self.shm_cur_output_len = 0
+        self.candetoken_out_len = 0
+        self.prompt_cache_len = 0
+        self.finish_token_index = -1
+        self.can_released_mark = False
+
+        self.post_init()
+
     def post_init(self):
         # subclasses can override to perform extra initialization
         pass

@@ -206,7 +230,9 @@ def can_release(self):
         # only the manager node holds a single reference
         ref_count_ok = self.ref_count == 1
         can_released_mark = self.can_released_mark
-
+        print(f"self.is_aborted is {self.is_aborted}")
+        print(f"self.finish_status.is_finished() is {self.finish_status.is_finished()}")
+        print(f"self.ref_count is {self.ref_count}")
         if self.is_aborted and can_released_mark and ref_count_ok:
             return True


lightllm/server/embed_cache/utils.py

Lines changed: 2 additions & 3 deletions
@@ -44,7 +44,7 @@ def create_shm(name, data):
 def create_afs(name, data):
     try:
         data_size = len(data)
-        path = os.path.join(get_env_start_args().visual_embed_path, name)
+        path = os.path.join("/mtc/sangchengmeng/afs", name)
         with open(path, "xb") as f:
             mem_view = memoryview(data)
             f.write(mem_view[:data_size])

@@ -79,7 +79,6 @@ def get_shm_name_data(uid):
 def get_shm_name_embed(uid):
     return str(uid) + "-embed"

-
 """
 Importable Redis-backed MD5 refcount with LRU eviction.

@@ -377,4 +376,4 @@ def _delete_afs_files(self, victims: List[str]) -> None:
     else
         return {0} -- eviction failed, not enough candidates
     end
-"""
+"""

lightllm/server/httpserver_for_visual_only/__init__.py

Whitespace-only changes.
