Skip to content

Commit f86964f

Browse files
authored
dlinfer backend support ray (#3903)
* dlinfer backend support ray * remove ppu change
1 parent f0b00bb commit f86964f

File tree

6 files changed

+24
-6
lines changed

6 files changed

+24
-6
lines changed

lmdeploy/pytorch/backends/dlinfer/camb/op_backend.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,8 @@ def build_graph_runner(model: torch.nn.Module, model_config: ModelConfig, cache_
123123
"""Build graph runner."""
124124
from lmdeploy.pytorch.backends.cuda.graph_runner import CUDAGraphRunner
125125
return CUDAGraphRunner(model, model_config, cache_config, backend_config, device)
126+
127+
@staticmethod
128+
def support_ray():
129+
"""Return True to signal that this backend supports ray."""
130+
return True

lmdeploy/pytorch/backends/dlinfer/maca/op_backend.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,8 @@ def build_graph_runner(model: torch.nn.Module, model_config: ModelConfig, cache_
109109
"""Build graph runner."""
110110
from lmdeploy.pytorch.backends.cuda.graph_runner import CUDAGraphRunner
111111
return CUDAGraphRunner(model, model_config, cache_config, backend_config, device)
112+
113+
@staticmethod
114+
def support_ray():
115+
"""Return True to signal that this backend supports ray."""
116+
return True

lmdeploy/pytorch/engine/executor/ray_executor.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -549,11 +549,13 @@ def _init_workers_ray(self, placement_group: PlacementGroup, worker_kwargs: dict
549549
def _init_distributed_environment_by_device(self, device_str: str):
550550
"""Init distributed environment for the given device type.

'cuda' and 'maca' only re-order ``self.workers`` via ``_sort_workers``
using the master address; 'ascend' and 'camb' delegate to their own
device-specific init methods.

Args:
    device_str: Device type name ('cuda', 'maca', 'ascend' or 'camb').

Raises:
    ValueError: If ``device_str`` is none of the supported device types.
"""
551551
driver_ip = _get_master_addr()
552-
if device_str == 'cuda':
552+
if device_str in ['cuda', 'maca']:
553553
self.workers = self._sort_workers(driver_ip, self.workers)
554554

555555
elif device_str == 'ascend':
556556
self._init_ascend_distributed_environment(driver_ip)
557+
elif device_str == 'camb':
558+
self._init_camb_distributed_environment(driver_ip)
557559
else:
558560
raise ValueError(f'Unsupported device type: {device_str}')
559561

@@ -576,6 +578,10 @@ def _init_ascend_distributed_environment(self, driver_ip):
576578
else:
577579
self.workers = self._sort_workers(driver_ip, self.workers)
578580

581+
def _init_camb_distributed_environment(self, driver_ip):
"""Init distributed environment for camb devices.

Re-orders ``self.workers`` via ``_sort_workers`` using ``driver_ip``,
then remotely calls ``set_device`` on every worker with its index in
the sorted list and collects the results with ``ray.get``.
"""
582+
self.workers = self._sort_workers(driver_ip, self.workers)
583+
# The index passed to set_device is the worker's position AFTER sorting.
ray.get([worker.set_device.remote(idx) for idx, worker in enumerate(self.workers)])
584+
579585
""" PD Disaggregation API Begin """
580586

581587
def p2p_initialize(self, init_request: DistServeInitRequest):

lmdeploy/pytorch/ray.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@
1616
def get_device_str(device_type: str = None) -> str:
1717
"""Get device str."""
1818
device_type = device_type or get_device_manager().current_context().device_type
19-
if device_type == 'cuda':
19+
if device_type in ['cuda', 'maca']:
2020
device_type = 'GPU'
2121
elif device_type == 'ascend':
2222
device_type = 'NPU'
23+
elif device_type == 'camb':
24+
device_type = 'MLU'
2325
else:
2426
raise ValueError(f'Unsupported device type: {device_type}')
2527

requirements/runtime_camb.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ safetensors
1616
sentencepiece
1717
shortuuid
1818
tiktoken
19-
torch==2.4.0
20-
torchvision<=0.19.0,>=0.15.0
19+
torch<=2.6.0,>=2.4.0
20+
torchvision<=0.21.0,>=0.15.0
2121
transformers
2222
uvicorn

requirements/runtime_maca.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ safetensors
1616
sentencepiece
1717
shortuuid
1818
tiktoken
19-
torch<=2.4.0,>=2.0.0
20-
torchvision<=0.19.0,>=0.15.0
19+
torch<=2.6.0,>=2.0.0
20+
torchvision<=0.21.0,>=0.15.0
2121
transformers
2222
triton>=2.1.0; sys_platform == "linux"
2323
uvicorn

0 commit comments

Comments
 (0)