Skip to content

Commit 5c7cfec

Browse files
committed
add bundle indices test
Signed-off-by: Erin Ho <14718778+hchings@users.noreply.github.com>
1 parent f3dbacd commit 5c7cfec

File tree

1 file changed

+60
-3
lines changed

1 file changed

+60
-3
lines changed

tests/unittest/_torch/ray_orchestrator/multi_gpu/test_executor.py

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
import os
22

33
import pytest
4+
import ray
5+
from ray.util.placement_group import (PlacementGroupSchedulingStrategy,
6+
placement_group, remove_placement_group)
47
from utils.llm_data import llm_models_root
58

69
from tensorrt_llm import LLM
710
from tensorrt_llm._torch.utils import get_device_uuid
11+
from tensorrt_llm.llmapi import KvCacheConfig
812

913

1014
class DummyWorkerExtension:
@@ -22,17 +26,70 @@ def test_worker_extension():
2226
assert result[0] == "SUCCESS"
2327

2428

29+
@pytest.mark.gpu4
def test_bundle_indices(monkeypatch):
    """Placement via bundle indices.

    Launches a TP=2 LLM as a Ray actor restricted to placement-group
    bundles 2 and 3 (via TRTLLM_RAY_BUNDLE_INDICES) and verifies that the
    inference workers report device UUIDs matching exactly those GPUs.
    """
    # Keep Ray from overriding CUDA_VISIBLE_DEVICES so TRT-LLM controls
    # GPU assignment itself; RPC mode is required for _collective_rpc here.
    monkeypatch.setenv("RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES", "1")
    monkeypatch.setenv("TLLM_RAY_USE_RPC", "1")

    pg = None
    try:
        ray.init()
        # One {GPU, CPU} bundle per device on a 4-GPU node.
        pg = placement_group([{"GPU": 1, "CPU": 1}] * 4)
        ray.get(pg.ready())

        bundle_indices = [2, 3]
        runtime_env = {
            "env_vars": {
                "TRTLLM_RAY_PER_WORKER_GPUS": "0.8",
                "TRTLLM_RAY_BUNDLE_INDICES": ",".join(map(str, bundle_indices))
            }
        }

        # Wrap LLM as a Ray actor scheduled inside the placement group;
        # num_cpus/num_gpus are 0 because the child workers (captured by
        # placement_group_capture_child_tasks) claim the bundle resources.
        llm = ray.remote(
            num_cpus=0,
            num_gpus=0,
            runtime_env=runtime_env,
            scheduling_strategy=PlacementGroupSchedulingStrategy(
                placement_group=pg,
                placement_group_capture_child_tasks=True,
            ),
        )(LLM).remote(
            model=os.path.join(llm_models_root(), "llama-models-v2",
                               "TinyLlama-1.1B-Chat-v1.0"),
            kv_cache_config=KvCacheConfig(free_gpu_memory_fraction=0.1),
            tensor_parallel_size=2,
            orchestrator_type="ray",
        )

        inference_actor_uuids = ray.get(
            llm._collective_rpc.remote("report_device_id"))

        # NOTE(review): removed a dead `[get_device_uuid(idx) for idx in
        # range(4)]` expression whose result was discarded — leftover debug
        # code with no effect on the assertion below.
        expected_uuids = [get_device_uuid(idx) for idx in bundle_indices]

        print(f"{inference_actor_uuids=}")

        # Order of worker replies is not guaranteed, so compare as sorted sets.
        assert sorted(inference_actor_uuids) == sorted(expected_uuids), \
            f"Workers not placed on expected GPUs. Expected UUIDs: {expected_uuids}, Got: {inference_actor_uuids}"

    finally:
        if pg is not None:
            remove_placement_group(pg)
        ray.shutdown()
81+
82+
2583
@pytest.mark.gpu2
def test_cuda_visible_device(monkeypatch):
    """Placement via cuda_visible_device"""
    # Restrict the process to GPU 1; monkeypatch restores the env afterwards.
    monkeypatch.setenv("CUDA_VISIBLE_DEVICES", "1")

    model_path = llm_models_root() / "llama-models-v2/TinyLlama-1.1B-Chat-v1.0"
    llm = LLM(model=model_path, orchestrator_type="ray")

    infer_actor_uuids = llm._collective_rpc("report_device_id")

    # With only GPU 1 visible, the single worker must sit on that device.
    assert infer_actor_uuids[0] == get_device_uuid(1)
    print(f"{infer_actor_uuids=}")

0 commit comments

Comments
 (0)