11import os
22
33import pytest
4+ import ray
5+ from ray .util .placement_group import (PlacementGroupSchedulingStrategy ,
6+ placement_group , remove_placement_group )
47from utils .llm_data import llm_models_root
58
69from tensorrt_llm import LLM
710from tensorrt_llm ._torch .utils import get_device_uuid
11+ from tensorrt_llm .llmapi import KvCacheConfig
812
913
1014class DummyWorkerExtension :
@@ -22,17 +26,70 @@ def test_worker_extension():
2226 assert result [0 ] == "SUCCESS"
2327
2428
@pytest.mark.gpu4
def test_bundle_indices(monkeypatch):
    """Placement via bundle indices.

    Creates a 4-bundle placement group, restricts the LLM workers to
    bundles 2 and 3 via TRTLLM_RAY_BUNDLE_INDICES, and asserts the
    device UUIDs reported by the workers match those two GPUs.
    """
    # Keep Ray from rewriting CUDA_VISIBLE_DEVICES so bundle indices map
    # directly onto physical device indices.
    monkeypatch.setenv("RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES", "1")
    monkeypatch.setenv("TLLM_RAY_USE_RPC", "1")

    pg = None
    try:
        ray.init()
        pg = placement_group([{"GPU": 1, "CPU": 1}] * 4)
        ray.get(pg.ready())

        bundle_indices = [2, 3]
        runtime_env = {
            "env_vars": {
                "TRTLLM_RAY_PER_WORKER_GPUS": "0.8",
                "TRTLLM_RAY_BUNDLE_INDICES":
                ",".join(map(str, bundle_indices)),
            }
        }

        llm = ray.remote(
            num_cpus=0,
            num_gpus=0,
            runtime_env=runtime_env,
            scheduling_strategy=PlacementGroupSchedulingStrategy(
                placement_group=pg,
                placement_group_capture_child_tasks=True,
            ),
        )(LLM).remote(
            model=os.path.join(llm_models_root(), "llama-models-v2",
                               "TinyLlama-1.1B-Chat-v1.0"),
            kv_cache_config=KvCacheConfig(free_gpu_memory_fraction=0.1),
            tensor_parallel_size=2,
            orchestrator_type="ray",
        )

        inference_actor_uuids = ray.get(
            llm._collective_rpc.remote("report_device_id"))

        # UUIDs of the GPUs backing the requested bundles.
        # (Removed a leftover no-op list comprehension over range(4) whose
        # result was discarded.)
        expected_uuids = [get_device_uuid(idx) for idx in bundle_indices]

        print(f"{inference_actor_uuids=}")

        assert sorted(inference_actor_uuids) == sorted(expected_uuids), \
            f"Workers not placed on expected GPUs. Expected UUIDs: {expected_uuids}, Got: {inference_actor_uuids}"

    finally:
        # Always tear down the placement group and the Ray session so a
        # failed assertion does not leak GPU reservations into later tests.
        if pg is not None:
            remove_placement_group(pg)
        ray.shutdown()
81+
82+
@pytest.mark.gpu2
def test_cuda_visible_device(monkeypatch):
    """Placement via cuda_visible_device"""
    # Restrict the test process to GPU 1; monkeypatch restores the
    # environment automatically when the test finishes.
    monkeypatch.setenv("CUDA_VISIBLE_DEVICES", "1")

    model_path = llm_models_root() / "llama-models-v2/TinyLlama-1.1B-Chat-v1.0"
    llm = LLM(model=model_path, orchestrator_type="ray")

    infer_actor_uuids = llm._collective_rpc("report_device_id")

    # The single worker must report the UUID of physical device 1.
    assert infer_actor_uuids[0] == get_device_uuid(1)
    print(f"{infer_actor_uuids=}")
0 commit comments