@@ -1280,6 +1280,17 @@ class Deployer(ABC):
12801280 "AMD_VISIBLE_DEVICES": ["0", "1"]
12811281 }.
12821282 """
1283+ _visible_devices_values_alignment : dict [str , dict [str , str ]] | None = None
1284+ """
1285+ Recorded visible devices values alignment,
1286+ the key is the runtime visible devices env name,
1287+ the value is the mapping from backend device index to aligned index.
1288+ For example:
1289+ {
1290+ "NVIDIA_VISIBLE_DEVICES": {"0": "0"},
1291+ "AMD_VISIBLE_DEVICES": {"0": "0", "1": "1"}
1292+ }.
1293+ """
12831294
12841295 @staticmethod
12851296 @abstractmethod
@@ -1325,6 +1336,7 @@ def _fetch_visible_devices_env_values(self):
13251336
13261337 self ._visible_devices_env = {}
13271338 self ._visible_devices_values = {}
1339+ self ._visible_devices_values_alignment = {}
13281340
13291341 devices : dict [ManufacturerEnum , Devices ] = {}
13301342 for dev in detect_devices (fast = False ):
@@ -1333,11 +1345,6 @@ def _fetch_visible_devices_env_values(self):
13331345 devices [dev .manufacturer ].append (dev )
13341346
13351347 if devices :
1336- value_with_index = (
1337- envs .GPUSTACK_RUNTIME_DEPLOY_RUNTIME_VISIBLE_DEVICES_VALUE_MODE .lower ()
1338- == "index"
1339- )
1340-
13411348 for manu , devs in devices .items ():
13421349 backend = manufacturer_to_backend (manu )
13431350 rk = envs .GPUSTACK_RUNTIME_DETECT_BACKEND_MAP_RESOURCE_KEY .get (backend )
@@ -1348,18 +1355,30 @@ def _fetch_visible_devices_env_values(self):
13481355 rk ,
13491356 )
13501357 if ren and ben :
1358+ dev_uuids : list [str ] = []
1359+ dev_indexes : list [str ] = []
1360+ for dev in devs :
1361+ dev_uuids .append (dev .uuid )
1362+ dev_indexes .append (str (dev .index ))
1363+ dev_indexes_alignment : dict [str , str ] = {
1364+ dev_indexes [i ]: str (i ) for i in range (len (devs ))
1365+ }
13511366 self ._visible_devices_env [ren ] = ben
1352- self ._visible_devices_values [ren ] = [
1353- (str (dev .index ) if value_with_index else dev .uuid )
1354- for dev in devs
1355- ]
1367+ self ._visible_devices_values [ren ] = (
1368+ dev_uuids
1369+ if ren
1370+ in envs .GPUSTACK_RUNTIME_DEPLOY_RUNTIME_VISIBLE_DEVICES_VALUE_UUID
1371+ else dev_indexes
1372+ )
1373+ self ._visible_devices_values_alignment [ren ] = dev_indexes_alignment
13561374
13571375 if self ._visible_devices_env :
13581376 return
13591377
13601378 # Fallback to unknown backend
13611379 self ._visible_devices_env ["UNKNOWN_RUNTIME_VISIBLE_DEVICES" ] = []
13621380 self ._visible_devices_values ["UNKNOWN_RUNTIME_VISIBLE_DEVICES" ] = ["all" ]
1381+ self ._visible_devices_values_alignment ["UNKNOWN_RUNTIME_VISIBLE_DEVICES" ] = {}
13631382
13641383 def visible_devices_env_values (
13651384 self ,
@@ -1389,6 +1408,44 @@ def visible_devices_env_values(
13891408 self ._fetch_visible_devices_env_values ()
13901409 return self ._visible_devices_env , self ._visible_devices_values
13911410
def align_backend_visible_devices_env_values(
    self,
    backend_visible_devices_env: str,
    resource_key_values: str,
) -> str:
    """
    Return the aligned backend visible devices environment variable values.

    Alignment rewrites each detected device index to its position among the
    detected devices. For example, if the backend visible devices env is
    "ASCEND_RT_VISIBLE_DEVICES", the `resource_key_values` is "4,6", and the
    detected devices have indexes [4,5,6,7], the aligned result is "0,2".

    Args:
        backend_visible_devices_env:
            The backend visible devices environment variable name.
        resource_key_values:
            The comma-separated resource key values to align.

    Returns:
        The aligned backend visible devices environment variable values.
        If no alignment is needed, or no alignment has been recorded for
        the given env, return the original `resource_key_values`.
        Individual values without a recorded alignment are kept as they are.

    """
    if (
        backend_visible_devices_env
        not in envs.GPUSTACK_RUNTIME_DEPLOY_BACKEND_VISIBLE_DEVICES_VALUE_ALIGNMENT
    ):
        return resource_key_values

    self._fetch_visible_devices_env_values()

    # Look the table up defensively: when no device was detected, only the
    # "UNKNOWN_RUNTIME_VISIBLE_DEVICES" entry is recorded, so indexing with
    # the requested env directly would raise KeyError.
    # NOTE(review): _fetch_visible_devices_env_values records the alignment
    # under the runtime env name; confirm callers pass a matching name here.
    alignment: dict[str, str] = (self._visible_devices_values_alignment or {}).get(
        backend_visible_devices_env,
        {},
    )
    if not alignment:
        return resource_key_values

    return ",".join(
        alignment.get(value, value) for value in resource_key_values.split(",")
    )
1448+
13921449 @property
13931450 def name (self ) -> str :
13941451 """
0 commit comments