feat: support cpu/numa affinity in docker deploy

thxCode · thxCode · commit cb5cc4c266c4 · 2025-12-27T00:00:50.000+08:00
Signed-off-by: thxCode <thxcode0824@gmail.com>
diff --git a/gpustack_runtime/deployer/__types__.py b/gpustack_runtime/deployer/__types__.py
@@ -14,9 +14,10 @@
 
 from .. import envs
 from ..detector import (
-    Devices,
-    ManufacturerEnum,
+    Topology,
     detect_devices,
+    get_devices_topologies,
+    group_devices_by_manufacturer,
     manufacturer_to_backend,
 )
 from .__utils__ import (
@@ -1279,6 +1280,17 @@ class Deployer(ABC):
         "AMD_VISIBLE_DEVICES": ["0", "1"]
     }.
     """
+    _visible_devices_topologies: dict[str, Topology] | None = None
+    """
+    Recorded visible devices topologies,
+    the key is the runtime visible devices env name,
+    the value is the corresponding topology.
+    For example:
+    {
+        "NVIDIA_VISIBLE_DEVICES": Topology(...),
+        "AMD_VISIBLE_DEVICES": Topology(...)
+    }.
+    """
     _backend_visible_devices_values_alignment: dict[str, dict[str, str]] | None = None
     """
     Recorded backend visible devices values alignment,
@@ -1326,25 +1338,27 @@ def __enter__(self):
     def __exit__(self, exc_type, exc_value, traceback):
         self.close()
 
-    def _fetch_visible_devices_env_values(self):
+    def _prepare(self):
         """
-        Fetch the visible devices environment variables and values.
+        Detect devices once, and construct critical elements for post processing, including:
+        - Prepare visible devices environment variables mapping.
+        - Prepare visible devices values mapping.
+        - Prepare topology.
         """
         if self._visible_devices_env:
             return
 
         self._visible_devices_env = {}
         self._visible_devices_values = {}
+        self._visible_devices_topologies = {}
         self._backend_visible_devices_values_alignment = {}
 
-        devices: dict[ManufacturerEnum, Devices] = {}
-        for dev in detect_devices(fast=False):
-            if dev.manufacturer not in devices:
-                devices[dev.manufacturer] = []
-            devices[dev.manufacturer].append(dev)
+        group_devices = group_devices_by_manufacturer(
+            detect_devices(fast=False),
+        )
 
-        if devices:
-            for manu, devs in devices.items():
+        if group_devices:
+            for manu, devs in group_devices.items():
                 backend = manufacturer_to_backend(manu)
                 rk = envs.GPUSTACK_RUNTIME_DETECT_BACKEND_MAP_RESOURCE_KEY.get(backend)
                 ren = envs.GPUSTACK_RUNTIME_DEPLOY_RESOURCE_KEY_MAP_RUNTIME_VISIBLE_DEVICES.get(
@@ -1377,6 +1391,13 @@ def _fetch_visible_devices_env_values(self):
                             self._backend_visible_devices_values_alignment[ben_item] = (
                                 dev_indexes_alignment
                             )
+                    if (
+                        envs.GPUSTACK_RUNTIME_DEPLOY_CPU_AFFINITY
+                        or envs.GPUSTACK_RUNTIME_DEPLOY_NUMA_AFFINITY
+                    ):
+                        topos = get_devices_topologies(devices=devs)
+                        if topos:
+                            self._visible_devices_topologies[ren] = topos[0]
 
             if self._visible_devices_env:
                 return
@@ -1385,7 +1406,7 @@ def _fetch_visible_devices_env_values(self):
         self._visible_devices_env["UNKNOWN_RUNTIME_VISIBLE_DEVICES"] = []
         self._visible_devices_values["UNKNOWN_RUNTIME_VISIBLE_DEVICES"] = ["all"]
 
-    def visible_devices_env_values(
+    def get_visible_devices_env_values(
         self,
     ) -> (dict[str, list[str]], dict[str, list[str]]):
         """
@@ -1410,9 +1431,44 @@ def visible_devices_env_values(
               to lists of device indexes or UUIDs.
 
         """
-        self._fetch_visible_devices_env_values()
+        self._prepare()
         return self._visible_devices_env, self._visible_devices_values
 
+    def get_visible_devices_affinities(
+        self,
+        runtime_env: list[str],
+        resource_value: str,
+    ) -> tuple[str, str]:
+        """
+        Get the CPU and NUMA affinities for the given runtime environment and resource value.
+
+        Args:
+            runtime_env:
+                The list of runtime visible devices environment variable names.
+            resource_value:
+                The resource value, which can be "all" or a comma-separated list of device indexes.
+
+        Returns:
+            A tuple containing:
+            - A comma-separated string of CPU affinities, deduplicated in first-seen order.
+            - A comma-separated string of NUMA affinities, deduplicated in first-seen order.
+
+        """
+        self._prepare()  # The topologies attribute defaults to None until prepared.
+        dev_indexes = []
+        if resource_value != "all":
+            dev_indexes = [int(v.strip()) for v in resource_value.split(",")]
+
+        cpus_set: list[str] = []
+        numas_set: list[str] = []
+        for re_ in runtime_env:
+            if topo := self._visible_devices_topologies.get(re_):
+                cs, ns = topo.get_affinities(dev_indexes, deduplicate=False)
+                cpus_set.extend(c for c in cs if c)  # Skip empty entries.
+                numas_set.extend(n for n in ns if n)  # Skip empty entries.
+        # dict.fromkeys deduplicates in first-seen order; set() ordering is arbitrary.
+        return ",".join(dict.fromkeys(cpus_set)), ",".join(dict.fromkeys(numas_set))
+
     def align_backend_visible_devices_env_values(
         self,
         backend_visible_devices_env: str,
@@ -1440,7 +1496,7 @@ def align_backend_visible_devices_env_values(
             not in envs.GPUSTACK_RUNTIME_DEPLOY_BACKEND_VISIBLE_DEVICES_VALUE_ALIGNMENT
         ):
             return resource_key_values
-        self._fetch_visible_devices_env_values()
+        self._prepare()
         alignments = self._backend_visible_devices_values_alignment.get(
             backend_visible_devices_env,
         )
diff --git a/gpustack_runtime/deployer/docker.py b/gpustack_runtime/deployer/docker.py
@@ -944,7 +944,7 @@ def _create_containers(
             if c.resources:
                 r_k_runtime_env = workload.resource_key_runtime_env_mapping or {}
                 r_k_backend_env = workload.resource_key_backend_env_mapping or {}
-                vd_env, vd_values = self.visible_devices_env_values()
+                vd_env, vd_values = self.get_visible_devices_env_values()
                 for r_k, r_v in c.resources.items():
                     match r_k:
                         case "cpu":
@@ -1023,6 +1023,20 @@ def _create_containers(
                                         )
                                     )
 
+                            # Configure affinity if applicable.
+                            if (
+                                envs.GPUSTACK_RUNTIME_DEPLOY_CPU_AFFINITY
+                                or envs.GPUSTACK_RUNTIME_DEPLOY_NUMA_AFFINITY
+                            ):
+                                cpus, numas = self.get_visible_devices_affinities(
+                                    runtime_env,
+                                    r_v,
+                                )
+                                if cpus:
+                                    create_options["cpuset_cpus"] = cpus
+                                if numas and envs.GPUSTACK_RUNTIME_DEPLOY_NUMA_AFFINITY:
+                                    create_options["cpuset_mems"] = numas
+
             # Parameterize mounts.
             self._append_container_mounts(
                 create_options,
diff --git a/gpustack_runtime/deployer/kuberentes.py b/gpustack_runtime/deployer/kuberentes.py
@@ -985,7 +985,7 @@ def _create_pod(
                 resources: dict[str, str] = {}
                 r_k_runtime_env = workload.resource_key_runtime_env_mapping or {}
                 r_k_backend_env = workload.resource_key_backend_env_mapping or {}
-                vd_env, vd_values = self.visible_devices_env_values()
+                vd_env, vd_values = self.get_visible_devices_env_values()
                 for r_k, r_v in c.resources.items():
                     if r_k in ("cpu", "memory"):
                         resources[r_k] = str(r_v)
diff --git a/gpustack_runtime/detector/__init__.py b/gpustack_runtime/detector/__init__.py
@@ -139,22 +139,27 @@ def get_devices_topologies(
         fast:
             If True, return topologies from the first supported detector.
             Otherwise, return topologies from all supported detectors.
+            Only takes effect when `devices` is not provided (None or empty).
 
     Returns:
         A list of Topology objects for each manufacturer group.
 
     """
-    if devices is None:
+    group = False
+    if not devices:
         devices = detect_devices(fast=fast)
-
-    topologies: list[Topology] = []
+        if not devices:
+            return []
+        group = True and not fast
 
     # Group devices by manufacturer.
-    group_devices = group_devices_by_manufacturer(devices)
-    if not group_devices:
-        return topologies
+    if group:
+        group_devices = group_devices_by_manufacturer(devices)
+    else:
+        group_devices = {devices[0].manufacturer: devices}
 
     # Get topology for each group.
+    topologies: list[Topology] = []
     for manu, devs in group_devices.items():
         det = _DETECTORS_MAP.get(manu)
         if det is not None:
@@ -163,7 +168,6 @@ def get_devices_topologies(
                 topologies.append(topo)
             if fast and topologies:
                 return topologies
-
     return topologies
 
 
diff --git a/gpustack_runtime/detector/__types__.py b/gpustack_runtime/detector/__types__.py
@@ -297,6 +297,46 @@ def stringify(self) -> list[list[str]]:
             ]
         return devices_info
 
+    def get_affinities(
+        self,
+        device_indexes: list[int] | int,
+        deduplicate: bool = True,
+    ) -> tuple[list[str], list[str]]:
+        """
+        Get the CPU and NUMA affinities for the given device indexes.
+
+        Args:
+            device_indexes:
+                A list of device indexes or a single device index.
+                If an empty list is provided, return all affinities.
+            deduplicate:
+                Whether to deduplicate the affinities.
+                If True, duplicates are dropped while keeping first-seen order.
+
+        Returns:
+            A tuple containing:
+            - A list containing the CPU affinities for the given device indexes.
+            - A list containing the NUMA affinities for the given device indexes.
+
+        """
+        if isinstance(device_indexes, int):
+            device_indexes = [device_indexes]
+
+        if not device_indexes:
+            cpu_affinities = list(self.devices_cpu_affinities)
+            numa_affinities = list(self.devices_numa_affinities)
+        else:
+            # Visit each requested index once, in ascending order.
+            indexes = sorted(set(device_indexes))
+            cpu_affinities = [self.devices_cpu_affinities[i] for i in indexes]
+            numa_affinities = [self.devices_numa_affinities[i] for i in indexes]
+
+        if deduplicate:
+            # dict.fromkeys gives a stable result; set() ordering is arbitrary.
+            cpu_affinities = list(dict.fromkeys(cpu_affinities))
+            numa_affinities = list(dict.fromkeys(numa_affinities))
+        return cpu_affinities, numa_affinities
+
 
 class TopologyDistanceEnum(int, Enum):
     """
diff --git a/gpustack_runtime/detector/amd.py b/gpustack_runtime/detector/amd.py
@@ -278,7 +278,8 @@ def get_topology(self, devices: Devices | None = None) -> Topology | None:
 
         def get_device_handle(dev: Device):
             if bdf := dev.appendix.get("bdf", None):
-                return pyamdsmi.amdsmi_get_processor_handle_from_bdf(bdf)
+                with contextlib.suppress(pyamdsmi.AmdSmiException):
+                    return pyamdsmi.amdsmi_get_processor_handle_from_bdf(bdf)
             nonlocal devs_mapping
             if devs_mapping is None:
                 devs = pyamdsmi.amdsmi_get_processor_handles()
diff --git a/gpustack_runtime/envs.py b/gpustack_runtime/envs.py
@@ -182,6 +182,15 @@
     When detected devices are considered to be partially mapped (starting from a non-zero value or not contiguous),
     alignment is performed to ensure they are correctly identified.
     """
+    GPUSTACK_RUNTIME_DEPLOY_CPU_AFFINITY: bool = False
+    """
+    Enable CPU affinity for deployed workloads.
+    """
+    GPUSTACK_RUNTIME_DEPLOY_NUMA_AFFINITY: bool = False
+    """
+    Enable NUMA affinity for deployed workloads.
+    When enabled, `GPUSTACK_RUNTIME_DEPLOY_CPU_AFFINITY` is also implied.
+    """
 
     # Deployer
 
@@ -392,6 +401,12 @@
         ),
         sep=",",
     ),
+    "GPUSTACK_RUNTIME_DEPLOY_CPU_AFFINITY": lambda: to_bool(
+        getenv("GPUSTACK_RUNTIME_DEPLOY_CPU_AFFINITY", "0"),
+    ),
+    "GPUSTACK_RUNTIME_DEPLOY_NUMA_AFFINITY": lambda: to_bool(
+        getenv("GPUSTACK_RUNTIME_DEPLOY_NUMA_AFFINITY", "0"),
+    ),
     # Deployer
     ## Docker
     "GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS": lambda: to_dict(
diff --git a/tests/gpustack_runtime/detector/samples/detect_output_amd_rx7800xt.json b/tests/gpustack_runtime/detector/samples/detect_output_amd_rx7800xt.json
@@ -9,18 +9,19 @@
     "runtime_version_original": "7.1.1",
     "compute_capability": "gfx1101",
     "cores": 60,
-    "cores_utilization": 0,
+    "cores_utilization": 19,
     "memory": 16368,
-    "memory_used": 5713,
-    "memory_utilization": 34.9,
-    "temperature": 37,
+    "memory_used": 206,
+    "memory_utilization": 1.26,
+    "temperature": 34,
     "power": 236,
-    "power_used": 7,
+    "power_used": 17,
     "appendix": {
       "arch_family": "GC 11.0.0",
       "vgpu": false,
+      "bdf": "0000:03:00.0",
       "card_id": 1,
-      "bdf": "0000:03:00.0"
+      "renderd_id": 128
     }
   }
 ]