|
2 | 2 | # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
3 | 3 | """A CPU worker class."""
|
4 | 4 | import os
|
| 5 | +from importlib import util |
5 | 6 | from typing import Dict, List, Optional, Set, Tuple, Type
|
6 | 7 |
|
7 | 8 | import torch
|
@@ -156,8 +157,10 @@ def __init__(
|
156 | 157 |
|
157 | 158 | # Setup OpenMP threads affinity.
|
158 | 159 | omp_cpuids = envs.VLLM_CPU_OMP_THREADS_BIND
|
159 |
| - if omp_cpuids == "all": |
160 |
| - self.local_omp_cpuid = "all" |
| 160 | + self.local_omp_cpuid = "all" |
| 161 | + if omp_cpuids == "auto": |
| 162 | + self.local_omp_cpuid = self.get_cpus_id_binding_based_on_numa_nodes( |
| 163 | + ) |
161 | 164 | else:
|
162 | 165 | self.local_omp_cpuid = omp_cpuids.split("|")[rank]
|
163 | 166 |
|
@@ -399,3 +402,49 @@ def get_cache_block_size_bytes(self) -> int:
|
399 | 402 | return CPUCacheEngine.get_cache_block_size(
|
400 | 403 | self.cache_config.block_size, self.cache_config.cache_dtype,
|
401 | 404 | self.model_config, self.parallel_config)
|
| 405 | + |
def get_cpus_id_binding_based_on_numa_nodes(self) -> str:
    """Return the OpenMP CPU binding for this rank based on NUMA nodes.

    Rank ``i`` is bound to the ``i``-th NUMA node that still has at least
    one CPU in the current process's affinity mask. Within that node the
    lowest-numbered allowed CPU ids are kept, up to
    ``physical_cores_per_node - reserved`` entries, where ``reserved`` is
    ``envs.VLLM_CPU_NUM_OF_RESERVED_CPU`` capped at half of the physical
    cores per node.

    Returns:
        A comma-separated string of CPU ids for ``self.rank``. If the
        binding cannot be computed (``numa``/``psutil`` not installed,
        CPU topology undetermined, or more ranks than usable NUMA nodes),
        the current value of ``self.local_omp_cpuid`` is returned
        unchanged.
    """
    # Fallback result: keep whatever binding is already set on the worker.
    rank_to_cpus = self.local_omp_cpuid
    world_size = self.vllm_config.parallel_config.world_size

    # Both optional packages are required; probe without importing them.
    libnuma_found = util.find_spec("numa") is not None
    psutil_found = util.find_spec("psutil") is not None
    if not (libnuma_found and psutil_found):
        logger.warning(
            "Auto thread-binding is not supported due to "
            "the lack of package numa and psutil, "
            "fallback to no thread-binding. To get better performance, "
            "please try to manually bind threads.")
        return rank_to_cpus

    import psutil
    from numa import info

    # psutil returns None when the physical core count is undetermined;
    # guard it (and a zero node count) so we fall back instead of raising.
    cpu_count = psutil.cpu_count(logical=False)
    numa_size = info.get_num_configured_nodes()
    if not cpu_count or numa_size <= 0:
        logger.warning(
            "Auto thread-binding failed because the CPU topology could "
            "not be determined (physical cores: %s, NUMA nodes: %s). "
            "Please try to bind threads manually.", cpu_count, numa_size)
        return rank_to_cpus

    cpus_allow_list = psutil.Process().cpu_affinity()
    cpu_count_per_numa = cpu_count // numa_size
    # Never reserve more than half of a node's physical cores.
    num_of_reserved_cpu = min(envs.VLLM_CPU_NUM_OF_RESERVED_CPU,
                              cpu_count_per_numa // 2)

    # Keep only NUMA nodes that intersect the affinity mask. The ids are
    # sorted because set iteration order is unspecified and the list is
    # sliced below; sorting makes the selected CPUs deterministic.
    node_to_cpus = []
    for node_id in range(numa_size):
        allowed_cpus = set(
            info.node_to_cpus(node_id)).intersection(cpus_allow_list)
        if allowed_cpus:
            node_to_cpus.append(sorted(allowed_cpus))

    if world_size > len(node_to_cpus):
        logger.error(
            "Auto thread-binding failed due to "
            "world size: %d is larger than "
            "allowed NUMA nodes number: %d. "
            "Please try to bind threads manually.", world_size,
            len(node_to_cpus))
        return rank_to_cpus

    end = cpu_count_per_numa - num_of_reserved_cpu
    rank_to_cpus_list = node_to_cpus[self.rank][:end]
    rank_to_cpus = ','.join(str(x) for x in rank_to_cpus_list)
    logger.info("auto thread-binding list: %s", rank_to_cpus)
    return rank_to_cpus
0 commit comments