88import numpy as np
99import triton
1010from functools import lru_cache
11- from lightllm .utils .envs_utils import get_env_start_args
11+ from lightllm .utils .envs_utils import get_env_start_args , enable_huge_page
1212from lightllm .utils .log_utils import init_logger
1313from lightllm .utils .config_utils import get_num_key_value_heads , get_head_dim , get_layer_num , get_model_type
1414from typing import List , Tuple , Optional
@@ -93,7 +93,7 @@ def create_shm_kv_cache_ptr() -> int:
9393 args = get_env_start_args ()
9494 key = args .cpu_kv_cache_shm_id
9595 requested_size = calcu_cpu_cache_meta ().calcu_size ()
96- use_hugetlb = True
96+ use_hugetlb = enable_huge_page ()
9797
9898 # Compute the huge page size (by default, read Hugepagesize from /proc/meminfo)
9999 def _get_default_hugepage_size () -> int :
@@ -109,37 +109,35 @@ def _get_default_hugepage_size() -> int:
109109 pass
110110 return 2 * 1024 * 1024 # fallback 2MB
111111
112- # 向上对齐到大页大小
113- huge_sz = _get_default_hugepage_size ()
114- size_to_alloc = triton .cdiv (requested_size , huge_sz ) * huge_sz
115112 shmflg = 0o666 | 0o1000 # 权限和 IPC_CREAT 标志
116113 if use_hugetlb :
114+ # 向上对齐到大页大小
115+ huge_sz = _get_default_hugepage_size ()
116+ size_to_alloc = triton .cdiv (requested_size , huge_sz ) * huge_sz
117117 SHM_HUGETLB = 0o4000
118118 shmflg |= SHM_HUGETLB
119119 logger .info (
120120 f"Using SHM_HUGETLB, hugepage_size={ huge_sz } bytes, requested={ requested_size } , alloc={ size_to_alloc } "
121121 )
122+ else :
123+ size_to_alloc = requested_size
124+ logger .info (f"Using regular pages, requested={ requested_size } , alloc={ size_to_alloc } " )
122125
123- # 优先尝试 HugeTLB 分配,失败则回退到普通页
124126 shmid = libc .shmget (key , size_to_alloc , shmflg )
125127 hugepages_num = (size_to_alloc + 1024 * 1024 * 1024 - 1 ) // (1024 * 1024 * 1024 )
126- if shmid < 0 and use_hugetlb :
127- err = ctypes .get_errno ()
128- logger .error (
129- f"shmget with SHM_HUGETLB failed (errno={ err } ). Falling back to regular pages."
130- f"You may need to configure hugepages manually, e.g.,"
131- f"sudo sed -i 's/^GRUB_CMDLINE_LINUX=\" /& default_hugepagesz=1G \
132- hugepagesz=1G hugepages={ hugepages_num } /' /etc/default/grub"
133- f"sudo update-grub"
134- f"sudo reboot"
135- )
136- # 回退:去掉 HUGETLB 标志,使用请求原始大小
137- shmflg_n = 0o666 | 0o1000
138- shmid = libc .shmget (key , size_to_alloc , shmflg_n )
139-
140128 if shmid < 0 :
141129 err = ctypes .get_errno ()
142- raise Exception (f"Error creating shared memory (errno={ err } )" )
130+ if use_hugetlb :
131+ raise Exception (
132+ f"shmget with SHM_HUGETLB failed (errno={ err } ). Falling back to regular pages."
133+ f"You may need to configure hugepages manually, e.g.,"
134+ f"sudo sed -i 's/^GRUB_CMDLINE_LINUX=\" /& default_hugepagesz=1G \
135+ hugepagesz=1G hugepages={ hugepages_num } /' /etc/default/grub"
136+ f"sudo update-grub"
137+ f"sudo reboot"
138+ )
139+ else :
140+ raise Exception (f"Error creating regular shared memory (errno={ err } )" )
143141
144142 register_sysv_shm_for_cleanup (key , shmid )
145143 logger .info (f"Shared memory ID: { shmid } " )
0 commit comments