Skip to content

Commit f0e5e5e

Browse files
author
niushengxiao
committed
feat: default disable hugepage
1 parent 4b266b0 commit f0e5e5e

File tree

3 files changed

+32
-23
lines changed

3 files changed

+32
-23
lines changed

lightllm/server/multi_level_kv_cache/disk_cache_worker.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,7 @@ def __init__(
7676
)
7777

7878
def _prepare_tensor(self, tensor: torch.Tensor) -> torch.Tensor:
79-
num_page, num_layer = tensor.shape[0], tensor.shape[1]
80-
return tensor.reshape(num_page, num_layer, -1)
79+
return tensor.flatten(1).view(dtype=torch.uint8)
8180

8281
def run(self) -> None:
8382
while True:

lightllm/utils/envs_utils.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,3 +199,15 @@ def get_radix_tree_merge_update_delta() -> int:
199199
@lru_cache(maxsize=None)
200200
def get_disk_cache_prompt_limit_length():
201201
return int(os.getenv("LIGHTLLM_DISK_CACHE_PROMPT_LIMIT_LENGTH", 10000))
202+
203+
204+
@lru_cache(maxsize=None)
205+
def enable_huge_page():
206+
"""
207+
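Huge page mode: enabling it can greatly shorten the CPU KV cache loading time. Huge pages may need to be configured manually first, e.g.: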
208+
"sudo sed -i 's/^GRUB_CMDLINE_LINUX=\"/& default_hugepagesz=1G \
209+
hugepagesz=1G hugepages={number of 1G huge pages to reserve}/' /etc/default/grub"
210+
"sudo update-grub"
211+
"sudo reboot"
212+
"""
213+
return enable_env_vars("LIGHTLLM_HUGE_PAGE_ENABLE")

lightllm/utils/kv_cache_utils.py

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import numpy as np
99
import triton
1010
from functools import lru_cache
11-
from lightllm.utils.envs_utils import get_env_start_args
11+
from lightllm.utils.envs_utils import get_env_start_args, enable_huge_page
1212
from lightllm.utils.log_utils import init_logger
1313
from lightllm.utils.config_utils import get_num_key_value_heads, get_head_dim, get_layer_num, get_model_type
1414
from typing import List, Tuple, Optional
@@ -93,7 +93,7 @@ def create_shm_kv_cache_ptr() -> int:
9393
args = get_env_start_args()
9494
key = args.cpu_kv_cache_shm_id
9595
requested_size = calcu_cpu_cache_meta().calcu_size()
96-
use_hugetlb = True
96+
use_hugetlb = enable_huge_page()
9797

9898
# Determine the huge page size (by default, read Hugepagesize from /proc/meminfo)
9999
def _get_default_hugepage_size() -> int:
@@ -109,37 +109,35 @@ def _get_default_hugepage_size() -> int:
109109
pass
110110
return 2 * 1024 * 1024 # fallback 2MB
111111

112-
# 向上对齐到大页大小
113-
huge_sz = _get_default_hugepage_size()
114-
size_to_alloc = triton.cdiv(requested_size, huge_sz) * huge_sz
115112
shmflg = 0o666 | 0o1000 # permission bits and the IPC_CREAT flag
116113
if use_hugetlb:
114+
# Round the requested size up to a multiple of the huge page size
115+
huge_sz = _get_default_hugepage_size()
116+
size_to_alloc = triton.cdiv(requested_size, huge_sz) * huge_sz
117117
SHM_HUGETLB = 0o4000
118118
shmflg |= SHM_HUGETLB
119119
logger.info(
120120
f"Using SHM_HUGETLB, hugepage_size={huge_sz} bytes, requested={requested_size}, alloc={size_to_alloc}"
121121
)
122+
else:
123+
size_to_alloc = requested_size
124+
logger.info(f"Using regular pages, requested={requested_size}, alloc={size_to_alloc}")
122125

123-
# 优先尝试 HugeTLB 分配,失败则回退到普通页
124126
shmid = libc.shmget(key, size_to_alloc, shmflg)
125127
hugepages_num = (size_to_alloc + 1024 * 1024 * 1024 - 1) // (1024 * 1024 * 1024)
126-
if shmid < 0 and use_hugetlb:
127-
err = ctypes.get_errno()
128-
logger.error(
129-
f"shmget with SHM_HUGETLB failed (errno={err}). Falling back to regular pages."
130-
f"You may need to configure hugepages manually, e.g.,"
131-
f"sudo sed -i 's/^GRUB_CMDLINE_LINUX=\"/& default_hugepagesz=1G \
132-
hugepagesz=1G hugepages={hugepages_num}/' /etc/default/grub"
133-
f"sudo update-grub"
134-
f"sudo reboot"
135-
)
136-
# 回退:去掉 HUGETLB 标志,使用请求原始大小
137-
shmflg_n = 0o666 | 0o1000
138-
shmid = libc.shmget(key, size_to_alloc, shmflg_n)
139-
140128
if shmid < 0:
141129
err = ctypes.get_errno()
142-
raise Exception(f"Error creating shared memory (errno={err})")
130+
if use_hugetlb:
131+
raise Exception(
132+
f"shmget with SHM_HUGETLB failed (errno={err}). Falling back to regular pages."
133+
f"You may need to configure hugepages manually, e.g.,"
134+
f"sudo sed -i 's/^GRUB_CMDLINE_LINUX=\"/& default_hugepagesz=1G \
135+
hugepagesz=1G hugepages={hugepages_num}/' /etc/default/grub"
136+
f"sudo update-grub"
137+
f"sudo reboot"
138+
)
139+
else:
140+
raise Exception(f"Error creating regular shared memory (errno={err})")
143141

144142
register_sysv_shm_for_cleanup(key, shmid)
145143
logger.info(f"Shared memory ID: {shmid}")

0 commit comments

Comments
 (0)