Skip to content

Commit 9cae27f

Browse files
author
niushengxiao
committed
feat: default disable hugepage
1 parent 4b266b0 commit 9cae27f

File tree

3 files changed

+21
-7
lines changed

3 files changed

+21
-7
lines changed

lightllm/server/multi_level_kv_cache/disk_cache_worker.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,7 @@ def __init__(
7676
)
7777

7878
def _prepare_tensor(self, tensor: torch.Tensor) -> torch.Tensor:
79-
num_page, num_layer = tensor.shape[0], tensor.shape[1]
80-
return tensor.reshape(num_page, num_layer, -1)
79+
return tensor.flatten(1).view(dtype=torch.uint8)
8180

8281
def run(self) -> None:
8382
while True:

lightllm/utils/envs_utils.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,3 +199,15 @@ def get_radix_tree_merge_update_delta() -> int:
199199
@lru_cache(maxsize=None)
200200
def get_disk_cache_prompt_limit_length():
201201
return int(os.getenv("LIGHTLLM_DISK_CACHE_PROMPT_LIMIT_LENGTH", 10000))
202+
203+
204+
@lru_cache(maxsize=None)
205+
def enable_huge_page():
206+
"""
207+
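Huge-page mode: when enabled, it greatly shortens CPU KV cache loading time. Huge pages must first be reserved at boot with the commands below: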
208+
"sudo sed -i 's/^GRUB_CMDLINE_LINUX=\"/& default_hugepagesz=1G \
209+
hugepagesz=1G hugepages={需要启用的大页容量}/' /etc/default/grub"
210+
"sudo update-grub"
211+
"sudo reboot"
212+
"""
213+
return enable_env_vars("LIGHTLLM_HUGE_PAGE_ENABLE")

lightllm/utils/kv_cache_utils.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import numpy as np
99
import triton
1010
from functools import lru_cache
11-
from lightllm.utils.envs_utils import get_env_start_args
11+
from lightllm.utils.envs_utils import get_env_start_args, enable_huge_page
1212
from lightllm.utils.log_utils import init_logger
1313
from lightllm.utils.config_utils import get_num_key_value_heads, get_head_dim, get_layer_num, get_model_type
1414
from typing import List, Tuple, Optional
@@ -93,7 +93,7 @@ def create_shm_kv_cache_ptr() -> int:
9393
args = get_env_start_args()
9494
key = args.cpu_kv_cache_shm_id
9595
requested_size = calcu_cpu_cache_meta().calcu_size()
96-
use_hugetlb = True
96+
use_hugetlb = enable_huge_page()
9797

9898
# 计算大页大小(默认从 /proc/meminfo 读取 Hugepagesize)
9999
def _get_default_hugepage_size() -> int:
@@ -109,16 +109,19 @@ def _get_default_hugepage_size() -> int:
109109
pass
110110
return 2 * 1024 * 1024 # fallback 2MB
111111

112-
# 向上对齐到大页大小
113-
huge_sz = _get_default_hugepage_size()
114-
size_to_alloc = triton.cdiv(requested_size, huge_sz) * huge_sz
115112
shmflg = 0o666 | 0o1000 # 权限和 IPC_CREAT 标志
116113
if use_hugetlb:
114+
# 向上对齐到大页大小
115+
huge_sz = _get_default_hugepage_size()
116+
size_to_alloc = triton.cdiv(requested_size, huge_sz) * huge_sz
117117
SHM_HUGETLB = 0o4000
118118
shmflg |= SHM_HUGETLB
119119
logger.info(
120120
f"Using SHM_HUGETLB, hugepage_size={huge_sz} bytes, requested={requested_size}, alloc={size_to_alloc}"
121121
)
122+
else:
123+
size_to_alloc = requested_size
124+
logger.info(f"Using regular pages, requested={requested_size}, alloc={size_to_alloc}")
122125

123126
# When huge pages are enabled, try a HugeTLB allocation first and fall back to regular pages on failure
124127
shmid = libc.shmget(key, size_to_alloc, shmflg)

0 commit comments

Comments
 (0)