Commit 1e78084
llama-mmap: support huge pages (page size 2M or 1G), which can greatly speed up loading a huge model when the system has enough RAM to pre-allocate the model file on hugetlbfs
1 parent 4375415 commit 1e78084

2 files changed: +35 -5 lines changed

src/CMakeLists.txt

Lines changed: 10 additions & 0 deletions
@@ -42,3 +42,13 @@ if (BUILD_SHARED_LIBS)
     target_compile_definitions(llama PRIVATE LLAMA_BUILD)
     target_compile_definitions(llama PUBLIC LLAMA_SHARED)
 endif()
+
+
+if (GGML_USING_HUGE_PAGE_2M)
+    message(STATUS " GGML_USING_HUGE_PAGE_2M is set in cmake")
+    target_compile_definitions(llama PRIVATE GGML_USING_HUGE_PAGE_2M=1)
+endif()
+if (GGML_USING_HUGE_PAGE_1G)
+    message(STATUS " GGML_USING_HUGE_PAGE_1G is set in cmake")
+    target_compile_definitions(llama PRIVATE GGML_USING_HUGE_PAGE_1G=1)
+endif()
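The cmake variables only add PRIVATE compile definitions to the llama target. A minimal sketch, not part of the patch, of what those definitions select on the C++ side, assuming the translation unit is compiled with the same -DGGML_USING_HUGE_PAGE_2M=1 (or 1G) flag the snippet above adds:

// not part of the patch: shows which branch the compile definitions enable
#include <cstdio>

int main() {
#if defined(GGML_USING_HUGE_PAGE_2M)
    std::puts("huge pages: 2M");
#elif defined(GGML_USING_HUGE_PAGE_1G)
    std::puts("huge pages: 1G");
#else
    std::puts("huge pages: disabled");
#endif
    return 0;
}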

src/llama-mmap.cpp

Lines changed: 25 additions & 5 deletions
@@ -54,6 +54,15 @@ static std::string llama_format_win_err(DWORD err) {
 }
 #endif
 
+// llama_mmap support for hugepagesz=2M and 1G
+#ifdef GGML_USING_HUGE_PAGE_2M
+#define HUGE_PAGE_SIZE 2097152
+#endif
+#ifdef GGML_USING_HUGE_PAGE_1G
+#define HUGE_PAGE_SIZE 1073741824
+#endif
+
+
 // llama_file
 
 struct llama_file::impl {
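The two literals are exactly 2 MiB and 1 GiB. Since both blocks define the same HUGE_PAGE_SIZE macro, enabling GGML_USING_HUGE_PAGE_2M and GGML_USING_HUGE_PAGE_1G together would redefine it. A minimal sketch, not part of the patch, of a compile-time guard plus sanity checks on the constants:

// not part of the patch: guard against enabling both options at once and
// check that the magic numbers are exactly 2 MiB and 1 GiB
#include <cstddef>

#if defined(GGML_USING_HUGE_PAGE_2M) && defined(GGML_USING_HUGE_PAGE_1G)
#error "GGML_USING_HUGE_PAGE_2M and GGML_USING_HUGE_PAGE_1G are mutually exclusive"
#endif

static_assert(2097152    == std::size_t(2) * 1024 * 1024,        "2M huge page size");
static_assert(1073741824 == std::size_t(1) * 1024 * 1024 * 1024, "1G huge page size");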
@@ -274,9 +283,16 @@ struct llama_mmap::impl {
     std::vector<std::pair<size_t, size_t>> mapped_fragments;
 
     impl(struct llama_file * file, size_t prefetch, bool numa) {
-        size = file->size();
         int fd = file->file_id();
         int flags = MAP_SHARED;
+#if defined(GGML_USING_HUGE_PAGE_2M) || defined(GGML_USING_HUGE_PAGE_1G)
+        // hugepage support requires the mmap size to be a multiple of the page size
+        // (strictly this also holds for normal 4K pages; only some OSes relax it)
+        size = (file->size() + HUGE_PAGE_SIZE - 1) / HUGE_PAGE_SIZE * HUGE_PAGE_SIZE;
+        flags |= MAP_HUGETLB;
+#else
+        size = file->size();
+#endif
         if (numa) { prefetch = 0; }
 #ifdef __linux__
         if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) {
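The size expression above is the usual round-up-to-a-multiple idiom. A small self-contained sketch, not part of the patch, with concrete numbers, assuming a 2 MiB huge page:

// not part of the patch: the same rounding, shown with concrete numbers
#include <cassert>
#include <cstddef>

static std::size_t round_up_to_page(std::size_t file_size, std::size_t page_size) {
    return (file_size + page_size - 1) / page_size * page_size;
}

int main() {
    const std::size_t page_2m = 2097152;
    // a 5,000,000-byte file spans 3 huge pages, so 6,291,456 bytes get mapped
    assert(round_up_to_page(5000000, page_2m) == 6291456);
    // an exact multiple is left unchanged
    assert(round_up_to_page(2 * page_2m, page_2m) == 2 * page_2m);
    return 0;
}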
@@ -285,25 +301,25 @@ struct llama_mmap::impl {
         }
         if (prefetch) { flags |= MAP_POPULATE; }
 #endif
-        addr = mmap(NULL, file->size(), PROT_READ, flags, fd, 0);
+        addr = mmap(NULL, size, PROT_READ, flags, fd, 0);
         if (addr == MAP_FAILED) {
             throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
         }
 
         if (prefetch > 0) {
-            if (posix_madvise(addr, std::min(file->size(), prefetch), POSIX_MADV_WILLNEED)) {
+            if (posix_madvise(addr, std::min(size, prefetch), POSIX_MADV_WILLNEED)) {
                 LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n",
                         strerror(errno));
             }
         }
         if (numa) {
-            if (posix_madvise(addr, file->size(), POSIX_MADV_RANDOM)) {
+            if (posix_madvise(addr, size, POSIX_MADV_RANDOM)) {
                 LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_RANDOM) failed: %s\n",
                         strerror(errno));
             }
         }
 
-        mapped_fragments.emplace_back(0, file->size());
+        mapped_fragments.emplace_back(0, size);
     }
 
     static void align_range(size_t * first, size_t * last, size_t page_size) {
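Taken together, the constructor now maps a rounded-up length with MAP_HUGETLB. A standalone sketch, not part of the patch, of the same mapping path; it assumes a 2 MiB page and a model file already placed on a hugetlbfs mount (on Linux, file-backed MAP_HUGETLB mappings require hugetlbfs), and the path below is hypothetical:

// not part of the patch: standalone sketch of mapping a hugetlbfs-backed file
#include <cstdio>
#include <cstring>
#include <cerrno>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

int main() {
    const size_t page = 2097152;                     // 2 MiB huge page
    const char * path = "/mnt/huge/model.gguf";     // hypothetical hugetlbfs file

    int fd = open(path, O_RDONLY);
    if (fd < 0) { std::perror("open"); return 1; }

    struct stat st{};
    if (fstat(fd, &st) != 0) { std::perror("fstat"); close(fd); return 1; }

    // round the mapping length up to a whole number of huge pages
    size_t len = ((size_t) st.st_size + page - 1) / page * page;

    void * addr = mmap(NULL, len, PROT_READ, MAP_SHARED | MAP_HUGETLB, fd, 0);
    if (addr == MAP_FAILED) {
        std::fprintf(stderr, "mmap failed: %s\n", std::strerror(errno));
        close(fd);
        return 1;
    }

    // ... read tensor data through addr ...

    munmap(addr, len);
    close(fd);
    return 0;
}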
@@ -319,7 +335,11 @@ struct llama_mmap::impl {
     }
 
     void unmap_fragment(size_t first, size_t last) {
+#if defined(GGML_USING_HUGE_PAGE_2M) || defined(GGML_USING_HUGE_PAGE_1G)
+        int page_size = HUGE_PAGE_SIZE;
+#else
         int page_size = sysconf(_SC_PAGESIZE);
+#endif
         align_range(&first, &last, page_size);
         size_t len = last - first;
 
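unmap_fragment now aligns fragment boundaries to the huge page size rather than the 4K system page. align_range() itself is not shown in this diff; a sketch, not part of the patch, under the assumption that it shrinks the range inward to page boundaries:

// not part of the patch: page-granular clamping of an unmap request,
// assuming align_range() rounds first up and last down (as its name suggests);
// the bit masking requires page_size to be a power of two (2 MiB / 1 GiB both are)
#include <cstdio>
#include <cstddef>

static void align_range(std::size_t * first, std::size_t * last, std::size_t page_size) {
    *first = (*first + page_size - 1) & ~(page_size - 1); // round first up
    *last  = *last & ~(page_size - 1);                    // round last down
    if (*last < *first) { *last = *first; }               // nothing left to unmap
}

int main() {
    std::size_t first = 1000, last = 5 * 2097152 + 1;
    align_range(&first, &last, 2097152);          // 2 MiB huge pages
    std::printf("[%zu, %zu)\n", first, last);     // prints [2097152, 10485760)
    return 0;
}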