Skip to content

Commit f0dee3c

Browse files
author
lexasub
committed
some optimizations (maybe)
1 parent bfcce4d commit f0dee3c

File tree

1 file changed

+29
-14
lines changed

1 file changed

+29
-14
lines changed

src/llama-mmap.cpp

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
#include "llama-mmap.h"
22

3-
#include "llama-impl.h"
4-
5-
#include "ggml.h"
6-
7-
#include <cstring>
3+
#include <cerrno>
84
#include <climits>
5+
#include <cstring>
96
#include <stdexcept>
10-
#include <cerrno>
7+
#include <thread>
8+
9+
#include "ggml.h"
10+
#include "llama-impl.h"
1111

1212
#ifdef __has_include
1313
#if __has_include(<unistd.h>)
@@ -274,22 +274,37 @@ struct llama_mmap::impl {
274274
int flags = MAP_SHARED;
275275
if (numa) { prefetch = 0; }
276276
#ifdef __linux__
277-
if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) {
278-
LLAMA_LOG_WARN("warning: posix_fadvise(.., POSIX_FADV_SEQUENTIAL) failed: %s\n",
279-
strerror(errno));
277+
if (file->size() > 1024 * 1024 * 1024) { // 1GB+
278+
if (posix_fadvise(fd, 0, 0, POSIX_FADV_WILLNEED | POSIX_FADV_SEQUENTIAL)) {
279+
LLAMA_LOG_WARN("warning: posix_fadvise(.., POSIX_FADV_WILLNEED, POSIX_FADV_SEQUENTIAL) failed: %s\n",
280+
strerror(errno));
281+
}
282+
} else {
283+
if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) {
284+
LLAMA_LOG_WARN("warning: posix_fadvise(.., POSIX_FADV_SEQUENTIAL) failed: %s\n",
285+
strerror(errno));
286+
}
287+
}
288+
if (prefetch && file->size() <= 1024 * 1024 * 512) { // 512 MB threshold
289+
flags |= MAP_POPULATE;
280290
}
281-
if (prefetch) { flags |= MAP_POPULATE; }
282291
#endif
283292
addr = mmap(NULL, file->size(), PROT_READ, flags, fd, 0);
284293
if (addr == MAP_FAILED) {
285294
throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
286295
}
287296

288297
if (prefetch > 0) {
289-
if (posix_madvise(addr, std::min(file->size(), prefetch), POSIX_MADV_WILLNEED)) {
290-
LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n",
291-
strerror(errno));
292-
}
298+
#ifdef __linux__
299+
std::thread([addr=addr, size = file->size(), prefetch] {
300+
posix_madvise(addr, std::min(size, prefetch), POSIX_MADV_WILLNEED);
301+
}).detach();
302+
#elif defined(_WIN32)
303+
std::thread([](void* addr, SIZE_T size) {
304+
WIN32_MEMORY_RANGE_ENTRY range = { addr, size };
305+
PrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0);
306+
}, addr, std::min(file->size(), prefetch)).detach();
307+
#endif
293308
}
294309
if (numa) {
295310
if (posix_madvise(addr, file->size(), POSIX_MADV_RANDOM)) {

0 commit comments

Comments
 (0)