|
1 | 1 | #include "llama-mmap.h" |
2 | 2 |
|
3 | | -#include "llama-impl.h" |
4 | | - |
5 | | -#include "ggml.h" |
6 | | - |
7 | | -#include <cstring> |
| 3 | +#include <cerrno> |
8 | 4 | #include <climits> |
| 5 | +#include <cstring> |
9 | 6 | #include <stdexcept> |
10 | | -#include <cerrno> |
| 7 | +#include <thread> |
| 8 | + |
| 9 | +#include "ggml.h" |
| 10 | +#include "llama-impl.h" |
11 | 11 |
|
12 | 12 | #ifdef __has_include |
13 | 13 | #if __has_include(<unistd.h>) |
@@ -274,22 +274,37 @@ struct llama_mmap::impl { |
274 | 274 | int flags = MAP_SHARED; |
275 | 275 | if (numa) { prefetch = 0; } |
276 | 276 | #ifdef __linux__ |
277 | | - if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) { |
278 | | - LLAMA_LOG_WARN("warning: posix_fadvise(.., POSIX_FADV_SEQUENTIAL) failed: %s\n", |
279 | | - strerror(errno)); |
| 277 | + if (file->size() > 1024 * 1024 * 1024) { // 1GB+ |
| 278 | + if (posix_fadvise(fd, 0, 0, POSIX_FADV_WILLNEED | POSIX_FADV_SEQUENTIAL)) { |
| 279 | + LLAMA_LOG_WARN("warning: posix_fadvise(.., POSIX_FADV_WILLNEED, POSIX_FADV_SEQUENTIAL) failed: %s\n", |
| 280 | + strerror(errno)); |
| 281 | + } |
| 282 | + } else { |
| 283 | + if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) { |
| 284 | + LLAMA_LOG_WARN("warning: posix_fadvise(.., POSIX_FADV_SEQUENTIAL) failed: %s\n", |
| 285 | + strerror(errno)); |
| 286 | + } |
| 287 | + } |
| 288 | + if (prefetch && file->size() <= 1024 * 1024 * 512) { // 512 MB threshold |
| 289 | + flags |= MAP_POPULATE; |
280 | 290 | } |
281 | | - if (prefetch) { flags |= MAP_POPULATE; } |
282 | 291 | #endif |
283 | 292 | addr = mmap(NULL, file->size(), PROT_READ, flags, fd, 0); |
284 | 293 | if (addr == MAP_FAILED) { |
285 | 294 | throw std::runtime_error(format("mmap failed: %s", strerror(errno))); |
286 | 295 | } |
287 | 296 |
|
288 | 297 | if (prefetch > 0) { |
289 | | - if (posix_madvise(addr, std::min(file->size(), prefetch), POSIX_MADV_WILLNEED)) { |
290 | | - LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n", |
291 | | - strerror(errno)); |
292 | | - } |
| 298 | +#ifdef __linux__ |
| 299 | + std::thread([addr=addr, size = file->size(), prefetch] { |
| 300 | + posix_madvise(addr, std::min(size, prefetch), POSIX_MADV_WILLNEED); |
| 301 | + }).detach(); |
| 302 | +#elif defined(_WIN32) |
| 303 | + std::thread([](void* addr, SIZE_T size) { |
| 304 | + WIN32_MEMORY_RANGE_ENTRY range = { addr, size }; |
| 305 | + PrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0); |
| 306 | + }, addr, std::min(file->size(), prefetch)).detach(); |
| 307 | +#endif |
293 | 308 | } |
294 | 309 | if (numa) { |
295 | 310 | if (posix_madvise(addr, file->size(), POSIX_MADV_RANDOM)) { |
|
0 commit comments