@@ -274,22 +274,37 @@ struct llama_mmap::impl {
274274 int flags = MAP_SHARED;
275275 if (numa) { prefetch = 0 ; }
276276#ifdef __linux__
277- if (posix_fadvise (fd, 0 , 0 , POSIX_FADV_SEQUENTIAL)) {
278- LLAMA_LOG_WARN (" warning: posix_fadvise(.., POSIX_FADV_SEQUENTIAL) failed: %s\n " ,
279- strerror (errno));
277+ if (file->size () > 1024 * 1024 * 1024 ) { // 1GB+
278+ if (posix_fadvise (fd, 0 , 0 , POSIX_FADV_WILLNEED | POSIX_FADV_SEQUENTIAL)) {
279+ LLAMA_LOG_WARN (" warning: posix_fadvise(.., POSIX_FADV_WILLNEED, POSIX_FADV_SEQUENTIAL) failed: %s\n " ,
280+ strerror (errno));
281+ }
282+ } else {
283+ if (posix_fadvise (fd, 0 , 0 , POSIX_FADV_SEQUENTIAL)) {
284+ LLAMA_LOG_WARN (" warning: posix_fadvise(.., POSIX_FADV_SEQUENTIAL) failed: %s\n " ,
285+ strerror (errno));
286+ }
287+ }
288+ if (prefetch && file->size () <= 1024 * 1024 * 512 ) { // 512 MB threshold
289+ flags |= MAP_POPULATE;
280290 }
281- if (prefetch) { flags |= MAP_POPULATE; }
282291#endif
283292 addr = mmap (NULL , file->size (), PROT_READ, flags, fd, 0 );
284293 if (addr == MAP_FAILED) {
285294 throw std::runtime_error (format (" mmap failed: %s" , strerror (errno)));
286295 }
287296
288297 if (prefetch > 0 ) {
289- if (posix_madvise (addr, std::min (file->size (), prefetch), POSIX_MADV_WILLNEED)) {
290- LLAMA_LOG_WARN (" warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n " ,
291- strerror (errno));
292- }
298+ #ifdef __linux__
299+ std::thread ([addr=addr, size = file->size (), prefetch] {
300+ posix_madvise (addr, std::min (size, prefetch), POSIX_MADV_WILLNEED);
301+ }).detach ();
302+ #elif defined(_WIN32)
303+ std::thread ([](void * addr, SIZE_T size) {
304+ WIN32_MEMORY_RANGE_ENTRY range = { addr, size };
305+ PrefetchVirtualMemory (GetCurrentProcess (), 1 , &range, 0 );
306+ }, addr, std::min (file->size (), prefetch)).detach ();
307+ #endif
293308 }
294309 if (numa) {
295310 if (posix_madvise (addr, file->size (), POSIX_MADV_RANDOM)) {
0 commit comments