Skip to content

Commit 2287146

Browse files
committed
8377771: Race on allocating overreserved large pages on a NUMA system
1 parent bfb6de5 commit 2287146

File tree

5 files changed

+59
-52
lines changed

5 files changed

+59
-52
lines changed

src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp

Lines changed: 10 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,7 @@ bool ZPhysicalMemoryBacking::tmpfs_supports_transparent_huge_pages() const {
385385
return access(ZFILENAME_SHMEM_ENABLED, R_OK) == 0;
386386
}
387387

388-
ZErrno ZPhysicalMemoryBacking::fallocate_compat_mmap_hugetlbfs(zbacking_offset offset, size_t length, bool touch) const {
388+
ZErrno ZPhysicalMemoryBacking::fallocate_compat_mmap_hugetlbfs(zbacking_offset offset, size_t length) const {
389389
// On hugetlbfs, mapping a file segment will fail immediately, without
390390
// the need to touch the mapped pages first, if there aren't enough huge
391391
// pages available to back the mapping.
@@ -398,11 +398,9 @@ ZErrno ZPhysicalMemoryBacking::fallocate_compat_mmap_hugetlbfs(zbacking_offset o
398398
// Once mapped, the huge pages are only reserved. We need to touch them
399399
// to associate them with the file segment. Note that we can not punch
400400
// hole in file segments which only have reserved pages.
401-
if (touch) {
402-
char* const start = (char*)addr;
403-
char* const end = start + length;
404-
os::pretouch_memory(start, end, _block_size);
405-
}
401+
402+
// Touch the mapping (safely) to make sure it's backed by memory
403+
const bool backed = os::Linux::safe_fault_memory(addr, length, ZGranuleSize);
406404

407405
// Unmap again. From now on, the huge pages that were mapped are allocated
408406
// to this file. There's no risk of getting a SIGBUS when mapping and
@@ -412,28 +410,8 @@ ZErrno ZPhysicalMemoryBacking::fallocate_compat_mmap_hugetlbfs(zbacking_offset o
412410
return errno;
413411
}
414412

415-
// Success
416-
return 0;
417-
}
418-
419-
static bool safe_touch_mapping(void* addr, size_t length, size_t page_size) {
420-
char* const start = (char*)addr;
421-
char* const end = start + length;
422-
423-
// Touching a mapping that can't be backed by memory will generate a
424-
// SIGBUS. By using SafeFetch32 any SIGBUS will be safely caught and
425-
// handled. On tmpfs, doing a fetch (rather than a store) is enough
426-
// to cause backing pages to be allocated (there's no zero-page to
427-
// worry about).
428-
for (char *p = start; p < end; p += page_size) {
429-
if (SafeFetch32((int*)p, -1) == -1) {
430-
// Failed
431-
return false;
432-
}
433-
}
434-
435-
// Success
436-
return true;
413+
// Success?
414+
return backed ? 0 : errno;
437415
}
438416

439417
ZErrno ZPhysicalMemoryBacking::fallocate_compat_mmap_tmpfs(zbacking_offset offset, size_t length) const {
@@ -451,7 +429,7 @@ ZErrno ZPhysicalMemoryBacking::fallocate_compat_mmap_tmpfs(zbacking_offset offse
451429
}
452430

453431
// Touch the mapping (safely) to make sure it's backed by memory
454-
const bool backed = safe_touch_mapping(addr, length, _block_size);
432+
const bool backed = os::Linux::safe_fault_memory(addr, length, _block_size);
455433

456434
// Unmap again. If successfully touched, the backing memory will
457435
// be allocated to this file. There's no risk of getting a SIGBUS
@@ -461,7 +439,7 @@ ZErrno ZPhysicalMemoryBacking::fallocate_compat_mmap_tmpfs(zbacking_offset offse
461439
return errno;
462440
}
463441

464-
// Success
442+
// Success?
465443
return backed ? 0 : ENOMEM;
466444
}
467445

@@ -486,7 +464,7 @@ ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole_compat(zbacking_offset offset
486464
// mmap/munmap (for hugetlbfs and tmpfs with transparent huge pages) or pwrite
487465
// (for tmpfs without transparent huge pages and other filesystem types).
488466
if (ZLargePages::is_explicit()) {
489-
return fallocate_compat_mmap_hugetlbfs(offset, length, false /* touch */);
467+
return fallocate_compat_mmap_hugetlbfs(offset, length);
490468
} else if (ZLargePages::is_transparent()) {
491469
return fallocate_compat_mmap_tmpfs(offset, length);
492470
} else {
@@ -534,18 +512,6 @@ ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole(zbacking_offset offset, size_
534512
}
535513

536514
ZErrno ZPhysicalMemoryBacking::fallocate_punch_hole(zbacking_offset offset, size_t length) const {
537-
if (ZLargePages::is_explicit()) {
538-
// We can only punch hole in pages that have been touched. Non-touched
539-
// pages are only reserved, and not associated with any specific file
540-
// segment. We don't know which pages have been previously touched, so
541-
// we always touch them here to guarantee that we can punch hole.
542-
const ZErrno err = fallocate_compat_mmap_hugetlbfs(offset, length, true /* touch */);
543-
if (err) {
544-
// Failed
545-
return err;
546-
}
547-
}
548-
549515
const int mode = FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE;
550516
if (ZSyscall::fallocate(_fd, mode, untype(offset), length) == -1) {
551517
// Failed
@@ -665,9 +631,7 @@ size_t ZPhysicalMemoryBacking::commit_default(zbacking_offset offset, size_t len
665631
}
666632

667633
size_t ZPhysicalMemoryBacking::commit(zbacking_offset offset, size_t length, uint32_t numa_id) const {
668-
if (ZNUMA::is_enabled() && !ZLargePages::is_explicit()) {
669-
// The memory is required to be preferred at the time it is paged in. As a
670-
// consequence we must prefer the memory when committing non-large pages.
634+
if (ZNUMA::is_enabled()) {
671635
return commit_numa_preferred(offset, length, numa_id);
672636
}
673637

src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ class ZPhysicalMemoryBacking {
4848
bool is_hugetlbfs() const;
4949
bool tmpfs_supports_transparent_huge_pages() const;
5050

51-
ZErrno fallocate_compat_mmap_hugetlbfs(zbacking_offset offset, size_t length, bool touch) const;
51+
ZErrno fallocate_compat_mmap_hugetlbfs(zbacking_offset offset, size_t length) const;
5252
ZErrno fallocate_compat_mmap_tmpfs(zbacking_offset offset, size_t length) const;
5353
ZErrno fallocate_compat_pwrite(zbacking_offset offset, size_t length) const;
5454
ZErrno fallocate_fill_hole_compat(zbacking_offset offset, size_t length) const;

src/hotspot/os/linux/os_linux.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
#include "runtime/osInfo.hpp"
5757
#include "runtime/osThread.hpp"
5858
#include "runtime/perfMemory.hpp"
59+
#include "runtime/safefetch.hpp"
5960
#include "runtime/sharedRuntime.hpp"
6061
#include "runtime/stubRoutines.hpp"
6162
#include "runtime/threads.hpp"
@@ -3005,6 +3006,36 @@ void os::Linux::madvise_transparent_huge_pages(void* addr, size_t bytes) {
30053006
::madvise(addr, bytes, MADV_HUGEPAGE);
30063007
}
30073008

3009+
// Tries to fault in the range [addr, addr + bytes) and reports whether that
// succeeded.
//
// Strategy:
//   1. Ask the kernel to populate the range with madvise(MADV_POPULATE_WRITE).
//   2. If (and only if) that call fails with EINVAL, fall back to reading one
//      32-bit word every page_size bytes through SafeFetch32.
//
// Parameters:
//   addr      - start of the range to fault in
//   bytes     - length of the range in bytes
//   page_size - stride used by the SafeFetch32 fallback loop; it is not used
//               by the madvise path
//
// Returns true if madvise succeeded or every probed location could be read,
// false otherwise.
//
// NOTE(review): the fallback loop advances by page_size, so it assumes
// page_size > 0; with 0 the loop pointer would never move.
// NOTE(review): the fallback path does not itself assign errno. A caller that
// reports errno after a false return may observe a stale value - verify.
bool os::Linux::safe_fault_memory(void* addr, size_t bytes, size_t page_size) {
3010+
const int result = ::madvise(addr, bytes, MADV_POPULATE_WRITE);
3011+
if (result == 0) {
3012+
// Success
3013+
return true;
3014+
} else if (errno != EINVAL) {
3015+
// Failed call to madvise for some other reason than EINVAL
3016+
return false;
3017+
}
3018+
3019+
// If we failed because of EINVAL it might be because MADV_POPULATE_WRITE is
3020+
// not supported. We then try faulting in the memory using SafeFetch.
3021+
3022+
char* const start = (char*)addr;
3023+
char* const end = start + bytes;
3024+
3025+
// Touching a mapping that can't be backed by memory will generate a
3026+
// SIGBUS. By using SafeFetch32 any SIGBUS will be safely caught and
3027+
// handled. A fetch is enough to cause backing pages to be allocated.
// NOTE(review): -1 is both the fallback value handed to SafeFetch32 and the
// value treated as failure below, so a location that legitimately contains -1
// would presumably be reported as a failure - confirm callers only pass
// memory where that cannot happen.
3028+
for (char *p = start; p < end; p += page_size) {
3029+
if (SafeFetch32((int*)p, -1) == -1) {
3030+
// Failed
3031+
return false;
3032+
}
3033+
}
3034+
3035+
// Success
3036+
return true;
3037+
}
3038+
30083039
void os::pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
30093040
if (Linux::should_madvise_anonymous_thps() && alignment_hint > vm_page_size()) {
30103041
Linux::madvise_transparent_huge_pages(addr, bytes);
@@ -4197,6 +4228,21 @@ char* os::pd_reserve_memory_special(size_t bytes, size_t alignment, size_t page_
41974228
if (addr != nullptr) {
41984229
if (UseNUMAInterleaving) {
41994230
numa_make_global(addr, bytes);
4231+
} else {
4232+
// Large pages are committed during reservation so that they are reserved for us.
4233+
// However, under special circumstances we might overreserve pages, so we must
4234+
// also make sure that we can back those pages immediately, not only reserve them.
4235+
// We do this by faulting in the large pages and associating them with the
4236+
// virtual memory reservation here.
4237+
if (!os::Linux::safe_fault_memory(addr, bytes, page_size)) {
4238+
if (::munmap(addr, bytes) != 0) {
4239+
ErrnoPreserver ep;
4240+
log_trace(os, map)("munmap failed: " RANGEFMT " errno=(%s)",
4241+
RANGEFMTARGS(addr, bytes),
4242+
os::strerror(ep.saved_errno()));
4243+
}
4244+
return nullptr;
4245+
}
42004246
}
42014247
}
42024248

src/hotspot/os/linux/os_linux.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,8 @@ class os::Linux {
195195

196196
static void madvise_transparent_huge_pages(void* addr, size_t bytes);
197197

198+
static bool safe_fault_memory(void* addr, size_t bytes, size_t page_size);
199+
198200
// Stack repair handling
199201

200202
// none present

src/hotspot/share/gc/z/zPhysicalMemoryManager.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -290,11 +290,6 @@ void ZPhysicalMemoryManager::map(const ZVirtualMemory& vmem, uint32_t numa_id) c
290290
});
291291

292292
postcond(mapped == size);
293-
294-
// Setup NUMA preferred for large pages
295-
if (ZNUMA::is_enabled() && ZLargePages::is_explicit()) {
296-
os::numa_make_local((char*)addr, size, ZNUMA::numa_id_to_node(numa_id));
297-
}
298293
}
299294

300295
// Unmap virtual memory from physical memory

0 commit comments

Comments
 (0)