Skip to content

Commit e3b7eda

Browse files
Mmap windows (#15538)
The current Windows implementation had issues with 64-bit files. This change expands the coverage so that it works properly and adds some tests.
1 parent 96189bf commit e3b7eda

File tree

6 files changed

+221
-56
lines changed

6 files changed

+221
-56
lines changed

extension/data_loader/mman.h

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
#pragma once
1313

1414
#include <executorch/runtime/platform/compiler.h>
15+
#include <sys/stat.h>
16+
#include <cstdint>
1517

1618
#ifndef _WIN32
1719

@@ -22,6 +24,25 @@ ET_INLINE size_t get_os_page_size() {
2224
return sysconf(_SC_PAGESIZE);
2325
}
2426

27+
/**
28+
* Platform-specific file stat function (non-Windows branch).
*
* Calls ::fstat() on `fd`. When the call reports success (return value >= 0),
* the file's st_size is cast to size_t and written to `*out_size`; on failure
* `*out_size` is left untouched.
*
* @param fd File descriptor to query.
* @param out_size Receives the file size on success. It is dereferenced
*     without a null check, so callers must pass a valid pointer.
* @return The value returned by ::fstat().
29+
*/
30+
ET_INLINE int get_file_stat(int fd, size_t* out_size) {
31+
struct stat st;
32+
int err = ::fstat(fd, &st);
33+
if (err >= 0) {
34+
*out_size = static_cast<size_t>(st.st_size);
35+
}
36+
return err;
37+
}
38+
39+
/**
40+
* Platform-specific mmap offset type conversion (non-Windows branch).
*
* Converts a size_t file offset to off_t with a plain static_cast.
*
* NOTE(review): no range check is performed here. If off_t is narrower than
* size_t's value range on some target, a large offset would not be
* representable after the cast -- confirm that callers validate the offset
* before calling this.
*
* @param offset File offset to convert.
* @return `offset` cast to off_t.
41+
*/
42+
ET_INLINE off_t get_mmap_offset(size_t offset) {
43+
return static_cast<off_t>(offset);
44+
}
45+
2546
#else
2647

2748
#define NOMINMAX
@@ -40,4 +61,23 @@ ET_INLINE long get_os_page_size() {
4061
return pagesize;
4162
}
4263

64+
/**
65+
* Platform-specific file stat function (Windows branch).
*
* Calls ::_fstat64() on `fd`, using struct _stat64. When the call reports
* success (return value >= 0), st_size is cast to size_t and written to
* `*out_size`; on failure `*out_size` is left untouched.
*
* NOTE(review): the size is narrowed to size_t by the cast; if size_t is
* 32 bits on some Windows target, very large sizes would be truncated --
* confirm whether such targets are supported.
*
* @param fd File descriptor to query.
* @param out_size Receives the file size on success. It is dereferenced
*     without a null check, so callers must pass a valid pointer.
* @return The value returned by ::_fstat64().
66+
*/
67+
ET_INLINE int get_file_stat(int fd, size_t* out_size) {
68+
struct _stat64 st;
69+
int err = ::_fstat64(fd, &st);
70+
if (err >= 0) {
71+
*out_size = static_cast<size_t>(st.st_size);
72+
}
73+
return err;
74+
}
75+
76+
/**
77+
* Platform-specific mmap offset type conversion (Windows branch).
*
* Converts a size_t file offset to uint64_t with a static_cast. The result
* is what callers in mmap_data_loader.cpp pass as the offset argument of
* ::mmap().
*
* @param offset File offset to convert.
* @return `offset` cast to uint64_t.
78+
*/
79+
ET_INLINE uint64_t get_mmap_offset(size_t offset) {
80+
return static_cast<uint64_t>(offset);
81+
}
82+
4383
#endif

extension/data_loader/mman_windows.cpp

Lines changed: 29 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,11 @@
2121

2222
#include <errno.h>
2323
#include <io.h>
24+
#include <cstdint>
25+
#include <limits>
26+
#define NOMINMAX
2427
#include <windows.h>
28+
#undef NOMINMAX
2529

2630
#ifndef STATUS_SECTION_TOO_BIG
2731
#define STATUS_SECTION_TOO_BIG 0xC0000040L
@@ -129,49 +133,44 @@ static DWORD __map_mmap_prot_file(const int prot) {
129133

130134
} // namespace
131135

132-
void* mmap(void* addr, size_t len, int prot, int flags, int fildes, off_t off) {
136+
void* mmap(
137+
void* addr,
138+
size_t len,
139+
int prot,
140+
int flags,
141+
int fildes,
142+
uint64_t off) {
133143
HANDLE fm, h;
134-
135144
void* map = MAP_FAILED;
136145

137-
#ifdef _MSC_VER
138-
#pragma warning(push)
139-
#pragma warning(disable : 4293)
140-
#endif
141-
142-
const DWORD dwFileOffsetLow = (sizeof(off_t) <= sizeof(DWORD))
143-
? (DWORD)off
144-
: (DWORD)(off & 0xFFFFFFFFL);
145-
const DWORD dwFileOffsetHigh = (sizeof(off_t) <= sizeof(DWORD))
146-
? (DWORD)0
147-
: (DWORD)((off >> 32) & 0xFFFFFFFFL);
148-
const DWORD protect = __map_mmap_prot_page(prot);
149-
const DWORD desiredAccess = __map_mmap_prot_file(prot);
150-
151-
const off_t maxSize = off + (off_t)len;
152-
153-
const DWORD dwMaxSizeLow = (sizeof(off_t) <= sizeof(DWORD))
154-
? (DWORD)maxSize
155-
: (DWORD)(maxSize & 0xFFFFFFFFL);
156-
const DWORD dwMaxSizeHigh = (sizeof(off_t) <= sizeof(DWORD))
157-
? (DWORD)0
158-
: (DWORD)((maxSize >> 32) & 0xFFFFFFFFL);
159-
160-
#ifdef _MSC_VER
161-
#pragma warning(pop)
162-
#endif
163-
164146
errno = 0;
165147

166148
if (len == 0
167149
/* Unsupported flag combinations */
168150
|| (flags & MAP_FIXED) != 0
169-
/* Usupported protection combinations */
151+
/* Unsupported protection combinations */
170152
|| prot == PROT_EXEC) {
171153
errno = EINVAL;
172154
return MAP_FAILED;
173155
}
174156

157+
if (off > std::numeric_limits<std::uint64_t>::max() - len) {
158+
errno = EINVAL;
159+
return MAP_FAILED;
160+
}
161+
162+
const std::uint64_t maxSize = off + static_cast<std::uint64_t>(len);
163+
164+
const DWORD dwFileOffsetLow = static_cast<DWORD>(off & 0xFFFFFFFFULL);
165+
const DWORD dwFileOffsetHigh =
166+
static_cast<DWORD>((off >> 32) & 0xFFFFFFFFULL);
167+
const DWORD protect = __map_mmap_prot_page(prot);
168+
const DWORD desiredAccess = __map_mmap_prot_file(prot);
169+
170+
const DWORD dwMaxSizeLow = static_cast<DWORD>(maxSize & 0xFFFFFFFFULL);
171+
const DWORD dwMaxSizeHigh =
172+
static_cast<DWORD>((maxSize >> 32) & 0xFFFFFFFFULL);
173+
175174
h = ((flags & MAP_ANONYMOUS) == 0) ? (HANDLE)_get_osfhandle(fildes)
176175
: INVALID_HANDLE_VALUE;
177176

extension/data_loader/mman_windows.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#endif
3232

3333
#include <sys/types.h>
34+
#include <cstdint>
3435

3536
#ifdef __cplusplus
3637
extern "C" {
@@ -56,7 +57,13 @@ extern "C" {
5657
#define MS_SYNC 2
5758
#define MS_INVALIDATE 4
5859

59-
void* mmap(void* addr, size_t len, int prot, int flags, int fildes, off_t off);
60+
// Windows replacement for mmap(). The file offset `off` is an unsigned
// 64-bit value. The implementation in mman_windows.cpp returns MAP_FAILED
// with errno set to EINVAL when `len` is 0, when MAP_FIXED is requested,
// when `prot` is exactly PROT_EXEC, or when `off + len` would overflow
// 64 bits.
void* mmap(
61+
void* addr,
62+
size_t len,
63+
int prot,
64+
int flags,
65+
int fildes,
66+
uint64_t off);
6067
int munmap(void* addr, size_t len);
6168
int mprotect(void* addr, size_t len, int prot);
6269
int msync(void* addr, size_t len, int flags);

extension/data_loader/mmap_data_loader.cpp

Lines changed: 11 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <executorch/extension/data_loader/mmap_data_loader.h>
1010

1111
#include <cerrno>
12+
#include <cstdint>
1213
#include <cstring>
1314
#include <limits>
1415

@@ -94,8 +95,8 @@ Result<MmapDataLoader> MmapDataLoader::from(
9495
}
9596

9697
// Cache the file size.
97-
struct stat st;
98-
int err = ::fstat(fd, &st);
98+
size_t file_size;
99+
int err = get_file_stat(fd, &file_size);
99100
if (err < 0) {
100101
ET_LOG(
101102
Error,
@@ -106,7 +107,6 @@ Result<MmapDataLoader> MmapDataLoader::from(
106107
::close(fd);
107108
return Error::AccessFailed;
108109
}
109-
size_t file_size = st.st_size;
110110

111111
// Copy the filename so we can print better debug messages if reads fail.
112112
const char* file_name_copy = ::strdup(file_name);
@@ -167,12 +167,6 @@ Error MmapDataLoader::validate_input(size_t offset, size_t size) const {
167167
offset,
168168
size,
169169
file_size_);
170-
ET_CHECK_OR_RETURN_ERROR(
171-
// Recommended by a lint warning.
172-
offset <= std::numeric_limits<off_t>::max(),
173-
InvalidArgument,
174-
"Offset %zu too large for off_t",
175-
offset);
176170
return Error::Ok;
177171
}
178172

@@ -207,13 +201,10 @@ Result<FreeableBuffer> MmapDataLoader::load(
207201

208202
// Map the pages read-only. Use shared mappings so that other processes
209203
// can also map the same pages and share the same memory.
210-
void* pages = ::mmap(
211-
nullptr,
212-
map_size,
213-
PROT_READ,
214-
MAP_SHARED,
215-
fd_,
216-
static_cast<off_t>(range.start));
204+
const auto map_offset = get_mmap_offset(range.start);
205+
206+
void* pages =
207+
::mmap(nullptr, map_size, PROT_READ, MAP_SHARED, fd_, map_offset);
217208
ET_CHECK_OR_RETURN_ERROR(
218209
pages != MAP_FAILED,
219210
AccessFailed,
@@ -315,13 +306,10 @@ Error MmapDataLoader::load_into(
315306
// Map the pages read-only. MAP_PRIVATE vs. MAP_SHARED doesn't matter since
316307
// the data is read-only, but use PRIVATE just to further avoid accidentally
317308
// modifying the file.
318-
void* pages = ::mmap(
319-
nullptr,
320-
map_size,
321-
PROT_READ,
322-
MAP_PRIVATE,
323-
fd_,
324-
static_cast<off_t>(range.start));
309+
const auto map_offset = get_mmap_offset(range.start);
310+
311+
void* pages =
312+
::mmap(nullptr, map_size, PROT_READ, MAP_PRIVATE, fd_, map_offset);
325313
ET_CHECK_OR_RETURN_ERROR(
326314
pages != MAP_FAILED,
327315
AccessFailed,

extension/data_loader/test/mmap_data_loader_test.cpp

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <executorch/extension/data_loader/mmap_data_loader.h>
1010

1111
#include <cstring>
12+
#include <vector>
1213

1314
#include <gtest/gtest.h>
1415

@@ -428,4 +429,61 @@ TEST_F(MmapDataLoaderTest, LoadIntoCopiesOffsetCorrectly) {
428429

429430
// Verify memory copied correctly.
430431
EXPECT_EQ(0, std::memcmp(dst, contents + offset, size));
431-
}
432+
}
433+
434+
// Tests that the loader can handle files requiring 64-bit file systems.
435+
// This test verifies that offsets and sizes beyond 32-bit limits are handled
436+
// correctly by creating a sparse file with data at a large offset.
437+
TEST_F(MmapDataLoaderTest, LargeFileOffsetSupport) {
438+
// We run some 32 bit tests on Linux, where off_t can be narrower than 64
439+
// bits and cannot hold a 3GB offset. Skip only in that case; a 64-bit off_t
// (sizeof == 8) must still run the test.
440+
#ifndef _WIN32
441+
if (sizeof(off_t) < 8) {
442+
return;
443+
}
444+
#endif
445+
// Create a sparse file with a marker at an offset beyond 2GB (32-bit limit).
446+
// We use 3GB to ensure we're testing 64-bit offset handling.
447+
const size_t large_offset = 3ULL * 1024 * 1024 * 1024; // 3GB
448+
const std::string test_marker = "TEST_MARKER_AT_LARGE_OFFSET";
449+
450+
// Use TempFile sparse file API to create a 3GB+ file
451+
TempFile tf(large_offset, test_marker, large_offset + test_marker.size());
452+
453+
// Now try to load the data using MmapDataLoader.
454+
Result<MmapDataLoader> mdl = MmapDataLoader::from(tf.path().c_str());
455+
ASSERT_EQ(mdl.error(), Error::Ok)
456+
<< "Failed to create MmapDataLoader for large sparse file";
457+
458+
// Verify the file size is reported correctly (should be > 3GB).
459+
Result<size_t> file_size = mdl->size();
460+
ASSERT_EQ(file_size.error(), Error::Ok);
461+
EXPECT_GT(*file_size, large_offset)
462+
<< "File size should be larger than the large offset";
463+
EXPECT_EQ(*file_size, large_offset + test_marker.size())
464+
<< "File size should match offset + marker size";
465+
466+
// Try to load the marker data from the large offset.
467+
Result<FreeableBuffer> fb = mdl->load(
468+
large_offset,
469+
test_marker.size(),
470+
DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::Program));
471+
ASSERT_EQ(fb.error(), Error::Ok) << "Failed to load data from large offset";
472+
473+
EXPECT_EQ(fb->size(), test_marker.size());
474+
EXPECT_EQ(0, std::memcmp(fb->data(), test_marker.data(), test_marker.size()))
475+
<< "Data at large offset does not match expected marker";
476+
477+
// Test load_into as well.
478+
std::vector<uint8_t> buffer(test_marker.size());
479+
Error err = mdl->load_into(
480+
large_offset,
481+
test_marker.size(),
482+
DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::Program),
483+
buffer.data());
484+
ASSERT_EQ(err, Error::Ok) << "load_into failed for large offset";
485+
486+
EXPECT_EQ(
487+
0, std::memcmp(buffer.data(), test_marker.data(), test_marker.size()))
488+
<< "load_into data at large offset does not match expected marker";
489+
}

0 commit comments

Comments
 (0)