Skip to content

Commit a7eeff1

Browse files
authored
Support large files in FileReader on Windows (#1490)
### Changelog * C++: Support reading files of size 2GiB+ using `FileReader` on Windows. ### Docs None ### Description `fseek` and `ftell` use `long` for offsets, which is a signed 32-bit integer even on 64-bit Windows. This limited `FileReader` to files of size 2^31 - 1. When getting the file's size, larger files would cause `ftell` to return -1 (indicating an error). The return value was not checked, and this was cast to `uint64_t`, wrapping the value to 2^64 - 1. This PR changes the implementation to instead use the Windows-specific `_ftelli64` and `_fseeki64` which take an `__int64` instead, and also to check the return values. This PR also means that 32-bit POSIX platforms will now cause the `FileReader` constructor to throw an exception when passed a file of size 2GiB+ rather than running into the same problem. I have added a `FileReader` unit test generating a large sparse file and checked that it fails without the change to `FileReader`. Fixes #1486. Fixes: FIRE-221
1 parent 6391735 commit a7eeff1

File tree

12 files changed

+94
-15
lines changed

12 files changed

+94
-15
lines changed

cpp/bench/conanfile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
class McapBenchmarksConan(ConanFile):
55
settings = "os", "compiler", "build_type", "arch"
66
generators = "cmake"
7-
requires = "benchmark/1.7.0", "mcap/2.1.1"
7+
requires = "benchmark/1.7.0", "mcap/2.1.2"
88

99
def build(self):
1010
cmake = CMake(self)

cpp/build-docs.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ set -e
44

55
conan config init
66

7-
conan editable add ./mcap mcap/2.1.1
7+
conan editable add ./mcap mcap/2.1.2
88
conan install docs --install-folder docs/build/Release \
99
-s compiler.cppstd=17 -s build_type=Release --build missing
1010

cpp/build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ set -e
44

55
conan config init
66

7-
conan editable add ./mcap mcap/2.1.1
7+
conan editable add ./mcap mcap/2.1.2
88
conan install test --install-folder test/build/Debug \
99
-s compiler.cppstd=17 -s build_type=Debug --build missing
1010

cpp/docs/conanfile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
class McapDocsConan(ConanFile):
55
settings = "os", "compiler", "build_type", "arch"
66
generators = "cmake"
7-
requires = "mcap/2.1.1"
7+
requires = "mcap/2.1.2"
88

99
def build(self):
1010
cmake = CMake(self)

cpp/examples/conanfile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ class McapExamplesConan(ConanFile):
55
settings = "os", "compiler", "build_type", "arch"
66
generators = "cmake"
77
requires = [
8-
"mcap/2.1.1",
8+
"mcap/2.1.2",
99
"protobuf/3.21.1",
1010
"nlohmann_json/3.10.5",
1111
"catch2/2.13.8",

cpp/mcap/conanfile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
class McapConan(ConanFile):
55
name = "mcap"
6-
version = "2.1.1"
6+
version = "2.1.2"
77
url = "https://github.com/foxglove/mcap"
88
homepage = "https://github.com/foxglove/mcap"
99
description = "A C++ implementation of the MCAP file format"

cpp/mcap/include/mcap/reader.hpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,18 @@ class MCAP_PUBLIC FileReader final : public IReadable {
8080
uint64_t read(std::byte** output, uint64_t offset, uint64_t size) override;
8181

8282
private:
83+
// Numeric type returned by the tell/seek operations. Necessary because long on Windows is 32
84+
// bits so the standard C library interfaces don't work for files larger than 2GiB.
85+
#if defined _WIN32 || defined __CYGWIN__
86+
typedef __int64 offset_type;
87+
#else
88+
typedef long offset_type;
89+
#endif
90+
91+
static_assert((offset_type)(uint64_t)std::numeric_limits<offset_type>::max() ==
92+
std::numeric_limits<offset_type>::max(),
93+
"offset_type should fit in uint64_t");
94+
8395
std::FILE* file_;
8496
std::vector<std::byte> buffer_;
8597
uint64_t size_;

cpp/mcap/include/mcap/reader.inl

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,21 @@ FileReader::FileReader(std::FILE* file)
5151
assert(file_);
5252

5353
// Determine the size of the file
54-
std::fseek(file_, 0, SEEK_END);
55-
size_ = std::ftell(file_);
56-
std::fseek(file_, 0, SEEK_SET);
54+
if (std::fseek(file_, 0, SEEK_END) != 0) {
55+
throw std::system_error(errno, std::generic_category());
56+
}
57+
#if defined _WIN32 || defined __CYGWIN__
58+
offset_type s = _ftelli64(file_);
59+
#else
60+
offset_type s = std::ftell(file_);
61+
#endif
62+
if (s < 0) {
63+
throw std::system_error(errno, std::generic_category());
64+
}
65+
size_ = s;
66+
if (std::fseek(file_, 0, SEEK_SET) != 0) {
67+
throw std::system_error(errno, std::generic_category());
68+
}
5769
}
5870

5971
uint64_t FileReader::size() const {
@@ -64,20 +76,36 @@ uint64_t FileReader::read(std::byte** output, uint64_t offset, uint64_t size) {
6476
if (offset >= size_) {
6577
return 0;
6678
}
79+
// Clamp read size to SIZE_MAX. This should only be relevant on 32-bit platforms. If clamping
80+
// does happen, allocation will fail, but this ensures the error is surfaced sooner. Otherwise,
81+
// wraparound will produce the wrong read size, e.g. a read of 2^32 becomes a read of 0.
82+
// We should really be taking a size_t instead and shifting the responsibility to the caller.
83+
const std::size_t read_size = size <= (uint64_t)SIZE_MAX ? size : SIZE_MAX;
6784

6885
if (offset != position_) {
69-
std::fseek(file_, (long)(offset), SEEK_SET);
7086
std::fflush(file_);
87+
// offset <= size_ and size_ fits into offset_type by definition so these conversions are
88+
// lossless.
89+
#if defined _WIN32 || defined __CYGWIN__
90+
int status = _fseeki64(file_, (offset_type)offset, SEEK_SET);
91+
#else
92+
int status = std::fseek(file_, (offset_type)offset, SEEK_SET);
93+
#endif
94+
if (status < 0) {
95+
throw std::system_error(errno, std::generic_category());
96+
}
7197
position_ = offset;
7298
}
7399

74-
if (size > buffer_.size()) {
75-
buffer_.resize(size);
100+
if (read_size > buffer_.size()) {
101+
buffer_.resize(read_size);
76102
}
77103

78-
const uint64_t bytesRead = uint64_t(std::fread(buffer_.data(), 1, size, file_));
104+
const uint64_t bytesRead = uint64_t(std::fread(buffer_.data(), 1, read_size, file_));
79105
*output = buffer_.data();
80106

107+
// This should not overflow unless the file size exceeds 2^64 - 1 bytes, which should be
108+
// impossible even if the file grows while we're reading it.
81109
position_ += bytesRead;
82110
return bytesRead;
83111
}

cpp/mcap/include/mcap/types.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
namespace mcap {
1616

17-
#define MCAP_LIBRARY_VERSION "2.1.1"
17+
#define MCAP_LIBRARY_VERSION "2.1.2"
1818

1919
using SchemaId = uint16_t;
2020
using ChannelId = uint16_t;

cpp/test/conanfile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
class McapTestConan(ConanFile):
55
settings = "os", "compiler", "build_type", "arch"
66
generators = "cmake"
7-
requires = "catch2/2.13.8", "mcap/2.1.1", "nlohmann_json/3.10.5"
7+
requires = "catch2/2.13.8", "mcap/2.1.2", "nlohmann_json/3.10.5"
88

99
def build(self):
1010
cmake = CMake(self)

0 commit comments

Comments
 (0)