Skip to content

Commit 5d64c23

Browse files
laramielcopybara-github
authored andcommitted
Convert MappedRegion to MemoryRegion and make it more generic.
Improve ReadAllToString to better handle /proc and /sys files and other file types which do not report correct sizes by using the standard technique of calling ::read until the return value is 0. PiperOrigin-RevId: 715978234 Change-Id: I09c9681d5cb1b6140398c8b6d3748f051b321512
1 parent 15f5dfd commit 5d64c23

File tree

12 files changed

+289
-115
lines changed

12 files changed

+289
-115
lines changed

tensorstore/internal/BUILD

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -417,9 +417,9 @@ tensorstore_cc_library(
417417
name = "flat_cord_builder",
418418
hdrs = ["flat_cord_builder.h"],
419419
deps = [
420+
"//tensorstore/internal/os:memory_region",
420421
"@com_google_absl//absl/base:core_headers",
421422
"@com_google_absl//absl/log:absl_check",
422-
"@com_google_absl//absl/strings",
423423
"@com_google_absl//absl/strings:cord",
424424
],
425425
)

tensorstore/internal/flat_cord_builder.h

Lines changed: 26 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
#ifndef TENSORSTORE_INTERNAL_FLAT_CORD_BUILDER_H_
1616
#define TENSORSTORE_INTERNAL_FLAT_CORD_BUILDER_H_
1717

18+
#include <stddef.h>
19+
1820
#include <cstdlib>
1921
#include <cstring>
2022
#include <string_view>
@@ -23,7 +25,7 @@
2325
#include "absl/base/optimization.h"
2426
#include "absl/log/absl_check.h"
2527
#include "absl/strings/cord.h"
26-
#include "absl/strings/string_view.h"
28+
#include "tensorstore/internal/os/memory_region.h"
2729

2830
namespace tensorstore {
2931
namespace internal {
@@ -39,67 +41,53 @@ class FlatCordBuilder {
3941
FlatCordBuilder() = default;
4042
explicit FlatCordBuilder(size_t size) : FlatCordBuilder(size, size) {}
4143

44+
explicit FlatCordBuilder(internal_os::MemoryRegion region)
45+
: region_(std::move(region)), inuse_(region_.size()) {}
46+
4247
FlatCordBuilder(size_t size, size_t inuse)
43-
: data_(static_cast<char*>(::malloc(size))),
44-
size_(size),
45-
inuse_(inuse <= size ? inuse : size) {
46-
ABSL_CHECK(size == 0 || data_);
48+
: FlatCordBuilder(internal_os::AllocateHeapRegion(size), inuse) {}
49+
50+
FlatCordBuilder(internal_os::MemoryRegion region, size_t inuse)
51+
: region_(std::move(region)), inuse_(inuse) {
52+
ABSL_CHECK(inuse <= region_.size());
4753
}
4854

4955
FlatCordBuilder(const FlatCordBuilder&) = delete;
5056
FlatCordBuilder& operator=(const FlatCordBuilder&) = delete;
5157
FlatCordBuilder(FlatCordBuilder&& other)
52-
: data_(std::exchange(other.data_, nullptr)),
53-
size_(std::exchange(other.size_, 0)),
58+
: region_(std::move(other.region_)),
5459
inuse_(std::exchange(other.inuse_, 0)) {}
60+
5561
FlatCordBuilder& operator=(FlatCordBuilder&& other) {
56-
std::swap(data_, other.data_);
57-
std::swap(size_, other.size_);
58-
std::swap(inuse_, other.inuse_);
62+
region_ = std::move(other.region_);
63+
inuse_ = std::exchange(other.inuse_, 0);
5964
return *this;
6065
}
61-
~FlatCordBuilder() {
62-
if (data_) {
63-
::free(data_);
64-
}
65-
}
6666

67-
const char* data() const { return data_; }
68-
char* data() { return data_; }
69-
size_t size() const { return size_; }
70-
size_t available() const { return size_ - inuse_; }
67+
~FlatCordBuilder() = default;
68+
69+
const char* data() const { return region_.data(); }
70+
char* data() { return region_.data(); }
71+
size_t size() const { return region_.size(); }
72+
size_t available() const { return region_.size() - inuse_; }
7173

7274
void set_inuse(size_t size) {
73-
ABSL_CHECK(size <= size_);
75+
ABSL_CHECK(size <= region_.size());
7476
inuse_ = size;
7577
}
7678

7779
/// Append data to the builder.
78-
void Append(absl::string_view sv) {
80+
void Append(std::string_view sv) {
7981
if (ABSL_PREDICT_FALSE(sv.empty())) return;
8082
ABSL_CHECK(sv.size() <= available());
81-
::memcpy(data_ + inuse_, sv.data(), sv.size());
83+
::memcpy(region_.data() + inuse_, sv.data(), sv.size());
8284
inuse_ += sv.size();
8385
}
8486

85-
absl::Cord Build() && {
86-
return absl::MakeCordFromExternal(release(), [](absl::string_view s) {
87-
::free(const_cast<char*>(s.data()));
88-
});
89-
}
87+
absl::Cord Build() && { return std::move(region_).as_cord(); }
9088

9189
private:
92-
/// Releases ownership of the buffer. The caller must call `::free`.
93-
absl::string_view release() {
94-
absl::string_view view(data_, inuse_);
95-
data_ = nullptr;
96-
size_ = 0;
97-
inuse_ = 0;
98-
return view;
99-
}
100-
101-
char* data_ = nullptr;
102-
size_t size_ = 0;
90+
internal_os::MemoryRegion region_;
10391
size_t inuse_ = 0;
10492
};
10593

tensorstore/internal/os/BUILD

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ tensorstore_cc_library(
6767
deps = [
6868
":error_code",
6969
":include_windows",
70+
":memory_region",
7071
":potentially_blocking_region",
7172
":unique_handle",
7273
":wstring", # build_cleaner: keep
@@ -278,3 +279,24 @@ tensorstore_cc_library(
278279
"@com_google_absl//absl/strings",
279280
],
280281
)
282+
283+
tensorstore_cc_library(
284+
name = "memory_region",
285+
srcs = ["memory_region.cc"],
286+
hdrs = ["memory_region.h"],
287+
deps = [
288+
"//tensorstore/util:result",
289+
"@com_google_absl//absl/log:absl_check",
290+
"@com_google_absl//absl/log:absl_log",
291+
"@com_google_absl//absl/strings:cord",
292+
],
293+
)
294+
295+
tensorstore_cc_test(
296+
name = "memory_region_test",
297+
srcs = ["memory_region_test.cc"],
298+
deps = [
299+
":memory_region",
300+
"@com_google_googletest//:gtest_main",
301+
],
302+
)

tensorstore/internal/os/file_util.cc

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,12 @@
1414

1515
#include "tensorstore/internal/os/file_util.h"
1616

17+
#include <stddef.h>
18+
19+
#include <cassert>
20+
#include <cstring>
1721
#include <string>
1822

19-
#include "absl/status/status.h"
20-
#include "absl/strings/str_cat.h"
21-
#include "tensorstore/util/quote_string.h"
2223
#include "tensorstore/util/result.h"
2324
#include "tensorstore/util/status.h"
2425

@@ -31,19 +32,46 @@ Result<std::string> ReadAllToString(const std::string& path) {
3132

3233
internal_os::FileInfo info;
3334
TENSORSTORE_RETURN_IF_ERROR(internal_os::GetFileInfo(fd.get(), &info));
34-
if (internal_os::GetSize(info) == 0) {
35-
return absl::InternalError(
36-
absl::StrCat("File ", QuoteString(path), " is empty"));
35+
36+
// Handle the case where the file is empty.
37+
std::string result(internal_os::GetSize(info), 0);
38+
if (result.empty()) {
39+
result.resize(4096);
3740
}
38-
std::string result;
39-
result.resize(internal_os::GetSize(info));
41+
42+
size_t offset = 0;
4043
TENSORSTORE_ASSIGN_OR_RETURN(
4144
auto read,
42-
internal_os::ReadFromFile(fd.get(), result.data(), result.size(), 0));
43-
if (read != result.size()) {
44-
return absl::InternalError("Failed to read entire file");
45+
internal_os::ReadFromFile(fd.get(), &result[0], result.size(), 0));
46+
offset += read;
47+
48+
while (true) {
49+
assert(offset <= result.size());
50+
if ((result.size() - offset) < 4096) {
51+
// In most cases the buffer will be at least as large as necessary and
52+
// this read will return 0, avoiding the copy+resize.
53+
char buffer[4096];
54+
TENSORSTORE_ASSIGN_OR_RETURN(
55+
read,
56+
internal_os::ReadFromFile(fd.get(), buffer, sizeof(buffer), offset));
57+
if (read > 0) {
58+
// Amortized resize; double the size of the buffer.
59+
if (read > result.size()) {
60+
result.resize(read + result.size() * 2);
61+
}
62+
memcpy(&result[offset], buffer, read);
63+
}
64+
} else {
65+
TENSORSTORE_ASSIGN_OR_RETURN(
66+
read, internal_os::ReadFromFile(fd.get(), &result[offset],
67+
result.size() - offset, offset));
68+
}
69+
if (read == 0) {
70+
result.resize(offset);
71+
return result;
72+
}
73+
offset += read;
4574
}
46-
return result;
4775
}
4876

4977
} // namespace internal_os

tensorstore/internal/os/file_util.h

Lines changed: 2 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "absl/status/status.h"
3030
#include "absl/strings/cord.h"
3131
#include "absl/time/time.h"
32+
#include "tensorstore/internal/os/memory_region.h"
3233
#include "tensorstore/internal/os/unique_handle.h"
3334
#include "tensorstore/util/result.h"
3435

@@ -105,46 +106,9 @@ uint32_t GetDefaultPageSize();
105106
/// \param offset Byte offset within file at which to start reading.
106107
/// \param size Number of bytes to read, or 0 to read the entire file.
107108
/// \returns The contents of the file or a failure absl::Status code.
108-
Result<MappedRegion> MemmapFileReadOnly(FileDescriptor fd, size_t offset,
109+
Result<MemoryRegion> MemmapFileReadOnly(FileDescriptor fd, size_t offset,
109110
size_t size);
110111

111-
class MappedRegion {
112-
public:
113-
// ::munmap happens when the MappedRegion destructor runs.
114-
~MappedRegion();
115-
116-
MappedRegion(const MappedRegion&) = delete;
117-
MappedRegion& operator=(const MappedRegion&) = delete;
118-
119-
MappedRegion(MappedRegion&& other) { *this = std::move(other); }
120-
MappedRegion& operator=(MappedRegion&& other) {
121-
data_ = std::exchange(other.data_, nullptr);
122-
size_ = std::exchange(other.size_, 0);
123-
return *this;
124-
}
125-
126-
std::string_view as_string_view() const {
127-
return std::string_view(data_, size_);
128-
}
129-
130-
absl::Cord as_cord() && {
131-
std::string_view string_view = as_string_view();
132-
data_ = nullptr;
133-
size_ = 0;
134-
return absl::MakeCordFromExternal(
135-
string_view, [](auto s) { MappedRegion cleanup(s.data(), s.size()); });
136-
}
137-
138-
private:
139-
MappedRegion(const char* data, size_t size) : data_(data), size_(size) {}
140-
141-
friend Result<MappedRegion> MemmapFileReadOnly(FileDescriptor fd, size_t,
142-
size_t);
143-
144-
const char* data_;
145-
size_t size_;
146-
};
147-
148112
/// --------------------------------------------------------------------------
149113

150114
// Restricted subset of POSIX open flags.

tensorstore/internal/os/file_util_posix.cc

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
#include "tensorstore/internal/metrics/gauge.h"
5353
#include "tensorstore/internal/metrics/metadata.h"
5454
#include "tensorstore/internal/os/error_code.h"
55+
#include "tensorstore/internal/os/memory_region.h"
5556
#include "tensorstore/internal/os/potentially_blocking_region.h"
5657
#include "tensorstore/internal/tracing/logged_trace_span.h"
5758
#include "tensorstore/util/quote_string.h"
@@ -376,7 +377,16 @@ uint32_t GetDefaultPageSize() {
376377
return kDefaultPageSize;
377378
}
378379

379-
Result<MappedRegion> MemmapFileReadOnly(FileDescriptor fd, size_t offset,
380+
namespace {
381+
void UnmapFilePosix(char* data, size_t size) {
382+
if (::munmap(data, size) != 0) {
383+
ABSL_LOG(FATAL) << StatusFromOsError(errno, "Failed to unmap file");
384+
}
385+
mmap_active.Decrement();
386+
}
387+
} // namespace
388+
389+
Result<MemoryRegion> MemmapFileReadOnly(FileDescriptor fd, size_t offset,
380390
size_t size) {
381391
#if ABSL_HAVE_MMAP
382392
LoggedTraceSpan tspan(__func__, detail_logging.Level(1),
@@ -402,7 +412,7 @@ Result<MappedRegion> MemmapFileReadOnly(FileDescriptor fd, size_t offset,
402412
size = file_size - offset;
403413
}
404414
if (size == 0) {
405-
return MappedRegion(nullptr, 0);
415+
return MemoryRegion(nullptr, 0, UnmapFilePosix);
406416
}
407417
}
408418

@@ -416,21 +426,12 @@ Result<MappedRegion> MemmapFileReadOnly(FileDescriptor fd, size_t offset,
416426
mmap_count.Increment();
417427
mmap_bytes.IncrementBy(size);
418428
mmap_active.Increment();
419-
return MappedRegion(static_cast<const char*>(address), size);
429+
return MemoryRegion(static_cast<char*>(address), size, UnmapFilePosix);
420430
#else
421431
return absl::UnimplementedError("::mmap not supported");
422432
#endif
423433
}
424434

425-
MappedRegion::~MappedRegion() {
426-
if (data_) {
427-
if (::munmap(const_cast<char*>(data_), size_) != 0) {
428-
ABSL_LOG(FATAL) << StatusFromOsError(errno, "Failed to unmap file");
429-
}
430-
mmap_active.Decrement();
431-
}
432-
}
433-
434435
absl::Status FsyncFile(FileDescriptor fd) {
435436
LoggedTraceSpan tspan(__func__, detail_logging.Level(1), {{"fd", fd}});
436437
PotentiallyBlockingRegion region;

tensorstore/internal/os/file_util_win.cc

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@
8989
#include "tensorstore/internal/metrics/gauge.h"
9090
#include "tensorstore/internal/metrics/metadata.h"
9191
#include "tensorstore/internal/os/error_code.h"
92+
#include "tensorstore/internal/os/memory_region.h"
9293
#include "tensorstore/internal/os/potentially_blocking_region.h"
9394
#include "tensorstore/internal/os/wstring.h"
9495
#include "tensorstore/internal/tracing/logged_trace_span.h"
@@ -464,7 +465,17 @@ uint32_t GetDefaultPageSize() {
464465
return kDefaultPageSize;
465466
}
466467
467-
Result<MappedRegion> MemmapFileReadOnly(FileDescriptor fd, size_t offset,
468+
namespace {
469+
void UnmapFileWin32(char* data, size_t size) {
470+
if (!::UnmapViewOfFile(data)) {
471+
ABSL_LOG(FATAL) << StatusFromOsError(::GetLastError(),
472+
"Failed in UnmapViewOfFile");
473+
}
474+
mmap_active.Decrement();
475+
}
476+
} // namespace
477+
478+
Result<MemoryRegion> MemmapFileReadOnly(FileDescriptor fd, size_t offset,
468479
size_t size) {
469480
if (offset > 0 && offset % GetDefaultPageSize() != 0) {
470481
return absl::InvalidArgumentError(
@@ -513,17 +524,7 @@ Result<MappedRegion> MemmapFileReadOnly(FileDescriptor fd, size_t offset,
513524
mmap_count.Increment();
514525
mmap_bytes.IncrementBy(size);
515526
mmap_active.Increment();
516-
return MappedRegion(static_cast<const char*>(address), size);
517-
}
518-
519-
MappedRegion::~MappedRegion() {
520-
if (data_) {
521-
if (!::UnmapViewOfFile(const_cast<char*>(data_))) {
522-
ABSL_LOG(FATAL) << StatusFromOsError(::GetLastError(),
523-
"Failed in UnmapViewOfFile");
524-
}
525-
mmap_active.Decrement();
526-
}
527+
return MemoryRegion(static_cast<char*>(address), size, UnmapFileWin32);
527528
}
528529
529530
absl::Status FsyncFile(FileDescriptor fd) {

0 commit comments

Comments
 (0)