Skip to content

Commit f9cd2ee

Browse files
[CAS] Add MappedFileRegionArena (#114099)
Add MappedFileRegionArena, which can serve as a file-system-backed persistent memory allocator. The allocator works like a BumpPtrAllocator, and is designed to be thread safe and process safe. The implementation relies on the POSIX compliance of the file system and doesn't work on all file systems. If the file system supports a lazy tail (doesn't allocate disk space if the tail of the large file is not used), the user has more flexibility to declare a larger capacity. The allocator works by using an atomically updated bump ptr at a location that can be customized by the user. The atomic pointer points to the next available space to allocate, and the allocator will resize/truncate the file to its current usage once all clients have closed the allocator. Windows implementation contributed by: @hjyamauchi
1 parent b247698 commit f9cd2ee

File tree

12 files changed

+962
-0
lines changed

12 files changed

+962
-0
lines changed

llvm/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -878,6 +878,7 @@ option (LLVM_ENABLE_SPHINX "Use Sphinx to generate llvm documentation." OFF)
878878
option (LLVM_ENABLE_OCAMLDOC "Build OCaml bindings documentation." ON)
879879
option (LLVM_ENABLE_BINDINGS "Build bindings." ON)
880880
option (LLVM_ENABLE_TELEMETRY "Enable the telemetry library. If set to OFF, library cannot be enabled after build (eg., at runtime)" ON)
881+
option (LLVM_ENABLE_ONDISK_CAS "Build OnDiskCAS." ON)
881882

882883
set(LLVM_INSTALL_DOXYGEN_HTML_DIR "${CMAKE_INSTALL_DOCDIR}/llvm/doxygen-html"
883884
CACHE STRING "Doxygen-generated HTML documentation install directory")
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
/// \file
10+
/// This file declares the interface for MappedFileRegionArena, a bump pointer
11+
/// allocator, backed by a memory-mapped file.
12+
///
13+
//===----------------------------------------------------------------------===//
14+
15+
#ifndef LLVM_CAS_MAPPEDFILEREGIONARENA_H
16+
#define LLVM_CAS_MAPPEDFILEREGIONARENA_H
17+
18+
#include "llvm/Support/Alignment.h"
19+
#include "llvm/Support/FileSystem.h"
20+
#include <atomic>
21+
22+
namespace llvm::cas {
23+
24+
/// Allocator for an owned mapped file region that supports thread-safe and
25+
/// process-safe bump pointer allocation.
26+
///
27+
/// This allocator is designed to create a sparse file when supported by the
28+
/// filesystem's \c ftruncate so that it can be used with a large maximum size.
29+
/// It will also attempt to shrink the underlying file down to its current
30+
/// allocation size when the last concurrent mapping is closed.
31+
///
32+
/// Process-safe. Uses file locks when resizing the file during initialization
33+
/// and destruction.
34+
///
35+
/// Thread-safe. Requires OS support for thread-safe file locks.
36+
///
37+
/// Provides 8-byte alignment for all allocations.
38+
class MappedFileRegionArena {
39+
public:
40+
using RegionT = sys::fs::mapped_file_region;
41+
42+
/// Header for MappedFileRegionArena. It can be configured to be located
43+
/// at any location within the file and the allocation will be appended after
44+
/// the header.
45+
struct Header {
46+
// BumpPtr for new allocation.
47+
std::atomic<uint64_t> BumpPtr;
48+
// Allocated size on disk.
49+
std::atomic<uint64_t> AllocatedSize;
50+
// Capacity of the file.
51+
std::atomic<uint64_t> Capacity;
52+
// Offset from the beginning of the file to this header (for verification).
53+
std::atomic<uint64_t> HeaderOffset;
54+
};
55+
56+
/// Create a \c MappedFileRegionArena.
57+
///
58+
/// \param Path the path to open the mapped region.
59+
/// \param Capacity the maximum size for the mapped file region.
60+
/// \param HeaderOffset the offset at which to store the header. This is so
61+
/// that information can be stored before the header, like a file magic.
62+
/// \param NewFileConstructor is for constructing new files. It has exclusive
63+
/// access to the file. Must call \c initializeBumpPtr.
64+
static Expected<MappedFileRegionArena>
65+
create(const Twine &Path, uint64_t Capacity, uint64_t HeaderOffset,
66+
function_ref<Error(MappedFileRegionArena &)> NewFileConstructor);
67+
68+
/// Minimum alignment for allocations, currently hardcoded to 8B.
69+
static constexpr Align getAlign() {
70+
// Trick Align into giving us '8' as a constexpr.
71+
struct alignas(8) T {};
72+
static_assert(alignof(T) == 8, "Tautology failed?");
73+
return Align::Of<T>();
74+
}
75+
76+
/// Allocate at least \p AllocSize. Rounds up to \a getAlign().
77+
Expected<char *> allocate(uint64_t AllocSize) {
78+
auto Offset = allocateOffset(AllocSize);
79+
if (LLVM_UNLIKELY(!Offset))
80+
return Offset.takeError();
81+
return data() + *Offset;
82+
}
83+
/// Allocate, returning the offset from \a data() instead of a pointer.
84+
Expected<int64_t> allocateOffset(uint64_t AllocSize);
85+
86+
char *data() const { return Region.data(); }
87+
uint64_t size() const { return H->BumpPtr; }
88+
uint64_t capacity() const { return Region.size(); }
89+
90+
RegionT &getRegion() { return Region; }
91+
92+
~MappedFileRegionArena() { destroyImpl(); }
93+
94+
MappedFileRegionArena() = default;
95+
MappedFileRegionArena(MappedFileRegionArena &&RHS) { moveImpl(RHS); }
96+
MappedFileRegionArena &operator=(MappedFileRegionArena &&RHS) {
97+
destroyImpl();
98+
moveImpl(RHS);
99+
return *this;
100+
}
101+
102+
MappedFileRegionArena(const MappedFileRegionArena &) = delete;
103+
MappedFileRegionArena &operator=(const MappedFileRegionArena &) = delete;
104+
105+
private:
106+
// initialize header from offset.
107+
void initializeHeader(uint64_t HeaderOffset);
108+
109+
void destroyImpl();
110+
void moveImpl(MappedFileRegionArena &RHS) {
111+
std::swap(Region, RHS.Region);
112+
std::swap(H, RHS.H);
113+
std::swap(Path, RHS.Path);
114+
std::swap(FD, RHS.FD);
115+
std::swap(SharedLockFD, RHS.SharedLockFD);
116+
}
117+
118+
private:
119+
RegionT Region;
120+
Header *H = nullptr;
121+
std::string Path;
122+
// File descriptor for the main storage file.
123+
std::optional<int> FD;
124+
// File descriptor for the file used as reader/writer lock.
125+
std::optional<int> SharedLockFD;
126+
};
127+
128+
} // namespace llvm::cas
129+
130+
#endif // LLVM_CAS_MAPPEDFILEREGIONARENA_H

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,4 +146,7 @@
146146
coverage bugs, and to 0 otherwise. */
147147
#cmakedefine01 LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
148148

149+
/* Define to 1 to enable LLVM OnDisk Content Addressable Storage */
150+
#cmakedefine01 LLVM_ENABLE_ONDISK_CAS
151+
149152
#endif

llvm/include/llvm/Support/FileSystem.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,11 @@ LLVM_ABI std::error_code copy_file(const Twine &From, int ToFD);
410410
/// platform-specific error_code.
411411
LLVM_ABI std::error_code resize_file(int FD, uint64_t Size);
412412

413+
/// Resize \p FD to \p Size with sparse files explicitly enabled. It uses
414+
/// FSCTL_SET_SPARSE on Windows. This is the same as resize_file on
415+
/// non-Windows.
416+
LLVM_ABI std::error_code resize_file_sparse(int FD, uint64_t Size);
417+
413418
/// Resize \p FD to \p Size before mapping \a mapped_file_region::readwrite. On
414419
/// non-Windows, this calls \a resize_file(). On Windows, this is a no-op,
415420
/// since the subsequent mapping (via \c CreateFileMapping) automatically

llvm/lib/CAS/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@ add_llvm_component_library(LLVMCAS
33
ActionCaches.cpp
44
BuiltinCAS.cpp
55
InMemoryCAS.cpp
6+
MappedFileRegionArena.cpp
67
ObjectStore.cpp
8+
OnDiskCommon.cpp
79

810
ADDITIONAL_HEADER_DIRS
911
${LLVM_MAIN_INCLUDE_DIR}/llvm/CAS

0 commit comments

Comments
 (0)