Skip to content

Commit fbd2669

Browse files
Automerge: Re-land #161264: [CAS] Add OnDiskDataAllocator (#162112)
Fix the build configuration that has OnDiskCAS disabled.
2 parents c498656 + 2aff3c6 commit fbd2669

File tree

5 files changed

+397
-0
lines changed

5 files changed

+397
-0
lines changed
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
/// \file
10+
/// This file declares the interface for OnDiskDataAllocator, a file-backed
11+
/// data pool that can be used to allocate space to store data packed in a
12+
/// single file. It is based on MappedFileRegionArena and includes a header at
13+
/// the beginning to provide metadata.
14+
///
15+
//===----------------------------------------------------------------------===//
16+
17+
#ifndef LLVM_CAS_ONDISKDATAALLOCATOR_H
18+
#define LLVM_CAS_ONDISKDATAALLOCATOR_H
19+
20+
#include "llvm/ADT/ArrayRef.h"
21+
#include "llvm/CAS/FileOffset.h"
22+
#include "llvm/Support/Error.h"
23+
24+
namespace llvm::cas {
25+
26+
/// Sink for data. Stores variable length data with 8-byte alignment. Does not
27+
/// track size of data, which is assumed to known from context, or embedded.
28+
/// Uses 0-padding but does not guarantee 0-termination.
29+
class OnDiskDataAllocator {
30+
public:
31+
using ValueProxy = MutableArrayRef<char>;
32+
33+
/// A pointer to data stored on disk.
34+
class OnDiskPtr {
35+
public:
36+
FileOffset getOffset() const { return Offset; }
37+
explicit operator bool() const { return bool(getOffset()); }
38+
const ValueProxy &operator*() const {
39+
assert(Offset && "Null dereference");
40+
return Value;
41+
}
42+
const ValueProxy *operator->() const {
43+
assert(Offset && "Null dereference");
44+
return &Value;
45+
}
46+
47+
OnDiskPtr() = default;
48+
49+
private:
50+
friend class OnDiskDataAllocator;
51+
OnDiskPtr(FileOffset Offset, ValueProxy Value)
52+
: Offset(Offset), Value(Value) {}
53+
FileOffset Offset;
54+
ValueProxy Value;
55+
};
56+
57+
/// Get the data of \p Size stored at the given \p Offset. Note the allocator
58+
/// doesn't keep track of the allocation size, thus \p Size doesn't need to
59+
/// match the size of allocation but needs to be smaller.
60+
Expected<ArrayRef<char>> get(FileOffset Offset, size_t Size) const;
61+
62+
/// Allocate at least \p Size with 8-byte alignment.
63+
Expected<OnDiskPtr> allocate(size_t Size);
64+
65+
/// \returns the buffer that was allocated at \p create time, with size
66+
/// \p UserHeaderSize.
67+
MutableArrayRef<uint8_t> getUserHeader();
68+
69+
size_t size() const;
70+
size_t capacity() const;
71+
72+
static Expected<OnDiskDataAllocator>
73+
create(const Twine &Path, const Twine &TableName, uint64_t MaxFileSize,
74+
std::optional<uint64_t> NewFileInitialSize,
75+
uint32_t UserHeaderSize = 0,
76+
function_ref<void(void *)> UserHeaderInit = nullptr);
77+
78+
OnDiskDataAllocator(OnDiskDataAllocator &&RHS);
79+
OnDiskDataAllocator &operator=(OnDiskDataAllocator &&RHS);
80+
81+
// No copy. Just call \a create() again.
82+
OnDiskDataAllocator(const OnDiskDataAllocator &) = delete;
83+
OnDiskDataAllocator &operator=(const OnDiskDataAllocator &) = delete;
84+
85+
~OnDiskDataAllocator();
86+
87+
private:
88+
struct ImplType;
89+
explicit OnDiskDataAllocator(std::unique_ptr<ImplType> Impl);
90+
std::unique_ptr<ImplType> Impl;
91+
};
92+
93+
} // namespace llvm::cas
94+
95+
#endif // LLVM_CAS_ONDISKDATAALLOCATOR_H

llvm/lib/CAS/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ add_llvm_component_library(LLVMCAS
77
MappedFileRegionArena.cpp
88
ObjectStore.cpp
99
OnDiskCommon.cpp
10+
OnDiskDataAllocator.cpp
1011
OnDiskTrieRawHashMap.cpp
1112

1213
ADDITIONAL_HEADER_DIRS
Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
///
9+
/// \file Implements OnDiskDataAllocator.
10+
///
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "llvm/CAS/OnDiskDataAllocator.h"
14+
#include "DatabaseFile.h"
15+
#include "llvm/Config/llvm-config.h"
16+
17+
using namespace llvm;
18+
using namespace llvm::cas;
19+
using namespace llvm::cas::ondisk;
20+
21+
#if LLVM_ENABLE_ONDISK_CAS
22+
23+
//===----------------------------------------------------------------------===//
24+
// DataAllocator data structures.
25+
//===----------------------------------------------------------------------===//
26+
27+
namespace {
28+
/// DataAllocator table layout:
29+
/// - [8-bytes: Generic table header]
30+
/// - 8-bytes: AllocatorOffset (reserved for implementing free lists)
31+
/// - 8-bytes: Size for user data header
32+
/// - <user data buffer>
33+
///
34+
/// Record layout:
35+
/// - <data>
36+
class DataAllocatorHandle {
37+
public:
38+
static constexpr TableHandle::TableKind Kind =
39+
TableHandle::TableKind::DataAllocator;
40+
41+
struct Header {
42+
TableHandle::Header GenericHeader;
43+
std::atomic<int64_t> AllocatorOffset;
44+
const uint64_t UserHeaderSize;
45+
};
46+
47+
operator TableHandle() const {
48+
if (!H)
49+
return TableHandle();
50+
return TableHandle(*Region, H->GenericHeader);
51+
}
52+
53+
Expected<MutableArrayRef<char>> allocate(MappedFileRegionArena &Alloc,
54+
size_t DataSize) {
55+
assert(&Alloc.getRegion() == Region);
56+
auto Ptr = Alloc.allocate(DataSize);
57+
if (LLVM_UNLIKELY(!Ptr))
58+
return Ptr.takeError();
59+
return MutableArrayRef(*Ptr, DataSize);
60+
}
61+
62+
explicit operator bool() const { return H; }
63+
const Header &getHeader() const { return *H; }
64+
MappedFileRegion &getRegion() const { return *Region; }
65+
66+
MutableArrayRef<uint8_t> getUserHeader() {
67+
return MutableArrayRef(reinterpret_cast<uint8_t *>(H + 1),
68+
H->UserHeaderSize);
69+
}
70+
71+
static Expected<DataAllocatorHandle>
72+
create(MappedFileRegionArena &Alloc, StringRef Name, uint32_t UserHeaderSize);
73+
74+
DataAllocatorHandle() = default;
75+
DataAllocatorHandle(MappedFileRegion &Region, Header &H)
76+
: Region(&Region), H(&H) {}
77+
DataAllocatorHandle(MappedFileRegion &Region, intptr_t HeaderOffset)
78+
: DataAllocatorHandle(
79+
Region, *reinterpret_cast<Header *>(Region.data() + HeaderOffset)) {
80+
}
81+
82+
private:
83+
MappedFileRegion *Region = nullptr;
84+
Header *H = nullptr;
85+
};
86+
87+
} // end anonymous namespace
88+
89+
/// Private state of OnDiskDataAllocator: the opened database file together
/// with the handle of the data-allocator table found inside that file.
struct OnDiskDataAllocator::ImplType {
90+
DatabaseFile File;
91+
DataAllocatorHandle Store;
92+
};
93+
94+
/// Allocate a new DataAllocator table from \p Alloc and initialize it.
///
/// One contiguous allocation holds, in order: the table header, the user
/// header (\p UserHeaderSize bytes, zero-filled), and the table name followed
/// by a terminating NUL.
///
/// \returns a handle to the new table, or the arena's allocation error.
Expected<DataAllocatorHandle>
DataAllocatorHandle::create(MappedFileRegionArena &Alloc, StringRef Name,
                            uint32_t UserHeaderSize) {
  // The name is stored right after the header and the user header.
  const size_t NameOffset = sizeof(Header) + UserHeaderSize;
  auto TableOffset = Alloc.allocateOffset(NameOffset + Name.size() + 1);
  if (LLVM_UNLIKELY(!TableOffset))
    return TableOffset.takeError();

  // The generic header stores the name length in 16 bits.
  assert(Name.size() <= UINT16_MAX && "Expected smaller table name");
  char *TableStart = Alloc.getRegion().data() + *TableOffset;
  auto *NewHeader = new (TableStart)
      Header{{TableHandle::TableKind::DataAllocator,
              static_cast<uint16_t>(Name.size()),
              static_cast<int32_t>(NameOffset)},
             /*AllocatorOffset=*/{0},
             /*UserHeaderSize=*/UserHeaderSize};

  // Zero-fill the user header.
  char *UserArea = reinterpret_cast<char *>(NewHeader + 1);
  memset(UserArea, 0, UserHeaderSize);

  // Store the table name and NUL-terminate it.
  char *NameEnd = llvm::copy(Name, UserArea + UserHeaderSize);
  *NameEnd = 0;

  return DataAllocatorHandle(Alloc.getRegion(), *NewHeader);
}
120+
121+
/// Open the data-allocator table \p TableNameTwine in the database file at
/// \p PathTwine, creating the file and the table if the file does not exist.
///
/// \p UserHeaderInit must be callable whenever \p UserHeaderSize is non-zero;
/// it is invoked on the user header of a freshly created table.
/// \p NewFileInitialSize is not read by this function.
///
/// \returns the allocator, or an error when the file cannot be opened or
/// created, the table is not found, or the table has the wrong kind.
Expected<OnDiskDataAllocator> OnDiskDataAllocator::create(
    const Twine &PathTwine, const Twine &TableNameTwine, uint64_t MaxFileSize,
    std::optional<uint64_t> NewFileInitialSize, uint32_t UserHeaderSize,
    function_ref<void(void *)> UserHeaderInit) {
  assert(!UserHeaderSize || UserHeaderInit);

  // Materialize both twines; the buffers must outlive the StringRefs.
  SmallString<128> PathBuffer;
  SmallString<128> TableNameBuffer;
  StringRef Path = PathTwine.toStringRef(PathBuffer);
  StringRef TableName = TableNameTwine.toStringRef(TableNameBuffer);

  // Populates a database file that did not exist before: create the table,
  // register it, then let the caller initialize the user header.
  auto InitNewDatabase = [&](DatabaseFile &DB) -> Error {
    Expected<DataAllocatorHandle> NewStore =
        DataAllocatorHandle::create(DB.getAlloc(), TableName, UserHeaderSize);
    if (LLVM_UNLIKELY(!NewStore))
      return NewStore.takeError();

    if (Error Err = DB.addTable(*NewStore))
      return Err;

    if (UserHeaderSize != 0)
      UserHeaderInit(NewStore->getUserHeader().data());
    return Error::success();
  };

  // Get or create the file.
  Expected<DatabaseFile> File =
      DatabaseFile::create(Path, MaxFileSize, InitNewDatabase);
  if (!File)
    return File.takeError();

  // Look the table up and make sure it is a data allocator.
  std::optional<TableHandle> Table = File->findTable(TableName);
  if (!Table.has_value())
    return createTableConfigError(std::errc::argument_out_of_domain, Path,
                                  TableName, "table not found");

  const auto ExpectedKind = static_cast<size_t>(DataAllocatorHandle::Kind);
  const auto ActualKind = static_cast<size_t>(Table->getHeader().Kind);
  if (Error E =
          checkTable("table kind", ExpectedKind, ActualKind, Path, TableName))
    return std::move(E);

  auto Store = Table->cast<DataAllocatorHandle>();
  assert(Store && "Already checked the kind");

  // Success: hand the file and the table handle over to the implementation.
  auto State = std::make_unique<ImplType>(
      ImplType{DatabaseFile(std::move(*File)), Store});
  return OnDiskDataAllocator(std::move(State));
}
167+
168+
/// Allocate \p Size bytes from the backing file.
///
/// \returns a pointer that carries both the offset of the new storage relative
/// to the start of the region and a mutable view of it, or the error reported
/// by the underlying allocation.
Expected<OnDiskDataAllocator::OnDiskPtr>
OnDiskDataAllocator::allocate(size_t Size) {
  Expected<MutableArrayRef<char>> Buffer =
      Impl->Store.allocate(Impl->File.getAlloc(), Size);
  if (LLVM_UNLIKELY(!Buffer))
    return Buffer.takeError();

  // The on-disk offset is the distance from the start of the region.
  const char *RegionStart = Impl->Store.getRegion().data();
  return OnDiskPtr(FileOffset(Buffer->data() - RegionStart), *Buffer);
}
177+
178+
/// Get a read-only view of \p Size bytes stored at \p Offset.
///
/// The allocator does not track individual allocation sizes, so the request is
/// only validated against the space the allocator has handed out so far: the
/// range [Offset, Offset + Size) must lie entirely inside it. A range that ends
/// exactly at the end of that space is valid.
///
/// \param Offset offset of the data in the backing file; must be set.
/// \param Size number of bytes to expose.
/// \returns the requested bytes, or a protocol_error when the range does not
/// fit.
Expected<ArrayRef<char>> OnDiskDataAllocator::get(FileOffset Offset,
                                                  size_t Size) const {
  assert(Offset);
  assert(Impl);

  const uint64_t Begin = static_cast<uint64_t>(Offset.get());
  const uint64_t AllocatedSize = Impl->File.getAlloc().size();
  // Compare by subtraction rather than forming Begin + Size: the sum can wrap
  // around for a huge Size and would then slip past the bounds check.
  if (Begin > AllocatedSize || Size > AllocatedSize - Begin)
    return createStringError(make_error_code(std::errc::protocol_error),
                             "requested size too large in allocator");
  return ArrayRef<char>{Impl->File.getRegion().data() + Offset.get(), Size};
}
187+
188+
/// \returns the user header buffer of the table; forwards to the table handle.
MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() {
  DataAllocatorHandle &Table = Impl->Store;
  return Table.getUserHeader();
}
191+
192+
size_t OnDiskDataAllocator::size() const { return Impl->File.size(); }
193+
size_t OnDiskDataAllocator::capacity() const {
194+
return Impl->File.getRegion().size();
195+
}
196+
197+
/// Private constructor: takes ownership of an already-built implementation
/// object. Used by create() once the file and table have been validated.
OnDiskDataAllocator::OnDiskDataAllocator(std::unique_ptr<ImplType> Impl)
198+
: Impl(std::move(Impl)) {}
199+
200+
#else // !LLVM_ENABLE_ONDISK_CAS
201+
202+
struct OnDiskDataAllocator::ImplType {};
203+
204+
Expected<OnDiskDataAllocator> OnDiskDataAllocator::create(
205+
const Twine &Path, const Twine &TableName, uint64_t MaxFileSize,
206+
std::optional<uint64_t> NewFileInitialSize, uint32_t UserHeaderSize,
207+
function_ref<void(void *)> UserHeaderInit) {
208+
return createStringError(make_error_code(std::errc::not_supported),
209+
"OnDiskDataAllocator is not supported");
210+
}
211+
212+
Expected<OnDiskDataAllocator::OnDiskPtr>
213+
OnDiskDataAllocator::allocate(size_t Size) {
214+
return createStringError(make_error_code(std::errc::not_supported),
215+
"OnDiskDataAllocator is not supported");
216+
}
217+
218+
Expected<ArrayRef<char>> OnDiskDataAllocator::get(FileOffset Offset,
219+
size_t Size) const {
220+
return createStringError(make_error_code(std::errc::not_supported),
221+
"OnDiskDataAllocator is not supported");
222+
}
223+
224+
MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() { return {}; }
225+
226+
size_t OnDiskDataAllocator::size() const { return 0; }
227+
size_t OnDiskDataAllocator::capacity() const { return 0; }
228+
229+
#endif // LLVM_ENABLE_ONDISK_CAS
230+
231+
// Move operations and the destructor are defaulted here rather than in the
// header: ImplType is only forward-declared there, while this file defines it
// in both build configurations. These definitions sit after the #endif so that
// they are shared by the enabled and the disabled variant.
OnDiskDataAllocator::OnDiskDataAllocator(OnDiskDataAllocator &&RHS) = default;
232+
OnDiskDataAllocator &
233+
OnDiskDataAllocator::operator=(OnDiskDataAllocator &&RHS) = default;
234+
OnDiskDataAllocator::~OnDiskDataAllocator() = default;

llvm/unittests/CAS/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ add_llvm_unittest(CASTests
88
ActionCacheTest.cpp
99
CASTestConfig.cpp
1010
ObjectStoreTest.cpp
11+
OnDiskDataAllocatorTest.cpp
1112
OnDiskTrieRawHashMapTest.cpp
1213
ProgramTest.cpp
1314
)

0 commit comments

Comments
 (0)