Skip to content

Commit 01195e5

Browse files
[CAS] Add OnDiskTrieRawHashMap
Add OnDiskTrieRawHashMap. This is an on-disk persistent hash map that uses a trie data structure similar to TrieRawHashMap. OnDiskTrieRawHashMap is thread-safe and process-safe. It is mostly lock-free, except that it internally coordinates cross-process creation and closing using a file lock. OnDiskTrieRawHashMap is used as the foundation for implementing OnDisk CAS storage, which maps hashes to stored data. Reviewers: Pull Request: #114100
1 parent b154b05 commit 01195e5

File tree

7 files changed

+2184
-5
lines changed

7 files changed

+2184
-5
lines changed

llvm/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -878,7 +878,6 @@ option (LLVM_ENABLE_SPHINX "Use Sphinx to generate llvm documentation." OFF)
878878
option (LLVM_ENABLE_OCAMLDOC "Build OCaml bindings documentation." ON)
879879
option (LLVM_ENABLE_BINDINGS "Build bindings." ON)
880880
option (LLVM_ENABLE_TELEMETRY "Enable the telemetry library. If set to OFF, library cannot be enabled after build (eg., at runtime)" ON)
881-
option (LLVM_ENABLE_ONDISK_CAS "Build OnDiskCAS." ON)
882881

883882
set(LLVM_INSTALL_DOXYGEN_HTML_DIR "${CMAKE_INSTALL_DOCDIR}/llvm/doxygen-html"
884883
CACHE STRING "Doxygen-generated HTML documentation install directory")

llvm/cmake/modules/HandleLLVMOptions.cmake

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,17 @@ if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
512512
endif( LLVM_BUILD_32_BITS )
513513
endif( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
514514

515+
# Check for lock-free 64 bit atomic operations to enable OnDiskCAS.
# OnDiskCAS relies on atomics placed inside a shared memory mapping, so the
# probe requires two things of std::atomic<int64_t>:
#   - it has the same size as the raw integer (no hidden state), and
#   - it is always lock-free (a size match alone does not guarantee this).
# If the probe fails, HAVE_LOCKFREE_ATOMICS64 is empty and the option below
# defaults to OFF.
CHECK_CXX_SOURCE_COMPILES("
#include <atomic>
#include <cstdint>
static_assert(sizeof(std::atomic<int64_t>) == sizeof(uint64_t));
static_assert(std::atomic<int64_t>::is_always_lock_free);
int main() {
return 0;
}
" HAVE_LOCKFREE_ATOMICS64)
option (LLVM_ENABLE_ONDISK_CAS "Build OnDiskCAS." ${HAVE_LOCKFREE_ATOMICS64})
525+
515526
# If building on a GNU specific 32-bit system, make sure off_t is 64 bits
516527
# so that off_t can stored offset > 2GB.
517528
# Android until version N (API 24) doesn't support it.
Lines changed: 351 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,351 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
/// \file
10+
/// This file declares the interface for OnDiskTrieRawHashMap, a thread-safe and
11+
/// (mostly) lock-free hash map stored as trie and backed by persistent files on
12+
/// disk.
13+
///
14+
//===----------------------------------------------------------------------===//
15+
16+
#ifndef LLVM_CAS_ONDISKHASHMAPPEDTRIE_H
17+
#define LLVM_CAS_ONDISKHASHMAPPEDTRIE_H
18+
19+
#include "llvm/ADT/ArrayRef.h"
20+
#include "llvm/ADT/STLExtras.h"
21+
#include "llvm/ADT/STLFunctionalExtras.h"
22+
#include "llvm/ADT/StringRef.h"
23+
#include "llvm/Support/Error.h"
24+
#include <optional>
25+
26+
namespace llvm {
27+
28+
class raw_ostream;
29+
30+
namespace cas {
31+
32+
/// FileOffset is a wrapper around `int64_t` to represent the offset of data
33+
/// from the beginning of the file.
34+
class FileOffset {
public:
  /// Builds the zero offset, which converts to \c false.
  FileOffset() = default;

  /// Wraps \p Offset. Negative offsets are rejected by an assertion.
  explicit FileOffset(int64_t Offset) : Offset(Offset) { assert(Offset >= 0); }

  /// \returns the wrapped offset value.
  int64_t get() const { return Offset; }

  /// \returns true if and only if the wrapped offset is non-zero.
  explicit operator bool() const { return Offset != 0; }

private:
  /// The wrapped offset; zero unless a value was supplied at construction.
  int64_t Offset = 0;
};
46+
47+
/// OnDiskTrieRawHashMap is a persistent trie data structure used as hash maps.
48+
/// The keys are fixed length, and are expected to be binary hashes with a
49+
/// normal distribution.
50+
///
51+
/// - Thread-safety is achieved through the use of atomics within a shared
52+
/// memory mapping. Atomic access does not work on networked filesystems.
53+
/// - Filesystem locks are used, but only sparingly:
54+
/// - during initialization, for creating / opening an existing store;
55+
/// - for the lifetime of the instance, a shared/reader lock is held
56+
/// - during destruction, if there are no concurrent readers, to shrink the
57+
/// files to their minimum size.
58+
/// - Path is used as a directory:
59+
/// - "index" stores the root trie and subtries.
60+
/// - "data" stores (most of) the entries, like a bump-ptr-allocator.
61+
/// - Large entries are stored externally in a file named by the key.
62+
/// - Code is system-dependent (Windows not yet implemented), and binary format
63+
/// itself is not portable. These are not artifacts that can/should be moved
64+
/// between different systems; they are only appropriate for local storage.
65+
class OnDiskTrieRawHashMap {
66+
public:
67+
LLVM_DUMP_METHOD void dump() const;
68+
void
69+
print(raw_ostream &OS,
70+
function_ref<void(ArrayRef<char>)> PrintRecordData = nullptr) const;
71+
72+
public:
73+
/// Const value proxy to access the records stored in TrieRawHashMap.
74+
struct ConstValueProxy {
75+
ConstValueProxy() = default;
76+
ConstValueProxy(ArrayRef<uint8_t> Hash, ArrayRef<char> Data)
77+
: Hash(Hash), Data(Data) {}
78+
ConstValueProxy(ArrayRef<uint8_t> Hash, StringRef Data)
79+
: Hash(Hash), Data(Data.begin(), Data.size()) {}
80+
81+
ArrayRef<uint8_t> Hash;
82+
ArrayRef<char> Data;
83+
};
84+
85+
/// Value proxy to access the records stored in TrieRawHashMap.
86+
struct ValueProxy {
87+
operator ConstValueProxy() const { return ConstValueProxy(Hash, Data); }
88+
89+
ValueProxy() = default;
90+
ValueProxy(ArrayRef<uint8_t> Hash, MutableArrayRef<char> Data)
91+
: Hash(Hash), Data(Data) {}
92+
93+
ArrayRef<uint8_t> Hash;
94+
MutableArrayRef<char> Data;
95+
};
96+
97+
/// Validate the trie data structure.
98+
///
99+
/// Callback receives the file offset to the data entry and the data stored.
100+
Error validate(
101+
function_ref<Error(FileOffset, ConstValueProxy)> RecordVerifier) const;
102+
103+
public:
104+
/// Template class to implement a `pointer` type into the trie data structure.
105+
///
106+
/// It provides pointer-like operation, e.g., dereference to get underlying
107+
/// data. It also reserves top 16 bits of the pointer value, which can be used
108+
/// to pack additional information if needed.
109+
template <class ProxyT> class PointerImpl {
110+
public:
111+
FileOffset getOffset() const {
112+
return FileOffset(OffsetLow32 | (uint64_t)OffsetHigh16 << 32);
113+
}
114+
115+
explicit operator bool() const { return IsValue; }
116+
117+
const ProxyT &operator*() const {
118+
assert(IsValue);
119+
return Value;
120+
}
121+
const ProxyT *operator->() const {
122+
assert(IsValue);
123+
return &Value;
124+
}
125+
126+
PointerImpl() = default;
127+
128+
protected:
129+
PointerImpl(FileOffset Offset, ProxyT Value)
130+
: PointerImpl(Value, Offset, /*IsValue=*/true) {}
131+
132+
PointerImpl(ProxyT Value, FileOffset Offset, bool IsValue)
133+
: Value(Value), OffsetLow32((uint64_t)Offset.get()),
134+
OffsetHigh16((uint64_t)Offset.get() >> 32), IsValue(IsValue) {
135+
if (IsValue)
136+
checkOffset(Offset);
137+
}
138+
139+
static void checkOffset(FileOffset Offset) {
140+
assert(Offset.get() > 0);
141+
assert((uint64_t)Offset.get() < (1LL << 48));
142+
}
143+
144+
ProxyT Value;
145+
uint32_t OffsetLow32 = 0;
146+
uint16_t OffsetHigh16 = 0;
147+
148+
// True if points to a value (not a "nullptr"). Use an extra field because
149+
// 0 can be a valid offset.
150+
bool IsValue = false;
151+
};
152+
153+
class pointer;
154+
class const_pointer : public PointerImpl<ConstValueProxy> {
155+
public:
156+
const_pointer() = default;
157+
158+
private:
159+
friend class pointer;
160+
friend class OnDiskTrieRawHashMap;
161+
using const_pointer::PointerImpl::PointerImpl;
162+
};
163+
164+
class pointer : public PointerImpl<ValueProxy> {
165+
public:
166+
operator const_pointer() const {
167+
return const_pointer(Value, getOffset(), IsValue);
168+
}
169+
170+
pointer() = default;
171+
172+
private:
173+
friend class OnDiskTrieRawHashMap;
174+
using pointer::PointerImpl::PointerImpl;
175+
};
176+
177+
pointer getMutablePointer(const_pointer CP) {
178+
if (!CP)
179+
return pointer();
180+
ValueProxy V{CP->Hash, MutableArrayRef(const_cast<char *>(CP->Data.data()),
181+
CP->Data.size())};
182+
return pointer(CP.getOffset(), V);
183+
}
184+
185+
const_pointer find(ArrayRef<uint8_t> Hash) const;
186+
pointer find(ArrayRef<uint8_t> Hash) {
187+
return getMutablePointer(
188+
const_cast<const OnDiskTrieRawHashMap *>(this)->find(Hash));
189+
}
190+
191+
const_pointer recoverFromHashPointer(const uint8_t *HashBegin) const;
192+
pointer recoverFromHashPointer(const uint8_t *HashBegin) {
193+
return getMutablePointer(
194+
const_cast<const OnDiskTrieRawHashMap *>(this)->recoverFromHashPointer(
195+
HashBegin));
196+
}
197+
198+
const_pointer recoverFromFileOffset(FileOffset Offset) const;
199+
pointer recoverFromFileOffset(FileOffset Offset) {
200+
return getMutablePointer(
201+
const_cast<const OnDiskTrieRawHashMap *>(this)->recoverFromFileOffset(
202+
Offset));
203+
}
204+
205+
using LazyInsertOnConstructCB =
206+
function_ref<void(FileOffset TentativeOffset, ValueProxy TentativeValue)>;
207+
using LazyInsertOnLeakCB =
208+
function_ref<void(FileOffset TentativeOffset, ValueProxy TentativeValue,
209+
FileOffset FinalOffset, ValueProxy FinalValue)>;
210+
211+
/// Insert lazily.
212+
///
213+
/// \p OnConstruct is called when ready to insert a value, after allocating
214+
/// space for the data. It is called at most once.
215+
///
216+
/// \p OnLeak is called only if \p OnConstruct has been called and a race
217+
/// occurred before insertion, causing the tentative offset and data to be
218+
/// abandoned. This allows clients to clean up other results or update any
219+
/// references.
220+
///
221+
/// NOTE: Does *not* guarantee that \p OnConstruct is only called on success.
222+
/// The in-memory \a TrieRawHashMap uses LazyAtomicPointer to synchronize
223+
/// simultaneous writes, but that seems dangerous to use in a memory-mapped
224+
/// file in case a process crashes in the busy state.
225+
Expected<pointer> insertLazy(ArrayRef<uint8_t> Hash,
226+
LazyInsertOnConstructCB OnConstruct = nullptr,
227+
LazyInsertOnLeakCB OnLeak = nullptr);
228+
229+
Expected<pointer> insert(const ConstValueProxy &Value) {
230+
return insertLazy(Value.Hash, [&](FileOffset, ValueProxy Allocated) {
231+
assert(Allocated.Hash == Value.Hash);
232+
assert(Allocated.Data.size() == Value.Data.size());
233+
llvm::copy(Value.Data, Allocated.Data.begin());
234+
});
235+
}
236+
237+
size_t size() const;
238+
size_t capacity() const;
239+
240+
/// Gets or creates a file at \p Path with a hash-mapped trie named \p
241+
/// TrieName. The hash size is \p NumHashBits (in bits) and the records store
242+
/// data of size \p DataSize (in bytes).
243+
///
244+
/// \p MaxFileSize controls the maximum file size to support, limiting the
245+
/// size of the \a mapped_file_region. \p NewFileInitialSize is the starting
246+
/// size if a new file is created.
247+
///
248+
/// \p NewTableNumRootBits and \p NewTableNumSubtrieBits are hints to
249+
/// configure the trie, if it doesn't already exist.
250+
///
251+
/// \pre NumHashBits is a multiple of 8 (byte-aligned).
252+
static Expected<OnDiskTrieRawHashMap>
253+
create(const Twine &Path, const Twine &TrieName, size_t NumHashBits,
254+
uint64_t DataSize, uint64_t MaxFileSize,
255+
std::optional<uint64_t> NewFileInitialSize,
256+
std::optional<size_t> NewTableNumRootBits = std::nullopt,
257+
std::optional<size_t> NewTableNumSubtrieBits = std::nullopt);
258+
259+
OnDiskTrieRawHashMap(OnDiskTrieRawHashMap &&RHS);
260+
OnDiskTrieRawHashMap &operator=(OnDiskTrieRawHashMap &&RHS);
261+
~OnDiskTrieRawHashMap();
262+
263+
private:
264+
struct ImplType;
265+
explicit OnDiskTrieRawHashMap(std::unique_ptr<ImplType> Impl);
266+
std::unique_ptr<ImplType> Impl;
267+
};
268+
269+
/// Sink for data. Stores variable length data with 8-byte alignment. Does not
270+
/// track size of data, which is assumed to be known from context, or embedded.
271+
/// Uses 0-padding but does not guarantee 0-termination.
272+
class OnDiskDataAllocator {
273+
public:
274+
using ValueProxy = MutableArrayRef<char>;
275+
276+
/// An iterator-like return value for data insertion. Maybe it should be
277+
/// called \c iterator, but it has no increment.
278+
class pointer {
279+
public:
280+
FileOffset getOffset() const { return Offset; }
281+
explicit operator bool() const { return bool(getOffset()); }
282+
const ValueProxy &operator*() const {
283+
assert(Offset && "Null dereference");
284+
return Value;
285+
}
286+
const ValueProxy *operator->() const {
287+
assert(Offset && "Null dereference");
288+
return &Value;
289+
}
290+
291+
pointer() = default;
292+
293+
private:
294+
friend class OnDiskDataAllocator;
295+
pointer(FileOffset Offset, ValueProxy Value)
296+
: Offset(Offset), Value(Value) {}
297+
FileOffset Offset;
298+
ValueProxy Value;
299+
};
300+
301+
// Look up the data stored at the given offset.
302+
const char *beginData(FileOffset Offset) const;
303+
char *beginData(FileOffset Offset) {
304+
return const_cast<char *>(
305+
const_cast<const OnDiskDataAllocator *>(this)->beginData(Offset));
306+
}
307+
308+
Expected<pointer> allocate(size_t Size);
309+
Expected<pointer> save(ArrayRef<char> Data) {
310+
auto P = allocate(Data.size());
311+
if (LLVM_UNLIKELY(!P))
312+
return P.takeError();
313+
llvm::copy(Data, (*P)->begin());
314+
return P;
315+
}
316+
Expected<pointer> save(StringRef Data) {
317+
return save(ArrayRef<char>(Data.begin(), Data.size()));
318+
}
319+
320+
/// \returns the buffer that was allocated at \p create time, with size
321+
/// \p UserHeaderSize.
322+
MutableArrayRef<uint8_t> getUserHeader();
323+
324+
size_t size() const;
325+
size_t capacity() const;
326+
327+
static Expected<OnDiskDataAllocator>
328+
create(const Twine &Path, const Twine &TableName, uint64_t MaxFileSize,
329+
std::optional<uint64_t> NewFileInitialSize,
330+
uint32_t UserHeaderSize = 0,
331+
function_ref<void(void *)> UserHeaderInit = nullptr);
332+
333+
OnDiskDataAllocator(OnDiskDataAllocator &&RHS);
334+
OnDiskDataAllocator &operator=(OnDiskDataAllocator &&RHS);
335+
336+
// No copy. Just call \a create() again.
337+
OnDiskDataAllocator(const OnDiskDataAllocator &) = delete;
338+
OnDiskDataAllocator &operator=(const OnDiskDataAllocator &) = delete;
339+
340+
~OnDiskDataAllocator();
341+
342+
private:
343+
struct ImplType;
344+
explicit OnDiskDataAllocator(std::unique_ptr<ImplType> Impl);
345+
std::unique_ptr<ImplType> Impl;
346+
};
347+
348+
} // namespace cas
349+
} // namespace llvm
350+
351+
#endif // LLVM_CAS_ONDISKHASHMAPPEDTRIE_H

llvm/lib/CAS/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ add_llvm_component_library(LLVMCAS
66
MappedFileRegionArena.cpp
77
ObjectStore.cpp
88
OnDiskCommon.cpp
9+
OnDiskTrieRawHashMap.cpp
910

1011
ADDITIONAL_HEADER_DIRS
1112
${LLVM_MAIN_INCLUDE_DIR}/llvm/CAS

0 commit comments

Comments
 (0)