-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[CAS] Add OnDiskDataAllocator #161264
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
cachemeifyoucan
merged 3 commits into
main
from
users/cachemeifyoucan/spr/cas-add-ondiskdataallocator
Oct 6, 2025
Merged
[CAS] Add OnDiskDataAllocator #161264
Changes from 1 commit
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
/// \file | ||
/// This file declares the interface for OnDiskDataAllocator, a file-backed | ||
/// data pool that can be used to allocate space to store data packed in a | ||
/// single file. It is based on MappedFileRegionArena and includes a header at | ||
/// the beginning to provide metadata. | ||
/// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef LLVM_CAS_ONDISKDATAALLOCATOR_H | ||
#define LLVM_CAS_ONDISKDATAALLOCATOR_H | ||
|
||
#include "llvm/ADT/ArrayRef.h" | ||
#include "llvm/CAS/FileOffset.h" | ||
#include "llvm/Support/Error.h" | ||
|
||
namespace llvm::cas { | ||
|
||
/// Sink for data. Stores variable-length data with 8-byte alignment. Does not | ||
/// track the size of the data, which is assumed to be known from context or | ||
/// embedded in the data itself. Uses 0-padding but does not guarantee | ||
/// 0-termination. | ||
class OnDiskDataAllocator { | ||
public: | ||
/// Mutable view over the bytes of one allocation. | ||
using ValueProxy = MutableArrayRef<char>; | ||
|
||
/// An iterator-like return value for data insertion. Maybe it should be | ||
/// called \c iterator, but it has no increment. | ||
class pointer { | ||
public: | ||
/// \returns the file offset recorded for this allocation. | ||
FileOffset getOffset() const { return Offset; } | ||
/// Converts to the truth value of the stored offset. | ||
explicit operator bool() const { return bool(getOffset()); } | ||
/// \returns the allocated buffer. Asserts that the offset is set. | ||
const ValueProxy &operator*() const { | ||
assert(Offset && "Null dereference"); | ||
return Value; | ||
} | ||
/// Member access to the allocated buffer. Asserts that the offset is set. | ||
const ValueProxy *operator->() const { | ||
assert(Offset && "Null dereference"); | ||
return &Value; | ||
} | ||
|
||
/// Default construction leaves both the offset and the buffer | ||
/// default-constructed. | ||
pointer() = default; | ||
|
||
private: | ||
// Only the allocator itself may build a populated pointer. | ||
friend class OnDiskDataAllocator; | ||
pointer(FileOffset Offset, ValueProxy Value) | ||
: Offset(Offset), Value(Value) {} | ||
FileOffset Offset; | ||
ValueProxy Value; | ||
}; | ||
|
||
/// Look up the data stored at the given offset. | ||
/// | ||
/// \returns a pointer to the byte at \p Offset within the mapped region. No | ||
/// length is returned; the caller must know how much data is stored there. | ||
const char *beginData(FileOffset Offset) const; | ||
ilovepi marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
||
/// Allocate at least \p Size bytes with 8-byte alignment. | ||
/// | ||
/// \returns a \c pointer carrying the file offset and a mutable view of the | ||
/// new buffer, or an error if the allocation fails. | ||
Expected<pointer> allocate(size_t Size); | ||
|
||
/// \returns the buffer that was allocated at \a create() time, with size | ||
/// \p UserHeaderSize. | ||
MutableArrayRef<uint8_t> getUserHeader(); | ||
|
||
/// \returns the size reported by the underlying database file (0 when the | ||
/// on-disk CAS is disabled). | ||
size_t size() const; | ||
/// \returns the size of the underlying mapped region (0 when the on-disk CAS | ||
/// is disabled). | ||
size_t capacity() const; | ||
|
||
/// Open or create the database file at \p Path and bind to the table named | ||
/// \p TableName. | ||
/// | ||
/// \param MaxFileSize forwarded to the underlying database file. | ||
/// \param UserHeaderSize size in bytes of the user header reserved when a | ||
/// new table is created. | ||
/// \param UserHeaderInit called with the user header buffer when a new table | ||
/// is created and \p UserHeaderSize is nonzero. | ||
/// \returns the allocator, or an error if the file cannot be opened, the | ||
/// table is not found, or the table has the wrong kind. When the on-disk CAS | ||
/// is disabled this always returns a "not supported" error. | ||
static Expected<OnDiskDataAllocator> | ||
create(const Twine &Path, const Twine &TableName, uint64_t MaxFileSize, | ||
std::optional<uint64_t> NewFileInitialSize, | ||
uint32_t UserHeaderSize = 0, | ||
function_ref<void(void *)> UserHeaderInit = nullptr); | ||
|
||
// Movable; the definitions live in the implementation file. | ||
OnDiskDataAllocator(OnDiskDataAllocator &&RHS); | ||
OnDiskDataAllocator &operator=(OnDiskDataAllocator &&RHS); | ||
|
||
// No copy. Just call \a create() again. | ||
OnDiskDataAllocator(const OnDiskDataAllocator &) = delete; | ||
OnDiskDataAllocator &operator=(const OnDiskDataAllocator &) = delete; | ||
|
||
~OnDiskDataAllocator(); | ||
|
||
private: | ||
// Implementation state is only forward-declared here and defined in the | ||
// implementation file. | ||
struct ImplType; | ||
explicit OnDiskDataAllocator(std::unique_ptr<ImplType> Impl); | ||
std::unique_ptr<ImplType> Impl; | ||
}; | ||
|
||
} // namespace llvm::cas | ||
|
||
#endif // LLVM_CAS_ONDISKDATAALLOCATOR_H |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,225 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
/// | ||
/// \file | ||
/// Implements OnDiskDataAllocator. | ||
/// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "llvm/CAS/OnDiskDataAllocator.h" | ||
#include "DatabaseFile.h" | ||
#include "llvm/Config/llvm-config.h" | ||
|
||
using namespace llvm; | ||
using namespace llvm::cas; | ||
using namespace llvm::cas::ondisk; | ||
|
||
// The move operations and the destructor are defaulted here rather than in | ||
// the header, where ImplType is only forward-declared. They are shared by | ||
// both the enabled and the disabled on-disk CAS configurations below. | ||
OnDiskDataAllocator::OnDiskDataAllocator(OnDiskDataAllocator &&RHS) = default; | ||
OnDiskDataAllocator & | ||
OnDiskDataAllocator::operator=(OnDiskDataAllocator &&RHS) = default; | ||
OnDiskDataAllocator::~OnDiskDataAllocator() = default; | ||
ilovepi marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
||
#if LLVM_ENABLE_ONDISK_CAS | ||
|
||
//===----------------------------------------------------------------------===// | ||
// DataAllocator data structures. | ||
//===----------------------------------------------------------------------===// | ||
|
||
namespace { | ||
/// Non-owning handle to a DataAllocator table inside a mapped file region. | ||
/// | ||
/// DataAllocator table layout: | ||
/// - [8-bytes: Generic table header] | ||
/// - 8-bytes: AllocatorOffset (reserved for implementing free lists) | ||
/// - 8-bytes: Size for user data header | ||
/// - <user data buffer> | ||
/// - <table name, 0-terminated> (written by \a create()) | ||
/// | ||
/// Record layout: | ||
/// - <data> | ||
class DataAllocatorHandle { | ||
public: | ||
/// Table kind tag identifying DataAllocator tables. | ||
static constexpr TableHandle::TableKind Kind = | ||
TableHandle::TableKind::DataAllocator; | ||
|
||
/// Fixed-size header placed at the start of the table. | ||
struct Header { | ||
TableHandle::Header GenericHeader; | ||
std::atomic<int64_t> AllocatorOffset; | ||
const uint64_t UserHeaderSize; | ||
}; | ||
|
||
/// Convert to a generic table handle; a null handle converts to a | ||
/// default-constructed TableHandle. | ||
operator TableHandle() const { | ||
if (!H) | ||
return TableHandle(); | ||
return TableHandle(*Region, H->GenericHeader); | ||
} | ||
|
||
/// Allocate \p DataSize bytes from \p Alloc, which must wrap the same region | ||
/// as this handle. | ||
/// | ||
/// \returns a mutable view of exactly \p DataSize bytes, or the error | ||
/// reported by the arena. | ||
Expected<MutableArrayRef<char>> allocate(MappedFileRegionArena &Alloc, | ||
size_t DataSize) { | ||
assert(&Alloc.getRegion() == Region); | ||
auto Ptr = Alloc.allocate(DataSize); | ||
if (LLVM_UNLIKELY(!Ptr)) | ||
return Ptr.takeError(); | ||
return MutableArrayRef(*Ptr, DataSize); | ||
} | ||
|
||
/// True when the handle points at a header. | ||
explicit operator bool() const { return H; } | ||
// The two accessors below dereference without a null check. | ||
const Header &getHeader() const { return *H; } | ||
MappedFileRegion &getRegion() const { return *Region; } | ||
|
||
/// \returns the user header: the \c UserHeaderSize bytes that start | ||
/// immediately after the fixed \c Header. | ||
MutableArrayRef<uint8_t> getUserHeader() { | ||
return MutableArrayRef(reinterpret_cast<uint8_t *>(H + 1), | ||
H->UserHeaderSize); | ||
} | ||
|
||
/// Allocate and initialize a new table named \p Name in \p Alloc with | ||
/// \p UserHeaderSize bytes of user header. | ||
static Expected<DataAllocatorHandle> | ||
create(MappedFileRegionArena &Alloc, StringRef Name, uint32_t UserHeaderSize); | ||
|
||
/// A default-constructed handle is null. | ||
DataAllocatorHandle() = default; | ||
/// Wrap an existing header \p H that lives in \p Region. | ||
DataAllocatorHandle(MappedFileRegion &Region, Header &H) | ||
: Region(&Region), H(&H) {} | ||
/// Wrap the header located \p HeaderOffset bytes from the start of | ||
/// \p Region. | ||
DataAllocatorHandle(MappedFileRegion &Region, intptr_t HeaderOffset) | ||
: DataAllocatorHandle( | ||
Region, *reinterpret_cast<Header *>(Region.data() + HeaderOffset)) { | ||
} | ||
|
||
private: | ||
// Both pointers are non-owning and null for a default-constructed handle. | ||
MappedFileRegion *Region = nullptr; | ||
Header *H = nullptr; | ||
}; | ||
|
||
} // end anonymous namespace | ||
|
||
/// Implementation state of OnDiskDataAllocator when the on-disk CAS is | ||
/// enabled. | ||
struct OnDiskDataAllocator::ImplType { | ||
// The database file that provides the mapped region and arena. | ||
DatabaseFile File; | ||
// Handle to the DataAllocator table found in that file. | ||
DataAllocatorHandle Store; | ||
}; | ||
|
||
/// Carve a new DataAllocator table out of \p Alloc.
///
/// A single arena allocation holds, in order: the fixed \c Header,
/// \p UserHeaderSize bytes of zero-initialized user header, and the
/// 0-terminated table \p Name.
///
/// \param Alloc arena to allocate the table from.
/// \param Name table name; its length must fit in 16 bits.
/// \param UserHeaderSize number of user header bytes to reserve.
/// \returns a handle to the new table, or the error reported by the arena if
/// the allocation fails.
Expected<DataAllocatorHandle>
DataAllocatorHandle::create(MappedFileRegionArena &Alloc, StringRef Name,
                            uint32_t UserHeaderSize) {
  // The name length is narrowed to uint16_t below, so check the precondition
  // before touching the arena.
  assert(Name.size() <= UINT16_MAX && "Expected smaller table name");

  // Allocate room for the header, the user header, and the name plus its
  // terminator.
  auto Offset =
      Alloc.allocateOffset(sizeof(Header) + UserHeaderSize + Name.size() + 1);
  if (LLVM_UNLIKELY(!Offset))
    return Offset.takeError();

  // Construct the header in place. The third generic-header value,
  // sizeof(Header) + UserHeaderSize, matches where the name is written below.
  auto *H = new (Alloc.getRegion().data() + *Offset)
      Header{{TableHandle::TableKind::DataAllocator,
              static_cast<uint16_t>(Name.size()),
              static_cast<int32_t>(sizeof(Header) + UserHeaderSize)},
             /*AllocatorOffset=*/{0},
             /*UserHeaderSize=*/UserHeaderSize};

  // Zero the user header, which starts immediately after the fixed header.
  char *UserHeader = reinterpret_cast<char *>(H + 1);
  memset(UserHeader, 0, UserHeaderSize);

  // Store the 0-terminated name right after the user header.
  char *NameStorage = UserHeader + UserHeaderSize;
  llvm::copy(Name, NameStorage);
  NameStorage[Name.size()] = 0;
  return DataAllocatorHandle(Alloc.getRegion(), *H);
}
|
||
/// Open the database file at \p PathTwine, creating it if it does not exist,
/// and bind to the DataAllocator table named \p TableNameTwine.
///
/// \param MaxFileSize forwarded to \c DatabaseFile::create.
/// \param NewFileInitialSize not used by this implementation.
/// \param UserHeaderSize number of user header bytes reserved when a new
/// table is created.
/// \param UserHeaderInit invoked on the user header buffer of a newly created
/// table; must be callable whenever \p UserHeaderSize is nonzero.
/// \returns the allocator, or an error if the file cannot be opened or
/// created, the table is not found, or the table has a different kind.
Expected<OnDiskDataAllocator> OnDiskDataAllocator::create(
    const Twine &PathTwine, const Twine &TableNameTwine, uint64_t MaxFileSize,
    std::optional<uint64_t> NewFileInitialSize, uint32_t UserHeaderSize,
    function_ref<void(void *)> UserHeaderInit) {
  assert(!UserHeaderSize || UserHeaderInit);
  // NOTE(review): NewFileInitialSize is accepted but never forwarded below;
  // confirm that ignoring it is intentional.
  SmallString<128> PathStorage;
  StringRef Path = PathTwine.toStringRef(PathStorage);
  SmallString<128> TableNameStorage;
  StringRef TableName = TableNameTwine.toStringRef(TableNameStorage);

  // Constructor for if the file doesn't exist: create the table, register it
  // with the database, then let the caller initialize the user header.
  auto NewDBConstructor = [&](DatabaseFile &DB) -> Error {
    auto Store =
        DataAllocatorHandle::create(DB.getAlloc(), TableName, UserHeaderSize);
    if (LLVM_UNLIKELY(!Store))
      return Store.takeError();

    if (auto E = DB.addTable(*Store))
      return E;

    if (UserHeaderSize)
      UserHeaderInit(Store->getUserHeader().data());
    return Error::success();
  };

  // Get or create the file.
  Expected<DatabaseFile> File =
      DatabaseFile::create(Path, MaxFileSize, NewDBConstructor);
  if (!File)
    return File.takeError();

  // Find the table and validate it.
  std::optional<TableHandle> Table = File->findTable(TableName);
  if (!Table)
    return createTableConfigError(std::errc::argument_out_of_domain, Path,
                                  TableName, "table not found");
  if (Error E = checkTable("table kind",
                           static_cast<size_t>(DataAllocatorHandle::Kind),
                           static_cast<size_t>(Table->getHeader().Kind), Path,
                           TableName))
    return std::move(E);
  auto Store = Table->cast<DataAllocatorHandle>();
  assert(Store && "Already checked the kind");

  // Success: move the file and the table handle into the implementation
  // state owned by the returned allocator.
  OnDiskDataAllocator::ImplType Impl{DatabaseFile(std::move(*File)), Store};
  return OnDiskDataAllocator(std::make_unique<ImplType>(std::move(Impl)));
}
|
||
/// Reserve \p Size bytes through the table handle and report both where the
/// bytes live in the file and a mutable view over them.
///
/// \returns the populated \c pointer, or the allocation error.
Expected<OnDiskDataAllocator::pointer>
OnDiskDataAllocator::allocate(size_t Size) {
  Expected<MutableArrayRef<char>> Buffer =
      Impl->Store.allocate(Impl->File.getAlloc(), Size);
  if (LLVM_UNLIKELY(!Buffer))
    return Buffer.takeError();

  // The file offset is the distance of the buffer from the region start.
  const auto Distance = Buffer->data() - Impl->Store.getRegion().data();
  return pointer(FileOffset(Distance), *Buffer);
}
|
||
/// Translate \p Offset into an address inside the mapped region.
///
/// Asserts that the offset is set, that this allocator has implementation
/// state, and that the offset is below the arena's current size.
const char *OnDiskDataAllocator::beginData(FileOffset Offset) const {
  assert(Offset);
  assert(Impl);
  assert(Offset.get() < Impl->File.getAlloc().size());
  auto *RegionBase = Impl->File.getRegion().data();
  return RegionBase + Offset.get();
}
|
||
/// \returns the user header buffer exposed by the table handle.
MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() {
  auto &Table = Impl->Store;
  return Table.getUserHeader();
}
|
||
/// \returns the size reported by the underlying database file.
size_t OnDiskDataAllocator::size() const {
  auto &DB = Impl->File;
  return DB.size();
}
size_t OnDiskDataAllocator::capacity() const { | ||
return Impl->File.getRegion().size(); | ||
} | ||
|
||
/// Take ownership of implementation state; \a create() calls this with the | ||
/// state it has just built. | ||
OnDiskDataAllocator::OnDiskDataAllocator(std::unique_ptr<ImplType> Impl) | ||
: Impl(std::move(Impl)) {} | ||
|
||
#else // !LLVM_ENABLE_ONDISK_CAS | ||
|
||
/// Empty implementation type used when LLVM_ENABLE_ONDISK_CAS is off. | ||
struct OnDiskDataAllocator::ImplType {}; | ||
|
||
/// Stub used when the on-disk CAS is disabled: ignores every argument and
/// always reports that the allocator is not supported.
Expected<OnDiskDataAllocator> OnDiskDataAllocator::create(
    const Twine &Path, const Twine &TableName, uint64_t MaxFileSize,
    std::optional<uint64_t> NewFileInitialSize, uint32_t UserHeaderSize,
    function_ref<void(void *)> UserHeaderInit) {
  const std::error_code NotSupported =
      make_error_code(std::errc::not_supported);
  return createStringError(NotSupported,
                           "OnDiskDataAllocator is not supported");
}
|
||
/// Stub used when the on-disk CAS is disabled: ignores \p Size and always
/// reports that the allocator is not supported.
Expected<OnDiskDataAllocator::pointer>
OnDiskDataAllocator::allocate(size_t Size) {
  const std::error_code NotSupported =
      make_error_code(std::errc::not_supported);
  return createStringError(NotSupported,
                           "OnDiskDataAllocator is not supported");
}
|
||
/// Stub used when the on-disk CAS is disabled: there is no data to look up, | ||
/// so this always returns null. | ||
const char *OnDiskDataAllocator::beginData(FileOffset Offset) const { | ||
return nullptr; | ||
} | ||
|
||
/// Stub used when the on-disk CAS is disabled: returns an empty buffer. | ||
MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() { return {}; } | ||
|
||
// Stubs used when the on-disk CAS is disabled: both size and capacity are | ||
// reported as zero. | ||
size_t OnDiskDataAllocator::size() const { return 0; } | ||
size_t OnDiskDataAllocator::capacity() const { return 0; } | ||
|
||
#endif // LLVM_ENABLE_ONDISK_CAS |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "llvm/CAS/OnDiskDataAllocator.h" | ||
#include "llvm/CAS/MappedFileRegionArena.h" | ||
#include "llvm/Config/llvm-config.h" | ||
#include "llvm/Support/Alignment.h" | ||
#include "llvm/Testing/Support/Error.h" | ||
#include "llvm/Testing/Support/SupportHelpers.h" | ||
|
||
#if LLVM_ENABLE_ONDISK_CAS | ||
|
||
using namespace llvm; | ||
using namespace llvm::cas; | ||
|
||
/// Creates an allocator in a fresh temporary directory with a 1 MB file size
/// limit, then checks that small allocations succeed with the arena's
/// alignment and the requested size, that a 1 MB allocation fails, and that
/// size and capacity respect the limit.
TEST(OnDiskDataAllocatorTest, Allocate) {
  unittest::TempDir Temp("data-allocator", /*Unique=*/true);
  constexpr size_t MB = 1024u * 1024u;

  std::optional<OnDiskDataAllocator> Allocator;
  ASSERT_THAT_ERROR(OnDiskDataAllocator::create(
                        Temp.path("allocator"), "data", /*MaxFileSize=*/MB,
                        /*NewFileInitialSize=*/std::nullopt)
                        .moveInto(Allocator),
                    Succeeded());

  // Allocate. Every returned offset must be aligned and the returned buffer
  // must cover exactly the requested number of bytes.
  for (size_t Size = 1; Size < 16; ++Size) {
    OnDiskDataAllocator::pointer P;
    ASSERT_THAT_ERROR(Allocator->allocate(Size).moveInto(P), Succeeded());
    ASSERT_TRUE(
        isAligned(MappedFileRegionArena::getAlign(), P.getOffset().get()));
    ASSERT_EQ(P->size(), Size);
  }

  // Out of space: a request as large as the whole file limit must fail.
  {
    OnDiskDataAllocator::pointer P;
    ASSERT_THAT_ERROR(Allocator->allocate(MB).moveInto(P), Failed());
  }

  // Check size and capacity.
  ASSERT_EQ(Allocator->capacity(), MB);
  ASSERT_LE(Allocator->size(), MB);
}
|
||
#endif // LLVM_ENABLE_ONDISK_CAS |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.