Skip to content

Commit 14ee8b9

Browse files
committed
[CAS] Avoid using mmap for storeFromOpenFileImpl
`mmap` is unsafe since the contents of the mapped region may change after we get the contents digest. This manifested as an issue in linux, where all the tests doing ``` llvm-cas --cas %t/cas --ingest --data %t ``` were ingesting the files of the CAS directory itself, and were creating invalid objects.
1 parent 2c9027b commit 14ee8b9

File tree

5 files changed

+56
-56
lines changed

5 files changed

+56
-56
lines changed

llvm/include/llvm/CAS/ObjectStore.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ class ObjectStore {
172172
/// Get ObjectRef from open file.
173173
virtual Expected<ObjectRef>
174174
storeFromOpenFileImpl(sys::fs::file_t FD,
175-
Optional<sys::fs::file_status> Status) = 0;
175+
Optional<sys::fs::file_status> Status);
176176

177177
/// Get a lifetime-extended StringRef pointing at \p Data.
178178
///
@@ -246,6 +246,9 @@ class ObjectStore {
246246
return Data.size();
247247
}
248248

249+
/// Validate the whole node tree.
250+
Error validateTree(ObjectRef Ref);
251+
249252
/// Print the ObjectStore internals for debugging purpose.
250253
virtual void print(raw_ostream &) const {}
251254
void dump() const;

llvm/lib/CAS/BuiltinCAS.cpp

Lines changed: 0 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -49,57 +49,6 @@ const BuiltinCASContext &BuiltinCASContext::getDefaultContext() {
4949
return DefaultContext;
5050
}
5151

52-
static size_t getPageSize() {
53-
static int PageSize = sys::Process::getPageSizeEstimate();
54-
return PageSize;
55-
}
56-
57-
Expected<ObjectRef>
58-
BuiltinCAS::storeFromOpenFileImpl(sys::fs::file_t FD,
59-
Optional<sys::fs::file_status> Status) {
60-
int PageSize = getPageSize();
61-
62-
if (!Status) {
63-
Status.emplace();
64-
if (std::error_code EC = sys::fs::status(FD, *Status))
65-
return errorCodeToError(EC);
66-
}
67-
68-
constexpr size_t MinMappedSize = 4 * 4096;
69-
auto readWithStream = [&]() -> Expected<ObjectRef> {
70-
// FIXME: MSVC: SmallString<MinMappedSize * 2>
71-
SmallString<4 * 4096 * 2> Data;
72-
if (Error E = sys::fs::readNativeFileToEOF(FD, Data, MinMappedSize))
73-
return std::move(E);
74-
return store(None, makeArrayRef(Data.data(), Data.size()));
75-
};
76-
77-
// Check whether we can trust the size from stat.
78-
if (Status->type() != sys::fs::file_type::regular_file &&
79-
Status->type() != sys::fs::file_type::block_file)
80-
return readWithStream();
81-
82-
if (Status->getSize() < MinMappedSize)
83-
return readWithStream();
84-
85-
std::error_code EC;
86-
sys::fs::mapped_file_region Map(FD, sys::fs::mapped_file_region::readonly,
87-
Status->getSize(),
88-
/*offset=*/0, EC);
89-
if (EC)
90-
return errorCodeToError(EC);
91-
92-
// If the file is guaranteed to be null-terminated, use it directly. Note
93-
// that the file size may have changed from ::stat if this file is volatile,
94-
// so we need to check for an actual null character at the end.
95-
ArrayRef<char> Data(Map.data(), Map.size());
96-
HashType ComputedHash =
97-
BuiltinObjectHasher<HasherT>::hashObject(*this, None, Data);
98-
if (!isAligned(Align(PageSize), Data.size()) && Data.end()[0] == 0)
99-
return storeFromNullTerminatedRegion(ComputedHash, std::move(Map));
100-
return storeImpl(ComputedHash, None, Data);
101-
}
102-
10352
Expected<ObjectRef> BuiltinCAS::store(ArrayRef<ObjectRef> Refs,
10453
ArrayRef<char> Data) {
10554
return storeImpl(BuiltinObjectHasher<HasherT>::hashObject(*this, Refs, Data),

llvm/lib/CAS/BuiltinCAS.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,6 @@ class BuiltinCAS : public ObjectStore {
9696
ArrayRef<ObjectRef> Refs,
9797
ArrayRef<char> Data) = 0;
9898

99-
Expected<ObjectRef>
100-
storeFromOpenFileImpl(sys::fs::file_t FD,
101-
Optional<sys::fs::file_status> Status) override;
10299
virtual Expected<ObjectRef>
103100
storeFromNullTerminatedRegion(ArrayRef<uint8_t> ComputedHash,
104101
sys::fs::mapped_file_region Map) {

llvm/lib/CAS/HierarchicalTreeBuilder.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,5 +255,12 @@ Expected<ObjectProxy> HierarchicalTreeBuilder::create(ObjectStore &CAS) {
255255
T->Ref = ExpectedTree->getRef();
256256
}
257257

258-
return cantFail(CAS.getProxy(*Root.Ref));
258+
Expected<ObjectProxy> Obj = cantFail(CAS.getProxy(*Root.Ref));
259+
#ifndef NDEBUG
260+
if (Obj) {
261+
if (Error E = CAS.validateTree(Obj->getRef()))
262+
return std::move(E);
263+
}
264+
#endif
265+
return Obj;
259266
}

llvm/lib/CAS/ObjectStore.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "llvm/CAS/ObjectStore.h"
10+
#include "llvm/ADT/DenseSet.h"
1011
#include "llvm/ADT/StringMap.h"
1112
#include "llvm/Support/Debug.h"
1213
#include "llvm/Support/FileSystem.h"
@@ -98,6 +99,49 @@ Expected<ObjectProxy> ObjectStore::createProxy(ArrayRef<ObjectRef> Refs,
9899
return getProxy(*Ref);
99100
}
100101

102+
Expected<ObjectRef>
103+
ObjectStore::storeFromOpenFileImpl(sys::fs::file_t FD,
104+
Optional<sys::fs::file_status> Status) {
105+
// Copy the file into an immutable memory buffer and call \c store on that.
106+
// Using \c mmap would be unsafe because there's a race window between when we
107+
// get the digest hash for the \c mmap contents and when we store the data; if
108+
// the file changes in-between we will create an invalid object.
109+
110+
// FIXME: For the on-disk CAS implementation use cloning to store it as a
111+
// standalone file if the file-system supports it and the file is large.
112+
113+
constexpr size_t ChunkSize = 4 * 4096;
114+
SmallString<0> Data;
115+
Data.reserve(ChunkSize * 2);
116+
if (Error E = sys::fs::readNativeFileToEOF(FD, Data, ChunkSize))
117+
return std::move(E);
118+
return store(std::nullopt, ArrayRef(Data.data(), Data.size()));
119+
}
120+
121+
Error ObjectStore::validateTree(ObjectRef Root) {
122+
SmallDenseSet<ObjectRef> ValidatedRefs;
123+
SmallVector<ObjectRef, 16> RefsToValidate;
124+
RefsToValidate.push_back(Root);
125+
126+
while (!RefsToValidate.empty()) {
127+
ObjectRef Ref = RefsToValidate.pop_back_val();
128+
auto [I, Inserted] = ValidatedRefs.insert(Ref);
129+
if (!Inserted)
130+
continue; // already validated.
131+
if (Error E = validate(getID(Ref)))
132+
return E;
133+
Expected<ObjectHandle> Obj = load(Ref);
134+
if (!Obj)
135+
return Obj.takeError();
136+
if (Error E = forEachRef(*Obj, [&RefsToValidate](ObjectRef R) -> Error {
137+
RefsToValidate.push_back(R);
138+
return Error::success();
139+
}))
140+
return E;
141+
}
142+
return Error::success();
143+
}
144+
101145
std::unique_ptr<MemoryBuffer>
102146
ObjectProxy::getMemoryBuffer(StringRef Name,
103147
bool RequiresNullTerminator) const {

0 commit comments

Comments
 (0)