diff --git a/llvm/include/llvm/CAS/OnDiskDataAllocator.h b/llvm/include/llvm/CAS/OnDiskDataAllocator.h index 2809df800621b..b7099dce2069d 100644 --- a/llvm/include/llvm/CAS/OnDiskDataAllocator.h +++ b/llvm/include/llvm/CAS/OnDiskDataAllocator.h @@ -64,7 +64,7 @@ class OnDiskDataAllocator { /// \returns the buffer that was allocated at \p create time, with size /// \p UserHeaderSize. - MutableArrayRef getUserHeader(); + MutableArrayRef getUserHeader() const; size_t size() const; size_t capacity() const; diff --git a/llvm/include/llvm/CAS/OnDiskGraphDB.h b/llvm/include/llvm/CAS/OnDiskGraphDB.h new file mode 100644 index 0000000000000..83017a6a54fee --- /dev/null +++ b/llvm/include/llvm/CAS/OnDiskGraphDB.h @@ -0,0 +1,469 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This declares OnDiskGraphDB, an ondisk CAS database with a fixed length +/// hash. This is the class that implements the database storage scheme without +/// exposing the hashing algorithm. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CAS_ONDISKGRAPHDB_H +#define LLVM_CAS_ONDISKGRAPHDB_H + +#include "llvm/ADT/PointerUnion.h" +#include "llvm/CAS/OnDiskDataAllocator.h" +#include "llvm/CAS/OnDiskTrieRawHashMap.h" + +namespace llvm::cas::ondisk { + +/// Standard 8 byte reference inside OnDiskGraphDB. 
+class InternalRef { +public: + FileOffset getFileOffset() const { return FileOffset(Data); } + uint64_t getRawData() const { return Data; } + + static InternalRef getFromRawData(uint64_t Data) { return InternalRef(Data); } + static InternalRef getFromOffset(FileOffset Offset) { + return InternalRef(Offset.get()); + } + + friend bool operator==(InternalRef LHS, InternalRef RHS) { + return LHS.Data == RHS.Data; + } + +private: + InternalRef(FileOffset Offset) : Data((uint64_t)Offset.get()) {} + InternalRef(uint64_t Data) : Data(Data) {} + uint64_t Data; +}; + +/// Compact 4 byte reference inside OnDiskGraphDB for smaller references. +class InternalRef4B { +public: + FileOffset getFileOffset() const { return FileOffset(Data); } + uint32_t getRawData() const { return Data; } + + /// Shrink to 4B reference. + static std::optional tryToShrink(InternalRef Ref) { + uint64_t Offset = Ref.getRawData(); + if (Offset > UINT32_MAX) + return std::nullopt; + return InternalRef4B(Offset); + } + + operator InternalRef() const { + return InternalRef::getFromOffset(getFileOffset()); + } + +private: + friend class InternalRef; + InternalRef4B(uint32_t Data) : Data(Data) {} + uint32_t Data; +}; + +/// Array of internal node references. 
+class InternalRefArrayRef { +public: + size_t size() const { return Size; } + bool empty() const { return !Size; } + + class iterator + : public iterator_facade_base { + public: + bool operator==(const iterator &RHS) const { return I == RHS.I; } + InternalRef operator*() const { + if (auto *Ref = dyn_cast(I)) + return *Ref; + return InternalRef(*cast(I)); + } + bool operator<(const iterator &RHS) const { + assert(isa(I) == isa(RHS.I)); + if (auto *Ref = dyn_cast(I)) + return Ref < cast(RHS.I); + return cast(I) - + cast(RHS.I); + } + ptrdiff_t operator-(const iterator &RHS) const { + assert(isa(I) == isa(RHS.I)); + if (auto *Ref = dyn_cast(I)) + return Ref - cast(RHS.I); + return cast(I) - + cast(RHS.I); + } + iterator &operator+=(ptrdiff_t N) { + if (auto *Ref = dyn_cast(I)) + I = Ref + N; + else + I = cast(I) + N; + return *this; + } + iterator &operator-=(ptrdiff_t N) { + if (auto *Ref = dyn_cast(I)) + I = Ref - N; + else + I = cast(I) - N; + return *this; + } + InternalRef operator[](ptrdiff_t N) const { return *(this->operator+(N)); } + + iterator() = default; + + uint64_t getOpaqueData() const { return uintptr_t(I.getOpaqueValue()); } + + static iterator fromOpaqueData(uint64_t Opaque) { + return iterator( + PointerUnion::getFromOpaqueValue((void *) + Opaque)); + } + + private: + friend class InternalRefArrayRef; + explicit iterator( + PointerUnion I) + : I(I) {} + PointerUnion I; + }; + + bool operator==(const InternalRefArrayRef &RHS) const { + return size() == RHS.size() && std::equal(begin(), end(), RHS.begin()); + } + + iterator begin() const { return iterator(Begin); } + iterator end() const { return begin() + Size; } + + /// Array accessor. 
+ InternalRef operator[](ptrdiff_t N) const { return begin()[N]; } + + bool is4B() const { return isa(Begin); } + bool is8B() const { return isa(Begin); } + + ArrayRef getBuffer() const { + if (is4B()) { + auto *B = cast(Begin); + return ArrayRef((const uint8_t *)B, sizeof(InternalRef4B) * Size); + } + auto *B = cast(Begin); + return ArrayRef((const uint8_t *)B, sizeof(InternalRef) * Size); + } + + InternalRefArrayRef(std::nullopt_t = std::nullopt) { + // This is useful so that all the casts in the \p iterator functions can + // operate without needing to check for a null value. + static InternalRef PlaceHolder = InternalRef::getFromRawData(0); + Begin = &PlaceHolder; + } + + InternalRefArrayRef(ArrayRef Refs) + : Begin(Refs.begin()), Size(Refs.size()) {} + + InternalRefArrayRef(ArrayRef Refs) + : Begin(Refs.begin()), Size(Refs.size()) {} + +private: + PointerUnion Begin; + size_t Size = 0; +}; + +/// Reference to a node. The node's data may not be stored in the database. +/// An \p ObjectID instance can only be used with the \p OnDiskGraphDB instance +/// it came from. \p ObjectIDs from different \p OnDiskGraphDB instances are not +/// comparable. +class ObjectID { +public: + uint64_t getOpaqueData() const { return Opaque; } + + static ObjectID fromOpaqueData(uint64_t Opaque) { return ObjectID(Opaque); } + + friend bool operator==(const ObjectID &LHS, const ObjectID &RHS) { + return LHS.Opaque == RHS.Opaque; + } + friend bool operator!=(const ObjectID &LHS, const ObjectID &RHS) { + return !(LHS == RHS); + } + +private: + explicit ObjectID(uint64_t Opaque) : Opaque(Opaque) {} + uint64_t Opaque; +}; + +/// Handle for a loaded node object. 
+class ObjectHandle { +public: + explicit ObjectHandle(uint64_t Opaque) : Opaque(Opaque) {} + uint64_t getOpaqueData() const { return Opaque; } + + static ObjectHandle fromFileOffset(FileOffset Offset); + static ObjectHandle fromMemory(uintptr_t Ptr); + + friend bool operator==(const ObjectHandle &LHS, const ObjectHandle &RHS) { + return LHS.Opaque == RHS.Opaque; + } + friend bool operator!=(const ObjectHandle &LHS, const ObjectHandle &RHS) { + return !(LHS == RHS); + } + +private: + uint64_t Opaque; +}; + +/// Iterator for ObjectID. +class object_refs_iterator + : public iterator_facade_base { +public: + bool operator==(const object_refs_iterator &RHS) const { return I == RHS.I; } + ObjectID operator*() const { + return ObjectID::fromOpaqueData((*I).getRawData()); + } + bool operator<(const object_refs_iterator &RHS) const { return I < RHS.I; } + ptrdiff_t operator-(const object_refs_iterator &RHS) const { + return I - RHS.I; + } + object_refs_iterator &operator+=(ptrdiff_t N) { + I += N; + return *this; + } + object_refs_iterator &operator-=(ptrdiff_t N) { + I -= N; + return *this; + } + ObjectID operator[](ptrdiff_t N) const { return *(this->operator+(N)); } + + object_refs_iterator() = default; + object_refs_iterator(InternalRefArrayRef::iterator I) : I(I) {} + + uint64_t getOpaqueData() const { return I.getOpaqueData(); } + + static object_refs_iterator fromOpaqueData(uint64_t Opaque) { + return InternalRefArrayRef::iterator::fromOpaqueData(Opaque); + } + +private: + InternalRefArrayRef::iterator I; +}; + +using object_refs_range = llvm::iterator_range; + +/// On-disk CAS nodes database, independent of a particular hashing algorithm. +class OnDiskGraphDB { +public: + /// Associate data & references with a particular object ID. If there is + /// already a record for this object the operation is a no-op. \param ID the + /// object ID to associate the data & references with. \param Refs references + /// \param Data data buffer. 
+ Error store(ObjectID ID, ArrayRef Refs, ArrayRef Data); + + /// \returns \p nullopt if the object associated with \p Ref does not exist. + Expected> load(ObjectID Ref); + + /// \returns the hash bytes digest for the object reference. + ArrayRef getDigest(ObjectID Ref) const { + // ObjectID should be valid to fetch Digest. + return cantFail(getDigest(getInternalRef(Ref))); + } + + /// Form a reference for the provided hash. The reference can be used as part + /// of a CAS object even if it's not associated with an object yet. + Expected getReference(ArrayRef Hash); + + /// Get an existing reference to the object \p Digest. + /// + /// Returns \p nullopt if the object is not stored in this CAS. + std::optional getExistingReference(ArrayRef Digest); + + /// Check whether the object associated with \p Ref is stored in the CAS. + /// Note that this function will fault-in according to the policy. + Expected isMaterialized(ObjectID Ref); + + /// Check whether the object associated with \p Ref is stored in the CAS. + /// Note that this function does not fault-in. + bool containsObject(ObjectID Ref) const { + return containsObject(Ref, /*CheckUpstream=*/true); + } + + /// \returns the data part of the provided object handle. + ArrayRef getObjectData(ObjectHandle Node) const; + + /// \returns the object referenced by the provided object handle. + object_refs_range getObjectRefs(ObjectHandle Node) const { + InternalRefArrayRef Refs = getInternalRefs(Node); + return make_range(Refs.begin(), Refs.end()); + } + + /// \returns Total size of stored objects. + /// + /// NOTE: There's a possibility that the returned size is not including a + /// large object if the process crashed right at the point of inserting it. + size_t getStorageSize() const; + + /// \returns The precentage of space utilization of hard space limits. + /// + /// Return value is an integer between 0 and 100 for percentage. 
+ unsigned getHardStorageLimitUtilization() const; + + void print(raw_ostream &OS) const; + + /// Hashing function type for validation. + using HashingFuncT = function_ref>, ArrayRef, SmallVectorImpl &)>; + + /// Validate the OnDiskGraphDB. + /// + /// \param Deep if true, rehash all the objects to ensure no data + /// corruption in stored objects, otherwise just validate the structure of + /// CAS database. + /// \param Hasher is the hashing function used for objects inside CAS. + Error validate(bool Deep, HashingFuncT Hasher) const; + + /// How to fault-in nodes if an upstream database is used. + enum class FaultInPolicy { + /// Copy only the requested node. + SingleNode, + /// Copy the the entire graph of a node. + FullTree, + }; + + /// Open the on-disk store from a directory. + /// + /// \param Path directory for the on-disk store. The directory will be created + /// if it doesn't exist. + /// \param HashName Identifier name for the hashing algorithm that is going to + /// be used. + /// \param HashByteSize Size for the object digest hash bytes. + /// \param UpstreamDB Optional on-disk store to be used for faulting-in nodes + /// if they don't exist in the primary store. The upstream store is only used + /// for reading nodes, new nodes are only written to the primary store. + /// \param Policy If \p UpstreamDB is provided, controls how nodes are copied + /// to primary store. This is recorded at creation time and subsequent opens + /// need to pass the same policy otherwise the \p open will fail. + static Expected> + open(StringRef Path, StringRef HashName, unsigned HashByteSize, + std::unique_ptr UpstreamDB = nullptr, + FaultInPolicy Policy = FaultInPolicy::FullTree); + + ~OnDiskGraphDB(); + +private: + /// Forward declaration for a proxy for an ondisk index record. + struct IndexProxy; + + enum class ObjectPresence { + Missing, + InPrimaryDB, + OnlyInUpstreamDB, + }; + + /// Check if object exists and if it is on upstream only. 
+ Expected getObjectPresence(ObjectID Ref, + bool CheckUpstream) const; + + /// \returns true if object can be found in database. + bool containsObject(ObjectID Ref, bool CheckUpstream) const { + auto Presence = getObjectPresence(Ref, CheckUpstream); + if (!Presence) { + consumeError(Presence.takeError()); + return false; + } + switch (*Presence) { + case ObjectPresence::Missing: + return false; + case ObjectPresence::InPrimaryDB: + return true; + case ObjectPresence::OnlyInUpstreamDB: + return true; + } + } + + /// When \p load is called for a node that doesn't exist, this function tries + /// to load it from the upstream store and copy it to the primary one. + Expected> faultInFromUpstream(ObjectID PrimaryID); + + /// Import the entire tree from upstream with \p UpstreamNode as root. + Error importFullTree(ObjectID PrimaryID, ObjectHandle UpstreamNode); + /// Import only the \param UpstreamNode. + Error importSingleNode(ObjectID PrimaryID, ObjectHandle UpstreamNode); + + /// Found the IndexProxy for the hash. + Expected indexHash(ArrayRef Hash); + + /// Get path for creating standalone data file. + void getStandalonePath(StringRef FileSuffix, const IndexProxy &I, + SmallVectorImpl &Path) const; + /// Create a standalone leaf file. + Error createStandaloneLeaf(IndexProxy &I, ArrayRef Data); + + /// \name Helper functions for internal data structures. 
+ /// \{ + static InternalRef getInternalRef(ObjectID Ref) { + return InternalRef::getFromRawData(Ref.getOpaqueData()); + } + + static ObjectID getExternalReference(InternalRef Ref) { + return ObjectID::fromOpaqueData(Ref.getRawData()); + } + + static ObjectID getExternalReference(const IndexProxy &I); + + static InternalRef makeInternalRef(FileOffset IndexOffset); + + Expected> getDigest(InternalRef Ref) const; + + ArrayRef getDigest(const IndexProxy &I) const; + + Expected getIndexProxyFromRef(InternalRef Ref) const; + + IndexProxy + getIndexProxyFromPointer(OnDiskTrieRawHashMap::ConstOnDiskPtr P) const; + + InternalRefArrayRef getInternalRefs(ObjectHandle Node) const; + /// \} + + /// Get the atomic variable that keeps track of the standalone data storage + /// size. + std::atomic &standaloneStorageSize() const; + + /// Increase the standalone data size. + void recordStandaloneSizeIncrease(size_t SizeIncrease); + /// Get the standalone data size. + uint64_t getStandaloneStorageSize() const; + + // Private constructor. + OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index, + OnDiskDataAllocator DataPool, + std::unique_ptr UpstreamDB, + FaultInPolicy Policy); + + /// Mapping from hash to object reference. + /// + /// Data type is TrieRecord. + OnDiskTrieRawHashMap Index; + + /// Storage for most objects. + /// + /// Data type is DataRecordHandle. + OnDiskDataAllocator DataPool; + + /// A StandaloneDataMap. + void *StandaloneData = nullptr; + + /// Path to the root directory. + std::string RootPath; + + /// Optional on-disk store to be used for faulting-in nodes. + std::unique_ptr UpstreamDB; + + /// The policy used to fault in data from upstream. 
+ FaultInPolicy FIPolicy; +}; + +} // namespace llvm::cas::ondisk + +#endif // LLVM_CAS_ONDISKGRAPHDB_H diff --git a/llvm/include/llvm/CAS/OnDiskKeyValueDB.h b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h new file mode 100644 index 0000000000000..b762518366c21 --- /dev/null +++ b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h @@ -0,0 +1,82 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This declares OnDiskKeyValueDB, a key value storage database of fixed size +/// key and value. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CAS_ONDISKKEYVALUEDB_H +#define LLVM_CAS_ONDISKKEYVALUEDB_H + +#include "llvm/CAS/OnDiskTrieRawHashMap.h" + +namespace llvm::cas::ondisk { + +/// An on-disk key-value data store with the following properties: +/// * Keys are fixed length binary hashes with expected normal distribution. +/// * Values are buffers of the same size, specified at creation time. +/// * The value of a key cannot be changed once it is set. +/// * The value buffers returned from a key lookup have 8-byte alignment. +class OnDiskKeyValueDB { +public: + /// Associate a value with a key. + /// + /// \param Key the hash bytes for the key + /// \param Value the value bytes, same size as \p ValueSize parameter of + /// \p open call. + /// + /// \returns the value associated with the \p Key. It may be different than + /// \p Value if another value is already associated with this key. + Expected> put(ArrayRef Key, ArrayRef Value); + + /// \returns the value associated with the \p Key, or \p std::nullopt if the + /// key does not exist. 
+ Expected>> get(ArrayRef Key); + + /// \returns Total size of stored data. + size_t getStorageSize() const { return Cache.size(); } + + /// \returns The precentage of space utilization of hard space limits. + /// + /// Return value is an integer between 0 and 100 for percentage. + unsigned getHardStorageLimitUtilization() const { + return Cache.size() * 100ULL / Cache.capacity(); + } + + /// Open the on-disk store from a directory. + /// + /// \param Path directory for the on-disk store. The directory will be created + /// if it doesn't exist. + /// \param HashName Identifier name for the hashing algorithm that is going to + /// be used. + /// \param KeySize Size for the key hash bytes. + /// \param ValueName Identifier name for the values. + /// \param ValueSize Size for the value bytes. + static Expected> + open(StringRef Path, StringRef HashName, unsigned KeySize, + StringRef ValueName, size_t ValueSize); + + using CheckValueT = + function_ref Data)>; + /// Validate the storage with a callback \p CheckValue to check the stored + /// value. 
+ Error validate(CheckValueT CheckValue) const; + +private: + OnDiskKeyValueDB(size_t ValueSize, OnDiskTrieRawHashMap Cache) + : ValueSize(ValueSize), Cache(std::move(Cache)) {} + + const size_t ValueSize; + OnDiskTrieRawHashMap Cache; +}; + +} // namespace llvm::cas::ondisk + +#endif // LLVM_CAS_ONDISKKEYVALUEDB_H diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt index bca39b645af45..a2f8c49e50145 100644 --- a/llvm/lib/CAS/CMakeLists.txt +++ b/llvm/lib/CAS/CMakeLists.txt @@ -8,6 +8,8 @@ add_llvm_component_library(LLVMCAS ObjectStore.cpp OnDiskCommon.cpp OnDiskDataAllocator.cpp + OnDiskGraphDB.cpp + OnDiskKeyValueDB.cpp OnDiskTrieRawHashMap.cpp ADDITIONAL_HEADER_DIRS diff --git a/llvm/lib/CAS/OnDiskCommon.cpp b/llvm/lib/CAS/OnDiskCommon.cpp index 25aa06bfe64da..281bde981457b 100644 --- a/llvm/lib/CAS/OnDiskCommon.cpp +++ b/llvm/lib/CAS/OnDiskCommon.cpp @@ -7,9 +7,10 @@ //===----------------------------------------------------------------------===// #include "OnDiskCommon.h" -#include "llvm/Config/config.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Process.h" +#include #include #if __has_include() @@ -25,8 +26,44 @@ #include #endif +#if __has_include() +#include // statfs +#endif + using namespace llvm; +static uint64_t OnDiskCASMaxMappingSize = 0; + +Expected> cas::ondisk::getOverriddenMaxMappingSize() { + static std::once_flag Flag; + Error Err = Error::success(); + std::call_once(Flag, [&Err] { + ErrorAsOutParameter EAO(&Err); + constexpr const char *EnvVar = "LLVM_CAS_MAX_MAPPING_SIZE"; + auto Value = sys::Process::GetEnv(EnvVar); + if (!Value) + return; + + uint64_t Size; + if (StringRef(*Value).getAsInteger(/*auto*/ 0, Size)) + Err = createStringError(inconvertibleErrorCode(), + "invalid value for %s: expected integer", EnvVar); + OnDiskCASMaxMappingSize = Size; + }); + + if (Err) + return std::move(Err); + + if (OnDiskCASMaxMappingSize == 0) + return std::nullopt; + + return 
OnDiskCASMaxMappingSize; +} + +void cas::ondisk::setMaxMappingSize(uint64_t Size) { + OnDiskCASMaxMappingSize = Size; +} + std::error_code cas::ondisk::lockFileThreadSafe(int FD, sys::fs::LockKind Kind) { #if HAVE_FLOCK @@ -125,3 +162,20 @@ Expected cas::ondisk::preallocateFileTail(int FD, size_t CurrentSize, return NewSize; // Pretend it worked. #endif } + +bool cas::ondisk::useSmallMappingSize(const Twine &P) { + // Add exceptions to use small database file here. +#if defined(__APPLE__) && __has_include() + // macOS tmpfs does not support sparse tails. + SmallString<128> PathStorage; + StringRef Path = P.toNullTerminatedStringRef(PathStorage); + struct statfs StatFS; + if (statfs(Path.data(), &StatFS) != 0) + return false; + + if (strcmp(StatFS.f_fstypename, "tmpfs") == 0) + return true; +#endif + // Default to use regular database file. + return false; +} diff --git a/llvm/lib/CAS/OnDiskCommon.h b/llvm/lib/CAS/OnDiskCommon.h index 8b79ffe5c3158..ac00662a2e91e 100644 --- a/llvm/lib/CAS/OnDiskCommon.h +++ b/llvm/lib/CAS/OnDiskCommon.h @@ -12,9 +12,31 @@ #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include +#include namespace llvm::cas::ondisk { +/// The version for all the ondisk database files. It needs to be bumped when +/// compatibility breaking changes are introduced. +constexpr StringLiteral CASFormatVersion = "v1"; + +/// Retrieves an overridden maximum mapping size for CAS files, if any, +/// speicified by LLVM_CAS_MAX_MAPPING_SIZE in the environment or set by +/// `setMaxMappingSize()`. If the value from environment is unreadable, returns +/// an error. +Expected> getOverriddenMaxMappingSize(); + +/// Set MaxMappingSize for ondisk CAS. This function is not thread-safe and +/// should be set before creaing any ondisk CAS and does not affect CAS already +/// created. Set value 0 to use default size. +void setMaxMappingSize(uint64_t Size); + +/// Whether to use a small file mapping for ondisk databases created in \p Path. 
+/// +/// For some file system that doesn't support sparse file, use a smaller file +/// mapping to avoid consuming too much disk space on creation. +bool useSmallMappingSize(const Twine &Path); + /// Thread-safe alternative to \c sys::fs::lockFile. This does not support all /// the platforms that \c sys::fs::lockFile does, so keep it in the CAS library /// for now. diff --git a/llvm/lib/CAS/OnDiskDataAllocator.cpp b/llvm/lib/CAS/OnDiskDataAllocator.cpp index 13bbd66139178..c9f2907339a23 100644 --- a/llvm/lib/CAS/OnDiskDataAllocator.cpp +++ b/llvm/lib/CAS/OnDiskDataAllocator.cpp @@ -185,7 +185,7 @@ Expected> OnDiskDataAllocator::get(FileOffset Offset, return ArrayRef{Impl->File.getRegion().data() + Offset.get(), Size}; } -MutableArrayRef OnDiskDataAllocator::getUserHeader() { +MutableArrayRef OnDiskDataAllocator::getUserHeader() const { return Impl->Store.getUserHeader(); } diff --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp new file mode 100644 index 0000000000000..72bb98c4bf65d --- /dev/null +++ b/llvm/lib/CAS/OnDiskGraphDB.cpp @@ -0,0 +1,1755 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file implements OnDiskGraphDB, an on-disk CAS nodes database, +/// independent of a particular hashing algorithm. It only needs to be +/// configured for the hash size and controls the schema of the storage. +/// +/// OnDiskGraphDB defines: +/// +/// - How the data is stored inside database, either as a standalone file, or +/// allocated inside a datapool. +/// - How references to other objects inside the same database is stored. 
They +/// are stored as internal references, instead of full hash value to save +/// space. +/// - How to chain databases together and import objects from upstream +/// databases. +/// +/// Here's a top-level description of the current layout: +/// +/// - db/index.: a file for the "index" table, named by \a +/// IndexTableName and managed by \a TrieRawHashMap. The contents are 8B +/// that are accessed atomically, describing the object kind and where/how +/// it's stored (including an optional file offset). See \a TrieRecord for +/// more details. +/// - db/data.: a file for the "data" table, named by \a +/// DataPoolTableName and managed by \a DataStore. New objects within +/// TrieRecord::MaxEmbeddedSize are inserted here as \a +/// TrieRecord::StorageKind::DataPool. +/// - db/obj..: a file storing an object outside the main +/// "data" table, named by its offset into the "index" table, with the +/// format of \a TrieRecord::StorageKind::Standalone. +/// - db/leaf..: a file storing a leaf node outside the +/// main "data" table, named by its offset into the "index" table, with +/// the format of \a TrieRecord::StorageKind::StandaloneLeaf. +/// - db/leaf+0..: a file storing a null-terminated leaf object +/// outside the main "data" table, named by its offset into the "index" table, +/// with the format of \a TrieRecord::StorageKind::StandaloneLeaf0. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/OnDiskGraphDB.h" +#include "OnDiskCommon.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CAS/OnDiskDataAllocator.h" +#include "llvm/CAS/OnDiskTrieRawHashMap.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include +#include +#include + +#define DEBUG_TYPE "on-disk-cas" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; + +static constexpr StringLiteral IndexTableName = "llvm.cas.index"; +static constexpr StringLiteral DataPoolTableName = "llvm.cas.data"; + +static constexpr StringLiteral IndexFilePrefix = "index."; +static constexpr StringLiteral DataPoolFilePrefix = "data."; + +static constexpr StringLiteral FilePrefixObject = "obj."; +static constexpr StringLiteral FilePrefixLeaf = "leaf."; +static constexpr StringLiteral FilePrefixLeaf0 = "leaf+0."; + +static Error createCorruptObjectError(Expected> ID) { + if (!ID) + return ID.takeError(); + + return createStringError(llvm::errc::invalid_argument, + "corrupt object '" + toHex(*ID) + "'"); +} + +namespace { + +/// Trie record data: 8 bytes, atomic +/// - 1-byte: StorageKind +/// - 7-bytes: DataStoreOffset (offset into referenced file) +class TrieRecord { +public: + enum class StorageKind : uint8_t { + /// Unknown object. + Unknown = 0, + + /// data.vX: main pool, full DataStore record. + DataPool = 1, + + /// obj..vX: standalone, with a full DataStore record. + Standalone = 10, + + /// leaf..vX: standalone, just the data. File contents + /// exactly the data content and file size matches the data size. No refs. 
+ StandaloneLeaf = 11, + + /// leaf+0..vX: standalone, just the data plus an + /// extra null character ('\0'). File size is 1 bigger than the data size. + /// No refs. + StandaloneLeaf0 = 12, + }; + + static StringRef getStandaloneFilePrefix(StorageKind SK) { + switch (SK) { + default: + llvm_unreachable("Expected standalone storage kind"); + case TrieRecord::StorageKind::Standalone: + return FilePrefixObject; + case TrieRecord::StorageKind::StandaloneLeaf: + return FilePrefixLeaf; + case TrieRecord::StorageKind::StandaloneLeaf0: + return FilePrefixLeaf0; + } + } + + enum Limits : int64_t { + /// Saves files bigger than 64KB standalone instead of embedding them. + MaxEmbeddedSize = 64LL * 1024LL - 1, + }; + + struct Data { + StorageKind SK = StorageKind::Unknown; + FileOffset Offset; + }; + + /// Pack StorageKind and Offset from Data into 8 byte TrieRecord. + static uint64_t pack(Data D) { + assert(D.Offset.get() < (int64_t)(1ULL << 56)); + uint64_t Packed = uint64_t(D.SK) << 56 | D.Offset.get(); + assert(D.SK != StorageKind::Unknown || Packed == 0); +#ifndef NDEBUG + Data RoundTrip = unpack(Packed); + assert(D.SK == RoundTrip.SK); + assert(D.Offset.get() == RoundTrip.Offset.get()); +#endif + return Packed; + } + + // Unpack TrieRecord into Data. + static Data unpack(uint64_t Packed) { + Data D; + if (!Packed) + return D; + D.SK = (StorageKind)(Packed >> 56); + D.Offset = FileOffset(Packed & (UINT64_MAX >> 8)); + return D; + } + + TrieRecord() : Storage(0) {} + + Data load() const { return unpack(Storage); } + bool compare_exchange_strong(Data &Existing, Data New); + +private: + std::atomic Storage; +}; + +/// DataStore record data: 4B + size? + refs? + data + 0 +/// - 4-bytes: Header +/// - {0,4,8}-bytes: DataSize (may be packed in Header) +/// - {0,4,8}-bytes: NumRefs (may be packed in Header) +/// - NumRefs*{4,8}-bytes: Refs[] (end-ptr is 8-byte aligned) +/// - +/// - 1-byte: 0-term +struct DataRecordHandle { + /// NumRefs storage: 4B, 2B, 1B, or 0B (no refs). 
Or, 8B, for alignment + /// convenience to avoid computing padding later. + enum class NumRefsFlags : uint8_t { + Uses0B = 0U, + Uses1B = 1U, + Uses2B = 2U, + Uses4B = 3U, + Uses8B = 4U, + Max = Uses8B, + }; + + /// DataSize storage: 8B, 4B, 2B, or 1B. + enum class DataSizeFlags { + Uses1B = 0U, + Uses2B = 1U, + Uses4B = 2U, + Uses8B = 3U, + Max = Uses8B, + }; + + /// Kind of ref stored in Refs[]: InternalRef or InternalRef4B. + enum class RefKindFlags { + InternalRef = 0U, + InternalRef4B = 1U, + Max = InternalRef4B, + }; + + enum Counts : int { + NumRefsShift = 0, + NumRefsBits = 3, + DataSizeShift = NumRefsShift + NumRefsBits, + DataSizeBits = 2, + RefKindShift = DataSizeShift + DataSizeBits, + RefKindBits = 1, + }; + static_assert(((UINT32_MAX << NumRefsBits) & (uint32_t)NumRefsFlags::Max) == + 0, + "Not enough bits"); + static_assert(((UINT32_MAX << DataSizeBits) & (uint32_t)DataSizeFlags::Max) == + 0, + "Not enough bits"); + static_assert(((UINT32_MAX << RefKindBits) & (uint32_t)RefKindFlags::Max) == + 0, + "Not enough bits"); + + /// Layout of the DataRecordHandle and how to decode it. 
+ struct LayoutFlags { + NumRefsFlags NumRefs; + DataSizeFlags DataSize; + RefKindFlags RefKind; + + static uint64_t pack(LayoutFlags LF) { + unsigned Packed = ((unsigned)LF.NumRefs << NumRefsShift) | + ((unsigned)LF.DataSize << DataSizeShift) | + ((unsigned)LF.RefKind << RefKindShift); +#ifndef NDEBUG + LayoutFlags RoundTrip = unpack(Packed); + assert(LF.NumRefs == RoundTrip.NumRefs); + assert(LF.DataSize == RoundTrip.DataSize); + assert(LF.RefKind == RoundTrip.RefKind); +#endif + return Packed; + } + static LayoutFlags unpack(uint64_t Storage) { + assert(Storage <= UINT8_MAX && "Expect storage to fit in a byte"); + LayoutFlags LF; + LF.NumRefs = + (NumRefsFlags)((Storage >> NumRefsShift) & ((1U << NumRefsBits) - 1)); + LF.DataSize = (DataSizeFlags)((Storage >> DataSizeShift) & + ((1U << DataSizeBits) - 1)); + LF.RefKind = + (RefKindFlags)((Storage >> RefKindShift) & ((1U << RefKindBits) - 1)); + return LF; + } + }; + + /// Header layout: + /// - 1-byte: LayoutFlags + /// - 1-byte: 1B size field + /// - {0,2}-bytes: 2B size field + struct Header { + using PackTy = uint32_t; + PackTy Packed; + + static constexpr unsigned LayoutFlagsShift = + (sizeof(PackTy) - 1) * CHAR_BIT; + }; + + struct Input { + InternalRefArrayRef Refs; + ArrayRef Data; + }; + + LayoutFlags getLayoutFlags() const { + return LayoutFlags::unpack(H->Packed >> Header::LayoutFlagsShift); + } + + uint64_t getDataSize() const; + void skipDataSize(LayoutFlags LF, int64_t &RelOffset) const; + uint32_t getNumRefs() const; + void skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const; + int64_t getRefsRelOffset() const; + int64_t getDataRelOffset() const; + + static uint64_t getTotalSize(uint64_t DataRelOffset, uint64_t DataSize) { + return DataRelOffset + DataSize + 1; + } + uint64_t getTotalSize() const { + return getDataRelOffset() + getDataSize() + 1; + } + + /// Describe the layout of data stored and how to decode from + /// DataRecordHandle. 
/// Data loaded into memory from a standalone file.
+class StandaloneDataInMemory { +public: + OnDiskContent getContent() const; + + StandaloneDataInMemory(std::unique_ptr Region, + TrieRecord::StorageKind SK) + : Region(std::move(Region)), SK(SK) { +#ifndef NDEBUG + bool IsStandalone = false; + switch (SK) { + case TrieRecord::StorageKind::Standalone: + case TrieRecord::StorageKind::StandaloneLeaf: + case TrieRecord::StorageKind::StandaloneLeaf0: + IsStandalone = true; + break; + default: + break; + } + assert(IsStandalone); +#endif + } + +private: + std::unique_ptr Region; + TrieRecord::StorageKind SK; +}; + +/// Container to lookup loaded standalone objects. +template class StandaloneDataMap { + static_assert(isPowerOf2_64(NumShards), "Expected power of 2"); + +public: + uintptr_t insert(ArrayRef Hash, TrieRecord::StorageKind SK, + std::unique_ptr Region); + + const StandaloneDataInMemory *lookup(ArrayRef Hash) const; + bool count(ArrayRef Hash) const { return bool(lookup(Hash)); } + +private: + struct Shard { + /// Needs to store a std::unique_ptr for a stable address identity. + DenseMap> Map; + mutable std::mutex Mutex; + }; + Shard &getShard(ArrayRef Hash) { + return const_cast( + const_cast(this)->getShard(Hash)); + } + const Shard &getShard(ArrayRef Hash) const { + static_assert(NumShards <= 256, "Expected only 8 bits of shard"); + return Shards[Hash[0] % NumShards]; + } + + Shard Shards[NumShards]; +}; + +using StandaloneDataMapTy = StandaloneDataMap<16>; + +/// A vector of internal node references. 
+class InternalRefVector { +public: + void push_back(InternalRef Ref) { + if (NeedsFull) + return FullRefs.push_back(Ref); + if (std::optional Small = InternalRef4B::tryToShrink(Ref)) + return SmallRefs.push_back(*Small); + NeedsFull = true; + assert(FullRefs.empty()); + FullRefs.reserve(SmallRefs.size() + 1); + for (InternalRef4B Small : SmallRefs) + FullRefs.push_back(Small); + FullRefs.push_back(Ref); + SmallRefs.clear(); + } + + operator InternalRefArrayRef() const { + assert(SmallRefs.empty() || FullRefs.empty()); + return NeedsFull ? InternalRefArrayRef(FullRefs) + : InternalRefArrayRef(SmallRefs); + } + +private: + bool NeedsFull = false; + SmallVector SmallRefs; + SmallVector FullRefs; +}; + +} // namespace + +Expected DataRecordHandle::createWithError( + function_ref(size_t Size)> Alloc, const Input &I) { + Layout L(I); + if (Expected Mem = Alloc(L.getTotalSize())) + return constructImpl(*Mem, I, L); + else + return Mem.takeError(); +} + +DataRecordHandle +DataRecordHandle::create(function_ref Alloc, + const Input &I) { + Layout L(I); + return constructImpl(Alloc(L.getTotalSize()), I, L); +} + +ObjectHandle ObjectHandle::fromFileOffset(FileOffset Offset) { + // Store the file offset as it is. + assert(!(Offset.get() & 0x1)); + return ObjectHandle(Offset.get()); +} + +ObjectHandle ObjectHandle::fromMemory(uintptr_t Ptr) { + // Store the pointer from memory with lowest bit set. + assert(!(Ptr & 0x1)); + return ObjectHandle(Ptr | 1); +} + +/// Proxy for an on-disk index record. 
/// files and has a signal handler registered that removes them all.
+ ~TempFile() { consumeError(discard()); } +}; + +class MappedTempFile { +public: + char *data() const { return Map.data(); } + size_t size() const { return Map.size(); } + + Error discard() { + assert(Map && "Map already destroyed"); + Map.unmap(); + return Temp.discard(); + } + + Error keep(const Twine &Name) { + assert(Map && "Map already destroyed"); + Map.unmap(); + return Temp.keep(Name); + } + + MappedTempFile(TempFile Temp, sys::fs::mapped_file_region Map) + : Temp(std::move(Temp)), Map(std::move(Map)) {} + +private: + TempFile Temp; + sys::fs::mapped_file_region Map; +}; +} // namespace + +Error TempFile::discard() { + Done = true; + if (FD != -1) { + sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD); + if (std::error_code EC = sys::fs::closeFile(File)) + return errorCodeToError(EC); + } + FD = -1; + + // Always try to close and remove. + std::error_code RemoveEC; + if (!TmpName.empty()) { + std::error_code EC = sys::fs::remove(TmpName); + if (EC) + return errorCodeToError(EC); + } + TmpName = ""; + + return Error::success(); +} + +Error TempFile::keep(const Twine &Name) { + assert(!Done); + Done = true; + // Always try to close and rename. 
+ std::error_code RenameEC = sys::fs::rename(TmpName, Name); + + if (!RenameEC) + TmpName = ""; + + sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD); + if (std::error_code EC = sys::fs::closeFile(File)) + return errorCodeToError(EC); + FD = -1; + + return errorCodeToError(RenameEC); +} + +Expected TempFile::create(const Twine &Model) { + int FD; + SmallString<128> ResultPath; + if (std::error_code EC = sys::fs::createUniqueFile(Model, FD, ResultPath)) + return errorCodeToError(EC); + + TempFile Ret(ResultPath, FD); + return std::move(Ret); +} + +bool TrieRecord::compare_exchange_strong(Data &Existing, Data New) { + uint64_t ExistingPacked = pack(Existing); + uint64_t NewPacked = pack(New); + if (Storage.compare_exchange_strong(ExistingPacked, NewPacked)) + return true; + Existing = unpack(ExistingPacked); + return false; +} + +DataRecordHandle DataRecordHandle::construct(char *Mem, const Input &I) { + return constructImpl(Mem, I, Layout(I)); +} + +Expected +DataRecordHandle::getFromDataPool(const OnDiskDataAllocator &Pool, + FileOffset Offset) { + auto HeaderData = Pool.get(Offset, sizeof(DataRecordHandle::Header)); + if (!HeaderData) + return HeaderData.takeError(); + + auto Record = DataRecordHandle::get(HeaderData->data()); + if (Record.getTotalSize() + Offset.get() > Pool.size()) + return createStringError( + make_error_code(std::errc::illegal_byte_sequence), + "data record span passed the end of the data pool"); + + return Record; +} + +DataRecordHandle DataRecordHandle::constructImpl(char *Mem, const Input &I, + const Layout &L) { + char *Next = Mem + sizeof(Header); + + // Fill in Packed and set other data, then come back to construct the header. + Header::PackTy Packed = 0; + Packed |= LayoutFlags::pack(L.Flags) << Header::LayoutFlagsShift; + + // Construct DataSize. 
+ switch (L.Flags.DataSize) { + case DataSizeFlags::Uses1B: + assert(I.Data.size() <= UINT8_MAX); + Packed |= (Header::PackTy)I.Data.size() + << ((sizeof(Packed) - 2) * CHAR_BIT); + break; + case DataSizeFlags::Uses2B: + assert(I.Data.size() <= UINT16_MAX); + Packed |= (Header::PackTy)I.Data.size() + << ((sizeof(Packed) - 4) * CHAR_BIT); + break; + case DataSizeFlags::Uses4B: + support::endian::write32le(Next, I.Data.size()); + Next += 4; + break; + case DataSizeFlags::Uses8B: + support::endian::write64le(Next, I.Data.size()); + Next += 8; + break; + } + + // Construct NumRefs. + // + // NOTE: May be writing NumRefs even if there are zero refs in order to fix + // alignment. + switch (L.Flags.NumRefs) { + case NumRefsFlags::Uses0B: + break; + case NumRefsFlags::Uses1B: + assert(I.Refs.size() <= UINT8_MAX); + Packed |= (Header::PackTy)I.Refs.size() + << ((sizeof(Packed) - 2) * CHAR_BIT); + break; + case NumRefsFlags::Uses2B: + assert(I.Refs.size() <= UINT16_MAX); + Packed |= (Header::PackTy)I.Refs.size() + << ((sizeof(Packed) - 4) * CHAR_BIT); + break; + case NumRefsFlags::Uses4B: + support::endian::write32le(Next, I.Refs.size()); + Next += 4; + break; + case NumRefsFlags::Uses8B: + support::endian::write64le(Next, I.Refs.size()); + Next += 8; + break; + } + + // Construct Refs[]. + if (!I.Refs.empty()) { + assert((L.Flags.RefKind == RefKindFlags::InternalRef4B) == I.Refs.is4B()); + ArrayRef RefsBuffer = I.Refs.getBuffer(); + llvm::copy(RefsBuffer, Next); + Next += RefsBuffer.size(); + } + + // Construct Data and the trailing null. + assert(isAddrAligned(Align(8), Next)); + llvm::copy(I.Data, Next); + Next[I.Data.size()] = 0; + + // Construct the header itself and return. 
+ Header *H = new (Mem) Header{Packed}; + DataRecordHandle Record(*H); + assert(Record.getData() == I.Data); + assert(Record.getNumRefs() == I.Refs.size()); + assert(Record.getRefs() == I.Refs); + assert(Record.getLayoutFlags().DataSize == L.Flags.DataSize); + assert(Record.getLayoutFlags().NumRefs == L.Flags.NumRefs); + assert(Record.getLayoutFlags().RefKind == L.Flags.RefKind); + return Record; +} + +DataRecordHandle::Layout::Layout(const Input &I) { + // Start initial relative offsets right after the Header. + uint64_t RelOffset = sizeof(Header); + + // Initialize the easy stuff. + DataSize = I.Data.size(); + NumRefs = I.Refs.size(); + + // Check refs size. + Flags.RefKind = + I.Refs.is4B() ? RefKindFlags::InternalRef4B : RefKindFlags::InternalRef; + + // Find the smallest slot available for DataSize. + bool Has1B = true; + bool Has2B = true; + if (DataSize <= UINT8_MAX && Has1B) { + Flags.DataSize = DataSizeFlags::Uses1B; + Has1B = false; + } else if (DataSize <= UINT16_MAX && Has2B) { + Flags.DataSize = DataSizeFlags::Uses2B; + Has2B = false; + } else if (DataSize <= UINT32_MAX) { + Flags.DataSize = DataSizeFlags::Uses4B; + RelOffset += 4; + } else { + Flags.DataSize = DataSizeFlags::Uses8B; + RelOffset += 8; + } + + // Find the smallest slot available for NumRefs. Never sets NumRefs8B here. + if (!NumRefs) { + Flags.NumRefs = NumRefsFlags::Uses0B; + } else if (NumRefs <= UINT8_MAX && Has1B) { + Flags.NumRefs = NumRefsFlags::Uses1B; + Has1B = false; + } else if (NumRefs <= UINT16_MAX && Has2B) { + Flags.NumRefs = NumRefsFlags::Uses2B; + Has2B = false; + } else { + Flags.NumRefs = NumRefsFlags::Uses4B; + RelOffset += 4; + } + + // Helper to "upgrade" either DataSize or NumRefs by 4B to avoid complicated + // padding rules when reading and writing. This also bumps RelOffset. + // + // The value for NumRefs is strictly limited to UINT32_MAX, but it can be + // stored as 8B. This means we can *always* find a size to grow. + // + // NOTE: Only call this once. 
+ auto GrowSizeFieldsBy4B = [&]() { + assert(isAligned(Align(4), RelOffset)); + RelOffset += 4; + + assert(Flags.NumRefs != NumRefsFlags::Uses8B && + "Expected to be able to grow NumRefs8B"); + + // First try to grow DataSize. NumRefs will not (yet) be 8B, and if + // DataSize is upgraded to 8B it'll already be aligned. + // + // Failing that, grow NumRefs. + if (Flags.DataSize < DataSizeFlags::Uses4B) + Flags.DataSize = DataSizeFlags::Uses4B; // DataSize: Packed => 4B. + else if (Flags.DataSize < DataSizeFlags::Uses8B) + Flags.DataSize = DataSizeFlags::Uses8B; // DataSize: 4B => 8B. + else if (Flags.NumRefs < NumRefsFlags::Uses4B) + Flags.NumRefs = NumRefsFlags::Uses4B; // NumRefs: Packed => 4B. + else + Flags.NumRefs = NumRefsFlags::Uses8B; // NumRefs: 4B => 8B. + }; + + assert(isAligned(Align(4), RelOffset)); + if (Flags.RefKind == RefKindFlags::InternalRef) { + // List of 8B refs should be 8B-aligned. Grow one of the sizes to get this + // without padding. + if (!isAligned(Align(8), RelOffset)) + GrowSizeFieldsBy4B(); + + assert(isAligned(Align(8), RelOffset)); + RefsRelOffset = RelOffset; + RelOffset += 8 * NumRefs; + } else { + // The array of 4B refs doesn't need 8B alignment, but the data will need + // to be 8B-aligned. Detect this now, and, if necessary, shift everything + // by 4B by growing one of the sizes. + // If we remove the need for 8B-alignment for data there is <1% savings in + // disk storage for a clang build using MCCAS but the 8B-alignment may be + // useful in the future so keep it for now. 
+ uint64_t RefListSize = 4 * NumRefs; + if (!isAligned(Align(8), RelOffset + RefListSize)) + GrowSizeFieldsBy4B(); + RefsRelOffset = RelOffset; + RelOffset += RefListSize; + } + + assert(isAligned(Align(8), RelOffset)); + DataRelOffset = RelOffset; +} + +uint64_t DataRecordHandle::getDataSize() const { + int64_t RelOffset = sizeof(Header); + auto *DataSizePtr = reinterpret_cast(H) + RelOffset; + switch (getLayoutFlags().DataSize) { + case DataSizeFlags::Uses1B: + return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX; + case DataSizeFlags::Uses2B: + return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) & + UINT16_MAX; + case DataSizeFlags::Uses4B: + return support::endian::read32le(DataSizePtr); + case DataSizeFlags::Uses8B: + return support::endian::read64le(DataSizePtr); + } +} + +void DataRecordHandle::skipDataSize(LayoutFlags LF, int64_t &RelOffset) const { + if (LF.DataSize >= DataSizeFlags::Uses4B) + RelOffset += 4; + if (LF.DataSize >= DataSizeFlags::Uses8B) + RelOffset += 4; +} + +uint32_t DataRecordHandle::getNumRefs() const { + LayoutFlags LF = getLayoutFlags(); + int64_t RelOffset = sizeof(Header); + skipDataSize(LF, RelOffset); + auto *NumRefsPtr = reinterpret_cast(H) + RelOffset; + switch (LF.NumRefs) { + case NumRefsFlags::Uses0B: + return 0; + case NumRefsFlags::Uses1B: + return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX; + case NumRefsFlags::Uses2B: + return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) & + UINT16_MAX; + case NumRefsFlags::Uses4B: + return support::endian::read32le(NumRefsPtr); + case NumRefsFlags::Uses8B: + return support::endian::read64le(NumRefsPtr); + } +} + +void DataRecordHandle::skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const { + if (LF.NumRefs >= NumRefsFlags::Uses4B) + RelOffset += 4; + if (LF.NumRefs >= NumRefsFlags::Uses8B) + RelOffset += 4; +} + +int64_t DataRecordHandle::getRefsRelOffset() const { + LayoutFlags LF = getLayoutFlags(); + int64_t 
RelOffset = sizeof(Header); + skipDataSize(LF, RelOffset); + skipNumRefs(LF, RelOffset); + return RelOffset; +} + +int64_t DataRecordHandle::getDataRelOffset() const { + LayoutFlags LF = getLayoutFlags(); + int64_t RelOffset = sizeof(Header); + skipDataSize(LF, RelOffset); + skipNumRefs(LF, RelOffset); + uint32_t RefSize = LF.RefKind == RefKindFlags::InternalRef4B ? 4 : 8; + RelOffset += RefSize * getNumRefs(); + return RelOffset; +} + +Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { + return Index.validate([&](FileOffset Offset, + OnDiskTrieRawHashMap::ConstValueProxy Record) + -> Error { + auto formatError = [&](Twine Msg) { + return createStringError( + llvm::errc::illegal_byte_sequence, + "bad record at 0x" + + utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " + + Msg.str()); + }; + + if (Record.Data.size() != sizeof(TrieRecord)) + return formatError("wrong data record size"); + if (!isAligned(Align::Of(), Record.Data.size())) + return formatError("wrong data record alignment"); + + auto *R = reinterpret_cast(Record.Data.data()); + TrieRecord::Data D = R->load(); + std::unique_ptr FileBuffer; + if ((uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Unknown && + (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::DataPool && + (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Standalone && + (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf && + (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf0) + return formatError("invalid record kind value"); + + auto Ref = InternalRef::getFromOffset(Offset); + auto I = getIndexProxyFromRef(Ref); + if (!I) + return I.takeError(); + + switch (D.SK) { + case TrieRecord::StorageKind::Unknown: + // This could be an abandoned entry due to a termination before updating + // the record. It can be reused by later insertion so just skip this entry + // for now. 
// If we need to validate the content of the file later, just load the
// buffer here. Otherwise, just check the existence of the file.
record is not big enough to read the header"); + auto DataRecord = DataRecordHandle::get(FileBuffer->getBufferStart()); + if (DataRecord.getTotalSize() < FileBuffer->getBufferSize()) + return dataError( + "data record span passed the end of the standalone file"); + for (auto InternRef : DataRecord.getRefs()) { + auto Index = getIndexProxyFromRef(InternRef); + if (!Index) + return Index.takeError(); + Refs.push_back(Index->Hash); + } + StoredData = DataRecord.getData(); + break; + } + case TrieRecord::StorageKind::StandaloneLeaf: + case TrieRecord::StorageKind::StandaloneLeaf0: { + StoredData = arrayRefFromStringRef(FileBuffer->getBuffer()); + if (D.SK == TrieRecord::StorageKind::StandaloneLeaf0) { + if (!FileBuffer->getBuffer().ends_with('\0')) + return dataError("standalone file is not zero terminated"); + StoredData = StoredData.drop_back(1); + } + break; + } + } + + SmallVector ComputedHash; + Hasher(Refs, StoredData, ComputedHash); + if (I->Hash != ArrayRef(ComputedHash)) + return dataError("hash mismatch, got \'" + toHex(ComputedHash) + + "\' instead"); + + return Error::success(); + }); +} + +void OnDiskGraphDB::print(raw_ostream &OS) const { + OS << "on-disk-root-path: " << RootPath << "\n"; + + struct PoolInfo { + uint64_t Offset; + }; + SmallVector Pool; + + OS << "\n"; + OS << "index:\n"; + Index.print(OS, [&](ArrayRef Data) { + assert(Data.size() == sizeof(TrieRecord)); + assert(isAligned(Align::Of(), Data.size())); + auto *R = reinterpret_cast(Data.data()); + TrieRecord::Data D = R->load(); + OS << " SK="; + switch (D.SK) { + case TrieRecord::StorageKind::Unknown: + OS << "unknown "; + break; + case TrieRecord::StorageKind::DataPool: + OS << "datapool "; + Pool.push_back({D.Offset.get()}); + break; + case TrieRecord::StorageKind::Standalone: + OS << "standalone-data "; + break; + case TrieRecord::StorageKind::StandaloneLeaf: + OS << "standalone-leaf "; + break; + case TrieRecord::StorageKind::StandaloneLeaf0: + OS << "standalone-leaf+0"; + break; + } + 
OS << " Offset=" << (void *)D.Offset.get(); + }); + if (Pool.empty()) + return; + + OS << "\n"; + OS << "pool:\n"; + llvm::sort( + Pool, [](PoolInfo LHS, PoolInfo RHS) { return LHS.Offset < RHS.Offset; }); + for (PoolInfo PI : Pool) { + OS << "- addr=" << (void *)PI.Offset << " "; + auto D = DataRecordHandle::getFromDataPool(DataPool, FileOffset(PI.Offset)); + if (!D) { + OS << "error: " << toString(D.takeError()); + return; + } + + OS << "record refs=" << D->getNumRefs() << " data=" << D->getDataSize() + << " size=" << D->getTotalSize() + << " end=" << (void *)(PI.Offset + D->getTotalSize()) << "\n"; + } +} + +Expected +OnDiskGraphDB::indexHash(ArrayRef Hash) { + auto P = Index.insertLazy( + Hash, [](FileOffset TentativeOffset, + OnDiskTrieRawHashMap::ValueProxy TentativeValue) { + assert(TentativeValue.Data.size() == sizeof(TrieRecord)); + assert( + isAddrAligned(Align::Of(), TentativeValue.Data.data())); + new (TentativeValue.Data.data()) TrieRecord(); + }); + if (LLVM_UNLIKELY(!P)) + return P.takeError(); + + assert(*P && "Expected insertion"); + return getIndexProxyFromPointer(*P); +} + +OnDiskGraphDB::IndexProxy OnDiskGraphDB::getIndexProxyFromPointer( + OnDiskTrieRawHashMap::ConstOnDiskPtr P) const { + assert(P); + assert(P.getOffset()); + return IndexProxy{P.getOffset(), P->Hash, + *const_cast( + reinterpret_cast(P->Data.data()))}; +} + +Expected OnDiskGraphDB::getReference(ArrayRef Hash) { + auto I = indexHash(Hash); + if (LLVM_UNLIKELY(!I)) + return I.takeError(); + return getExternalReference(*I); +} + +ObjectID OnDiskGraphDB::getExternalReference(const IndexProxy &I) { + return getExternalReference(makeInternalRef(I.Offset)); +} + +std::optional +OnDiskGraphDB::getExistingReference(ArrayRef Digest) { + auto tryUpstream = + [&](std::optional I) -> std::optional { + if (!UpstreamDB) + return std::nullopt; + std::optional UpstreamID = + UpstreamDB->getExistingReference(Digest); + if (LLVM_UNLIKELY(!UpstreamID)) + return std::nullopt; + auto Ref = 
expectedToOptional(indexHash(Digest)); + if (!Ref) + return std::nullopt; + if (!I) + I.emplace(*Ref); + return getExternalReference(*I); + }; + + OnDiskTrieRawHashMap::ConstOnDiskPtr P = Index.find(Digest); + if (!P) + return tryUpstream(std::nullopt); + IndexProxy I = getIndexProxyFromPointer(P); + TrieRecord::Data Obj = I.Ref.load(); + if (Obj.SK == TrieRecord::StorageKind::Unknown) + return tryUpstream(I); + return getExternalReference(makeInternalRef(I.Offset)); +} + +Expected +OnDiskGraphDB::getIndexProxyFromRef(InternalRef Ref) const { + auto P = Index.recoverFromFileOffset(Ref.getFileOffset()); + if (LLVM_UNLIKELY(!P)) + return P.takeError(); + return getIndexProxyFromPointer(*P); +} + +Expected> OnDiskGraphDB::getDigest(InternalRef Ref) const { + auto I = getIndexProxyFromRef(Ref); + if (!I) + return I.takeError(); + return I->Hash; +} + +ArrayRef OnDiskGraphDB::getDigest(const IndexProxy &I) const { + return I.Hash; +} + +static OnDiskContent getContentFromHandle(const OnDiskDataAllocator &DataPool, + ObjectHandle OH) { + // Decode ObjectHandle to locate the stored content. 
+ uint64_t Data = OH.getOpaqueData(); + if (Data & 1) { + const auto *SDIM = + reinterpret_cast(Data & (-1ULL << 1)); + return SDIM->getContent(); + } + + auto DataHandle = + cantFail(DataRecordHandle::getFromDataPool(DataPool, FileOffset(Data))); + assert(DataHandle.getData().end()[0] == 0 && "Null termination"); + return OnDiskContent{DataHandle, std::nullopt}; +} + +ArrayRef OnDiskGraphDB::getObjectData(ObjectHandle Node) const { + OnDiskContent Content = getContentFromHandle(DataPool, Node); + if (Content.Bytes) + return *Content.Bytes; + assert(Content.Record && "Expected record or bytes"); + return Content.Record->getData(); +} + +InternalRefArrayRef OnDiskGraphDB::getInternalRefs(ObjectHandle Node) const { + if (std::optional Record = + getContentFromHandle(DataPool, Node).Record) + return Record->getRefs(); + return std::nullopt; +} + +Expected> +OnDiskGraphDB::load(ObjectID ExternalRef) { + InternalRef Ref = getInternalRef(ExternalRef); + auto I = getIndexProxyFromRef(Ref); + if (!I) + return I.takeError(); + TrieRecord::Data Object = I->Ref.load(); + + if (Object.SK == TrieRecord::StorageKind::Unknown) { + if (!UpstreamDB) + return std::nullopt; + return faultInFromUpstream(ExternalRef); + } + + if (Object.SK == TrieRecord::StorageKind::DataPool) + return ObjectHandle::fromFileOffset(Object.Offset); + + // Only TrieRecord::StorageKind::Standalone (and variants) need to be + // explicitly loaded. + // + // There's corruption if standalone objects have offsets, or if we get here + // for something that isn't standalone. + if (Object.Offset) + return createCorruptObjectError(getDigest(*I)); + switch (Object.SK) { + case TrieRecord::StorageKind::Unknown: + case TrieRecord::StorageKind::DataPool: + llvm_unreachable("unexpected storage kind"); + case TrieRecord::StorageKind::Standalone: + case TrieRecord::StorageKind::StandaloneLeaf0: + case TrieRecord::StorageKind::StandaloneLeaf: + break; + } + + // Load it from disk. 
+ // + // Note: Creation logic guarantees that data that needs null-termination is + // suitably 0-padded. Requiring null-termination here would be too expensive + // for extremely large objects that happen to be page-aligned. + SmallString<256> Path; + getStandalonePath(TrieRecord::getStandaloneFilePrefix(Object.SK), *I, Path); + + auto File = sys::fs::openNativeFileForRead(Path); + if (!File) + return createFileError(Path, File.takeError()); + + auto CloseFile = make_scope_exit([&]() { sys::fs::closeFile(*File); }); + + sys::fs::file_status Status; + if (std::error_code EC = sys::fs::status(*File, Status)) + return createCorruptObjectError(getDigest(*I)); + + std::error_code EC; + auto Region = std::make_unique( + *File, sys::fs::mapped_file_region::readonly, Status.getSize(), 0, EC); + if (EC) + return createCorruptObjectError(getDigest(*I)); + + return ObjectHandle::fromMemory( + static_cast(StandaloneData) + ->insert(I->Hash, Object.SK, std::move(Region))); +} + +Expected OnDiskGraphDB::isMaterialized(ObjectID Ref) { + auto Presence = getObjectPresence(Ref, /*CheckUpstream=*/true); + if (!Presence) + return Presence.takeError(); + + switch (*Presence) { + case ObjectPresence::Missing: + return false; + case ObjectPresence::InPrimaryDB: + return true; + case ObjectPresence::OnlyInUpstreamDB: + if (auto FaultInResult = faultInFromUpstream(Ref); !FaultInResult) + return FaultInResult.takeError(); + return true; + } +} + +Expected +OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef, + bool CheckUpstream) const { + InternalRef Ref = getInternalRef(ExternalRef); + auto I = getIndexProxyFromRef(Ref); + if (!I) + return I.takeError(); + + TrieRecord::Data Object = I->Ref.load(); + if (Object.SK != TrieRecord::StorageKind::Unknown) + return ObjectPresence::InPrimaryDB; + if (!CheckUpstream || !UpstreamDB) + return ObjectPresence::Missing; + std::optional UpstreamID = + UpstreamDB->getExistingReference(getDigest(*I)); + return UpstreamID.has_value() ? 
ObjectPresence::OnlyInUpstreamDB + : ObjectPresence::Missing; +} + +InternalRef OnDiskGraphDB::makeInternalRef(FileOffset IndexOffset) { + return InternalRef::getFromOffset(IndexOffset); +} + +void OnDiskGraphDB::getStandalonePath(StringRef Prefix, const IndexProxy &I, + SmallVectorImpl &Path) const { + Path.assign(RootPath.begin(), RootPath.end()); + sys::path::append(Path, + Prefix + Twine(I.Offset.get()) + "." + CASFormatVersion); +} + +OnDiskContent StandaloneDataInMemory::getContent() const { + bool Leaf0 = false; + bool Leaf = false; + switch (SK) { + default: + llvm_unreachable("Storage kind must be standalone"); + case TrieRecord::StorageKind::Standalone: + break; + case TrieRecord::StorageKind::StandaloneLeaf0: + Leaf = Leaf0 = true; + break; + case TrieRecord::StorageKind::StandaloneLeaf: + Leaf = true; + break; + } + + if (Leaf) { + StringRef Data(Region->data(), Region->size()); + assert(Data.drop_back(Leaf0).end()[0] == 0 && + "Standalone node data missing null termination"); + return OnDiskContent{std::nullopt, + arrayRefFromStringRef(Data.drop_back(Leaf0))}; + } + + DataRecordHandle Record = DataRecordHandle::get(Region->data()); + assert(Record.getData().end()[0] == 0 && + "Standalone object record missing null termination for data"); + return OnDiskContent{Record, std::nullopt}; +} + +static Expected createTempFile(StringRef FinalPath, + uint64_t Size) { + assert(Size && "Unexpected request for an empty temp file"); + Expected File = TempFile::create(FinalPath + ".%%%%%%"); + if (!File) + return File.takeError(); + + if (Error E = preallocateFileTail(File->FD, 0, Size).takeError()) + return createFileError(File->TmpName, std::move(E)); + + if (auto EC = sys::fs::resize_file_before_mapping_readwrite(File->FD, Size)) + return createFileError(File->TmpName, EC); + + std::error_code EC; + sys::fs::mapped_file_region Map(sys::fs::convertFDToNativeFile(File->FD), + sys::fs::mapped_file_region::readwrite, Size, + 0, EC); + if (EC) + return 
createFileError(File->TmpName, EC); + return MappedTempFile(std::move(*File), std::move(Map)); +} + +static size_t getPageSize() { + static int PageSize = sys::Process::getPageSizeEstimate(); + return PageSize; +} + +Error OnDiskGraphDB::createStandaloneLeaf(IndexProxy &I, ArrayRef Data) { + assert(Data.size() > TrieRecord::MaxEmbeddedSize && + "Expected a bigger file for external content..."); + + bool Leaf0 = isAligned(Align(getPageSize()), Data.size()); + TrieRecord::StorageKind SK = Leaf0 ? TrieRecord::StorageKind::StandaloneLeaf0 + : TrieRecord::StorageKind::StandaloneLeaf; + + SmallString<256> Path; + int64_t FileSize = Data.size() + Leaf0; + getStandalonePath(TrieRecord::getStandaloneFilePrefix(SK), I, Path); + + // Write the file. Don't reuse this mapped_file_region, which is read/write. + // Let load() pull up one that's read-only. + Expected File = createTempFile(Path, FileSize); + if (!File) + return File.takeError(); + assert(File->size() == (uint64_t)FileSize); + llvm::copy(Data, File->data()); + if (Leaf0) + File->data()[Data.size()] = 0; + assert(File->data()[Data.size()] == 0); + if (Error E = File->keep(Path)) + return E; + + // Store the object reference. + TrieRecord::Data Existing; + { + TrieRecord::Data Leaf{SK, FileOffset()}; + if (I.Ref.compare_exchange_strong(Existing, Leaf)) { + recordStandaloneSizeIncrease(FileSize); + return Error::success(); + } + } + + // If there was a race, confirm that the new value has valid storage. + if (Existing.SK == TrieRecord::StorageKind::Unknown) + return createCorruptObjectError(getDigest(I)); + + return Error::success(); +} + +Error OnDiskGraphDB::store(ObjectID ID, ArrayRef Refs, + ArrayRef Data) { + auto I = getIndexProxyFromRef(getInternalRef(ID)); + if (LLVM_UNLIKELY(!I)) + return I.takeError(); + + // Early return in case the node exists. + { + TrieRecord::Data Existing = I->Ref.load(); + if (Existing.SK != TrieRecord::StorageKind::Unknown) + return Error::success(); + } + + // Big leaf nodes. 
+ if (Refs.empty() && Data.size() > TrieRecord::MaxEmbeddedSize) + return createStandaloneLeaf(*I, Data); + + // TODO: Check whether it's worth checking the index for an already existing + // object (like storeTreeImpl() does) before building up the + // InternalRefVector. + InternalRefVector InternalRefs; + for (ObjectID Ref : Refs) + InternalRefs.push_back(getInternalRef(Ref)); + + // Create the object. + + DataRecordHandle::Input Input{InternalRefs, Data}; + + // Compute the storage kind, allocate it, and create the record. + TrieRecord::StorageKind SK = TrieRecord::StorageKind::Unknown; + FileOffset PoolOffset; + SmallString<256> Path; + std::optional File; + std::optional FileSize; + auto AllocStandaloneFile = [&](size_t Size) -> Expected { + getStandalonePath(TrieRecord::getStandaloneFilePrefix( + TrieRecord::StorageKind::Standalone), + *I, Path); + if (Error E = createTempFile(Path, Size).moveInto(File)) + return std::move(E); + assert(File->size() == Size); + FileSize = Size; + SK = TrieRecord::StorageKind::Standalone; + return File->data(); + }; + auto Alloc = [&](size_t Size) -> Expected { + if (Size <= TrieRecord::MaxEmbeddedSize) { + SK = TrieRecord::StorageKind::DataPool; + auto P = DataPool.allocate(Size); + if (LLVM_UNLIKELY(!P)) { + char *NewAlloc = nullptr; + auto NewE = handleErrors( + P.takeError(), [&](std::unique_ptr E) -> Error { + if (E->convertToErrorCode() == std::errc::not_enough_memory) + return AllocStandaloneFile(Size).moveInto(NewAlloc); + return Error(std::move(E)); + }); + if (!NewE) + return NewAlloc; + return std::move(NewE); + } + PoolOffset = P->getOffset(); + LLVM_DEBUG({ + dbgs() << "pool-alloc addr=" << (void *)PoolOffset.get() + << " size=" << Size + << " end=" << (void *)(PoolOffset.get() + Size) << "\n"; + }); + return (*P)->data(); + } + return AllocStandaloneFile(Size); + }; + + DataRecordHandle Record; + if (Error E = + DataRecordHandle::createWithError(Alloc, Input).moveInto(Record)) + return E; + 
assert(Record.getData().end()[0] == 0 && "Expected null-termination"); + assert(Record.getData() == Input.Data && "Expected initialization"); + assert(SK != TrieRecord::StorageKind::Unknown); + assert(bool(File) != bool(PoolOffset) && + "Expected either a mapped file or a pooled offset"); + + // Check for a race before calling MappedTempFile::keep(). + // + // Then decide what to do with the file. Better to discard than overwrite if + // another thread/process has already added this. + TrieRecord::Data Existing = I->Ref.load(); + { + TrieRecord::Data NewObject{SK, PoolOffset}; + if (File) { + if (Existing.SK == TrieRecord::StorageKind::Unknown) { + // Keep the file! + if (Error E = File->keep(Path)) + return E; + } else { + File.reset(); + } + } + + // If we didn't already see a racing/existing write, then try storing the + // new object. If that races, confirm that the new value has valid storage. + // + // TODO: Find a way to reuse the storage from the new-but-abandoned record + // handle. + if (Existing.SK == TrieRecord::StorageKind::Unknown) { + if (I->Ref.compare_exchange_strong(Existing, NewObject)) { + if (FileSize) + recordStandaloneSizeIncrease(*FileSize); + return Error::success(); + } + } + } + + if (Existing.SK == TrieRecord::StorageKind::Unknown) + return createCorruptObjectError(getDigest(*I)); + + // Load existing object. 
+ return Error::success(); +} + +void OnDiskGraphDB::recordStandaloneSizeIncrease(size_t SizeIncrease) { + standaloneStorageSize().fetch_add(SizeIncrease, std::memory_order_relaxed); +} + +std::atomic &OnDiskGraphDB::standaloneStorageSize() const { + MutableArrayRef UserHeader = DataPool.getUserHeader(); + assert(UserHeader.size() == sizeof(std::atomic)); + assert(isAddrAligned(Align(8), UserHeader.data())); + return *reinterpret_cast *>(UserHeader.data()); +} + +uint64_t OnDiskGraphDB::getStandaloneStorageSize() const { + return standaloneStorageSize().load(std::memory_order_relaxed); +} + +size_t OnDiskGraphDB::getStorageSize() const { + return Index.size() + DataPool.size() + getStandaloneStorageSize(); +} + +unsigned OnDiskGraphDB::getHardStorageLimitUtilization() const { + unsigned IndexPercent = Index.size() * 100ULL / Index.capacity(); + unsigned DataPercent = DataPool.size() * 100ULL / DataPool.capacity(); + return std::max(IndexPercent, DataPercent); +} + +Expected> OnDiskGraphDB::open( + StringRef AbsPath, StringRef HashName, unsigned HashByteSize, + std::unique_ptr UpstreamDB, FaultInPolicy Policy) { + if (std::error_code EC = sys::fs::create_directories(AbsPath)) + return createFileError(AbsPath, EC); + + constexpr uint64_t MB = 1024ull * 1024ull; + constexpr uint64_t GB = 1024ull * 1024ull * 1024ull; + + uint64_t MaxIndexSize = 12 * GB; + uint64_t MaxDataPoolSize = 24 * GB; + + if (useSmallMappingSize(AbsPath)) { + MaxIndexSize = 1 * GB; + MaxDataPoolSize = 2 * GB; + } + + auto CustomSize = getOverriddenMaxMappingSize(); + if (!CustomSize) + return CustomSize.takeError(); + if (*CustomSize) + MaxIndexSize = MaxDataPoolSize = **CustomSize; + + SmallString<256> IndexPath(AbsPath); + sys::path::append(IndexPath, IndexFilePrefix + CASFormatVersion); + std::optional Index; + if (Error E = OnDiskTrieRawHashMap::create( + IndexPath, IndexTableName + "[" + HashName + "]", + HashByteSize * CHAR_BIT, + /*DataSize=*/sizeof(TrieRecord), MaxIndexSize, + 
/*MinFileSize=*/MB) + .moveInto(Index)) + return std::move(E); + + uint32_t UserHeaderSize = sizeof(std::atomic); + + SmallString<256> DataPoolPath(AbsPath); + sys::path::append(DataPoolPath, DataPoolFilePrefix + CASFormatVersion); + std::optional DataPool; + StringRef PolicyName = + Policy == FaultInPolicy::SingleNode ? "single" : "full"; + if (Error E = OnDiskDataAllocator::create( + DataPoolPath, + DataPoolTableName + "[" + HashName + "]" + PolicyName, + MaxDataPoolSize, /*MinFileSize=*/MB, UserHeaderSize, + [](void *UserHeaderPtr) { + new (UserHeaderPtr) std::atomic(0); + }) + .moveInto(DataPool)) + return std::move(E); + if (DataPool->getUserHeader().size() != UserHeaderSize) + return createStringError(llvm::errc::argument_out_of_domain, + "unexpected user header in '" + DataPoolPath + + "'"); + + return std::unique_ptr( + new OnDiskGraphDB(AbsPath, std::move(*Index), std::move(*DataPool), + std::move(UpstreamDB), Policy)); +} + +OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index, + OnDiskDataAllocator DataPool, + std::unique_ptr UpstreamDB, + FaultInPolicy Policy) + : Index(std::move(Index)), DataPool(std::move(DataPool)), + RootPath(RootPath.str()), UpstreamDB(std::move(UpstreamDB)), + FIPolicy(Policy) { + /// Lifetime for "big" objects not in DataPool. + /// + /// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something + /// simpler on the assumption there won't be much contention since most data + /// is not big. If there is contention, and we've already fixed ObjectProxy + /// object handles to be cheap enough to use consistently, the fix might be + /// to use better use of them rather than optimizing this map. + /// + /// FIXME: Figure out the right number of shards, if any. 
+ StandaloneData = new StandaloneDataMapTy(); +} + +OnDiskGraphDB::~OnDiskGraphDB() { + delete static_cast(StandaloneData); +} + +Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID, + ObjectHandle UpstreamNode) { + // Copies the full CAS tree from upstream. Uses depth-first copying to protect + // against the process dying during importing and leaving the database with an + // incomplete tree. Note that if the upstream has missing nodes then the tree + // will be copied with missing nodes as well, it won't be considered an error. + + struct UpstreamCursor { + ObjectHandle Node; + size_t RefsCount; + object_refs_iterator RefI; + object_refs_iterator RefE; + }; + /// Keeps track of the state of visitation for current node and all of its + /// parents. + SmallVector CursorStack; + /// Keeps track of the currently visited nodes as they are imported into + /// primary database, from current node and its parents. When a node is + /// entered for visitation it appends its own ID, then appends referenced IDs + /// as they get imported. When a node is fully imported it removes the + /// referenced IDs from the bottom of the stack which leaves its own ID at the + /// bottom, adding to the list of referenced IDs for the parent node. + SmallVector PrimaryNodesStack; + + auto enqueueNode = [&](ObjectID PrimaryID, std::optional Node) { + PrimaryNodesStack.push_back(PrimaryID); + if (!Node) + return; + auto Refs = UpstreamDB->getObjectRefs(*Node); + CursorStack.push_back({*Node, + (size_t)std::distance(Refs.begin(), Refs.end()), + Refs.begin(), Refs.end()}); + }; + + enqueueNode(PrimaryID, UpstreamNode); + + while (!CursorStack.empty()) { + UpstreamCursor &Cur = CursorStack.back(); + if (Cur.RefI == Cur.RefE) { + // Copy the node data into the primary store. + // FIXME: Use hard-link or cloning if the file-system supports it and data + // is stored into a separate file. 
+ + // The bottom of \p PrimaryNodesStack contains the primary ID for the + // current node plus the list of imported referenced IDs. + assert(PrimaryNodesStack.size() >= Cur.RefsCount + 1); + ObjectID PrimaryID = *(PrimaryNodesStack.end() - Cur.RefsCount - 1); + auto PrimaryRefs = ArrayRef(PrimaryNodesStack) + .slice(PrimaryNodesStack.size() - Cur.RefsCount); + auto Data = UpstreamDB->getObjectData(Cur.Node); + if (Error E = store(PrimaryID, PrimaryRefs, Data)) + return E; + // Remove the current node and its IDs from the stack. + PrimaryNodesStack.truncate(PrimaryNodesStack.size() - Cur.RefsCount); + CursorStack.pop_back(); + continue; + } + + ObjectID UpstreamID = *(Cur.RefI++); + auto PrimaryID = getReference(UpstreamDB->getDigest(UpstreamID)); + if (LLVM_UNLIKELY(!PrimaryID)) + return PrimaryID.takeError(); + if (containsObject(*PrimaryID, /*CheckUpstream=*/false)) { + // This \p ObjectID already exists in the primary. Either it was imported + // via \p importFullTree or the client created it, in which case the + // client takes responsibility for how it was formed. + enqueueNode(*PrimaryID, std::nullopt); + continue; + } + Expected> UpstreamNode = + UpstreamDB->load(UpstreamID); + if (!UpstreamNode) + return UpstreamNode.takeError(); + enqueueNode(*PrimaryID, *UpstreamNode); + } + + assert(PrimaryNodesStack.size() == 1); + assert(PrimaryNodesStack.front() == PrimaryID); + return Error::success(); +} + +Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID, + ObjectHandle UpstreamNode) { + // Copies only a single node, it doesn't copy the referenced nodes. + + // Copy the node data into the primary store. + // FIXME: Use hard-link or cloning if the file-system supports it and data is + // stored into a separate file. 
+ + auto Data = UpstreamDB->getObjectData(UpstreamNode); + auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode); + SmallVector Refs; + Refs.reserve(std::distance(UpstreamRefs.begin(), UpstreamRefs.end())); + for (ObjectID UpstreamRef : UpstreamRefs) { + auto Ref = getReference(UpstreamDB->getDigest(UpstreamRef)); + if (LLVM_UNLIKELY(!Ref)) + return Ref.takeError(); + Refs.push_back(*Ref); + } + + return store(PrimaryID, Refs, Data); +} + +Expected> +OnDiskGraphDB::faultInFromUpstream(ObjectID PrimaryID) { + assert(UpstreamDB); + + auto UpstreamID = UpstreamDB->getReference(getDigest(PrimaryID)); + if (LLVM_UNLIKELY(!UpstreamID)) + return UpstreamID.takeError(); + + Expected> UpstreamNode = + UpstreamDB->load(*UpstreamID); + if (!UpstreamNode) + return UpstreamNode.takeError(); + if (!*UpstreamNode) + return std::nullopt; + + if (Error E = FIPolicy == FaultInPolicy::SingleNode + ? importSingleNode(PrimaryID, **UpstreamNode) + : importFullTree(PrimaryID, **UpstreamNode)) + return std::move(E); + return load(PrimaryID); +} diff --git a/llvm/lib/CAS/OnDiskKeyValueDB.cpp b/llvm/lib/CAS/OnDiskKeyValueDB.cpp new file mode 100644 index 0000000000000..21860717da3bf --- /dev/null +++ b/llvm/lib/CAS/OnDiskKeyValueDB.cpp @@ -0,0 +1,113 @@ +//===- OnDiskKeyValueDB.cpp -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file implements OnDiskKeyValueDB, an ondisk key value database. +/// +/// The KeyValue database file is named `actions.` inside the CAS +/// directory. The database stores a mapping between a fixed-sized key and a +/// fixed-sized value, where the size of key and value can be configured when +/// opening the database. 
+/// +// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/OnDiskKeyValueDB.h" +#include "OnDiskCommon.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Path.h" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; + +static constexpr StringLiteral ActionCacheFile = "actions."; + +Expected> OnDiskKeyValueDB::put(ArrayRef Key, + ArrayRef Value) { + if (LLVM_UNLIKELY(Value.size() != ValueSize)) + return createStringError(errc::invalid_argument, + "expected value size of " + itostr(ValueSize) + + ", got: " + itostr(Value.size())); + assert(Value.size() == ValueSize); + auto ActionP = Cache.insertLazy( + Key, [&](FileOffset TentativeOffset, + OnDiskTrieRawHashMap::ValueProxy TentativeValue) { + assert(TentativeValue.Data.size() == ValueSize); + llvm::copy(Value, TentativeValue.Data.data()); + }); + if (LLVM_UNLIKELY(!ActionP)) + return ActionP.takeError(); + return (*ActionP)->Data; +} + +Expected>> +OnDiskKeyValueDB::get(ArrayRef Key) { + // Check the result cache. 
+ OnDiskTrieRawHashMap::ConstOnDiskPtr ActionP = Cache.find(Key); + if (!ActionP) + return std::nullopt; + assert(isAddrAligned(Align(8), ActionP->Data.data())); + return ActionP->Data; +} + +Expected> +OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize, + StringRef ValueName, size_t ValueSize) { + if (std::error_code EC = sys::fs::create_directories(Path)) + return createFileError(Path, EC); + + SmallString<256> CachePath(Path); + sys::path::append(CachePath, ActionCacheFile + CASFormatVersion); + constexpr uint64_t MB = 1024ull * 1024ull; + constexpr uint64_t GB = 1024ull * 1024ull * 1024ull; + + uint64_t MaxFileSize = GB; + auto CustomSize = getOverriddenMaxMappingSize(); + if (!CustomSize) + return CustomSize.takeError(); + if (*CustomSize) + MaxFileSize = **CustomSize; + + std::optional ActionCache; + if (Error E = OnDiskTrieRawHashMap::create( + CachePath, + "llvm.actioncache[" + HashName + "->" + ValueName + "]", + KeySize * 8, + /*DataSize=*/ValueSize, MaxFileSize, /*MinFileSize=*/MB) + .moveInto(ActionCache)) + return std::move(E); + + return std::unique_ptr( + new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache))); +} + +Error OnDiskKeyValueDB::validate(CheckValueT CheckValue) const { + return Cache.validate( + [&](FileOffset Offset, + OnDiskTrieRawHashMap::ConstValueProxy Record) -> Error { + auto formatError = [&](Twine Msg) { + return createStringError( + llvm::errc::illegal_byte_sequence, + "bad cache value at 0x" + + utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " + + Msg.str()); + }; + + if (Record.Data.size() != ValueSize) + return formatError("wrong cache value size"); + if (!isAddrAligned(Align(8), Record.Data.data())) + return formatError("wrong cache value alignment"); + if (CheckValue) + return CheckValue(Offset, Record.Data); + return Error::success(); + }); +} diff --git a/llvm/unittests/CAS/CASTestConfig.cpp b/llvm/unittests/CAS/CASTestConfig.cpp index 29e2db48db5c0..91d0970367ac3 100644 --- 
a/llvm/unittests/CAS/CASTestConfig.cpp +++ b/llvm/unittests/CAS/CASTestConfig.cpp @@ -19,3 +19,17 @@ static CASTestingEnv createInMemory(int I) { INSTANTIATE_TEST_SUITE_P(InMemoryCAS, CASTest, ::testing::Values(createInMemory)); + +#if LLVM_ENABLE_ONDISK_CAS +namespace llvm::cas::ondisk { +extern void setMaxMappingSize(uint64_t Size); +} // namespace llvm::cas::ondisk + +void setMaxOnDiskCASMappingSize() { + static std::once_flag Flag; + std::call_once( + Flag, [] { llvm::cas::ondisk::setMaxMappingSize(100 * 1024 * 1024); }); +} +#else +void setMaxOnDiskCASMappingSize() {} +#endif /* LLVM_ENABLE_ONDISK_CAS */ diff --git a/llvm/unittests/CAS/CASTestConfig.h b/llvm/unittests/CAS/CASTestConfig.h index 8093a0b0864f9..c08968b95b9cc 100644 --- a/llvm/unittests/CAS/CASTestConfig.h +++ b/llvm/unittests/CAS/CASTestConfig.h @@ -18,6 +18,17 @@ struct CASTestingEnv { std::unique_ptr Cache; }; +void setMaxOnDiskCASMappingSize(); + +// Test fixture for on-disk data base tests. +class OnDiskCASTest : public ::testing::Test { +protected: + void SetUp() override { + // Use a smaller database size for testing to conserve disk space. 
+ setMaxOnDiskCASMappingSize(); + } +}; + class CASTest : public testing::TestWithParam> { protected: diff --git a/llvm/unittests/CAS/CMakeLists.txt b/llvm/unittests/CAS/CMakeLists.txt index ee40e6c9879a1..da469f7fccb5a 100644 --- a/llvm/unittests/CAS/CMakeLists.txt +++ b/llvm/unittests/CAS/CMakeLists.txt @@ -1,3 +1,19 @@ +set(ONDISK_CAS_TEST_SOURCES + OnDiskGraphDBTest.cpp + OnDiskDataAllocatorTest.cpp + OnDiskKeyValueDBTest.cpp + OnDiskTrieRawHashMapTest.cpp + ProgramTest.cpp + ) + +set(LLVM_OPTIONAL_SOURCES + ${ONDISK_CAS_TEST_SOURCES} + ) + +if (NOT LLVM_ENABLE_ONDISK_CAS) + unset(ONDISK_CAS_TEST_SOURCES) +endif() + set(LLVM_LINK_COMPONENTS Support CAS @@ -8,9 +24,8 @@ add_llvm_unittest(CASTests ActionCacheTest.cpp CASTestConfig.cpp ObjectStoreTest.cpp - OnDiskDataAllocatorTest.cpp - OnDiskTrieRawHashMapTest.cpp - ProgramTest.cpp + + ${ONDISK_CAS_TEST_SOURCES} ) target_link_libraries(CASTests PRIVATE LLVMTestingSupport) diff --git a/llvm/unittests/CAS/OnDiskCommonUtils.h b/llvm/unittests/CAS/OnDiskCommonUtils.h new file mode 100644 index 0000000000000..57c8c228867fd --- /dev/null +++ b/llvm/unittests/CAS/OnDiskCommonUtils.h @@ -0,0 +1,76 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file Helper functions to test OnDiskCASDatabases. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/BuiltinObjectHasher.h" +#include "llvm/CAS/OnDiskGraphDB.h" +#include "llvm/Support/BLAKE3.h" +#include "llvm/Testing/Support/Error.h" + +namespace llvm::unittest::cas { + +using namespace llvm::cas; +using namespace llvm::cas::ondisk; + +using HasherT = BLAKE3; +using HashType = decltype(HasherT::hash(std::declval &>())); +using ValueType = std::array; + +inline HashType digest(StringRef Data, ArrayRef> RefHashes) { + return BuiltinObjectHasher::hashObject( + RefHashes, arrayRefFromStringRef(Data)); +} + +inline ObjectID digest(OnDiskGraphDB &DB, StringRef Data, + ArrayRef Refs) { + SmallVector, 8> RefHashes; + for (ObjectID Ref : Refs) + RefHashes.push_back(DB.getDigest(Ref)); + HashType Digest = digest(Data, RefHashes); + std::optional ID; + EXPECT_THAT_ERROR(DB.getReference(Digest).moveInto(ID), Succeeded()); + return *ID; +} + +inline HashType digest(StringRef Data) { + return HasherT::hash(arrayRefFromStringRef(Data)); +} + +inline ValueType valueFromString(StringRef S) { + ValueType Val; + llvm::copy(S.substr(0, sizeof(Val)), Val.data()); + return Val; +} + +inline Expected store(OnDiskGraphDB &DB, StringRef Data, + ArrayRef Refs) { + ObjectID ID = digest(DB, Data, Refs); + if (Error E = DB.store(ID, Refs, arrayRefFromStringRef(Data))) + return std::move(E); + return ID; +} + +inline Error printTree(OnDiskGraphDB &DB, ObjectID ID, raw_ostream &OS, + unsigned Indent = 0) { + std::optional Obj; + if (Error E = DB.load(ID).moveInto(Obj)) + return E; + if (!Obj) + return Error::success(); + OS.indent(Indent) << toStringRef(DB.getObjectData(*Obj)) << '\n'; + for (ObjectID Ref : DB.getObjectRefs(*Obj)) { + if (Error E = printTree(DB, Ref, OS, Indent + 2)) + return E; + } + return Error::success(); +} + +} // namespace llvm::unittest::cas diff --git a/llvm/unittests/CAS/OnDiskGraphDBTest.cpp b/llvm/unittests/CAS/OnDiskGraphDBTest.cpp new file mode 100644 
index 0000000000000..58f5dcc625a09 --- /dev/null +++ b/llvm/unittests/CAS/OnDiskGraphDBTest.cpp @@ -0,0 +1,310 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CASTestConfig.h" +#include "OnDiskCommonUtils.h" +#include "llvm/Testing/Support/Error.h" +#include "llvm/Testing/Support/SupportHelpers.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; +using namespace llvm::unittest::cas; + +TEST_F(OnDiskCASTest, OnDiskGraphDBTest) { + unittest::TempDir Temp("ondiskcas", /*Unique=*/true); + std::unique_ptr DB; + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType)).moveInto(DB), + Succeeded()); + + auto digest = [&DB](StringRef Data, ArrayRef Refs) -> ObjectID { + return ::digest(*DB, Data, Refs); + }; + + auto store = [&](StringRef Data, + ArrayRef Refs) -> Expected { + return ::store(*DB, Data, Refs); + }; + + std::optional ID1; + ASSERT_THAT_ERROR(store("hello", {}).moveInto(ID1), Succeeded()); + + std::optional Obj1; + ASSERT_THAT_ERROR(DB->load(*ID1).moveInto(Obj1), Succeeded()); + ASSERT_TRUE(Obj1.has_value()); + EXPECT_EQ(toStringRef(DB->getObjectData(*Obj1)), "hello"); + + ArrayRef Digest1 = DB->getDigest(*ID1); + std::optional ID2; + ASSERT_THAT_ERROR(DB->getReference(Digest1).moveInto(ID2), Succeeded()); + EXPECT_EQ(ID1, ID2); + + ObjectID ID3 = digest("world", {}); + EXPECT_FALSE(DB->containsObject(ID3)); + std::optional Obj2; + ASSERT_THAT_ERROR(DB->load(ID3).moveInto(Obj2), Succeeded()); + EXPECT_FALSE(Obj2.has_value()); + + ASSERT_THAT_ERROR(DB->store(ID3, {}, arrayRefFromStringRef("world")), + Succeeded()); + 
EXPECT_TRUE(DB->containsObject(ID3)); + ASSERT_THAT_ERROR(DB->load(ID3).moveInto(Obj2), Succeeded()); + ASSERT_TRUE(Obj2.has_value()); + EXPECT_EQ(toStringRef(DB->getObjectData(*Obj2)), "world"); + + size_t LargeDataSize = 256LL * 1024LL; // 256K. + // The precise size number is not important, we mainly check that the large + // object will be properly accounted for. + EXPECT_TRUE(DB->getStorageSize() > 10 && + DB->getStorageSize() < LargeDataSize); + + SmallString<16> Buffer; + Buffer.resize(LargeDataSize); + ASSERT_THAT_ERROR(store(Buffer, {}).moveInto(ID1), Succeeded()); + size_t StorageSize = DB->getStorageSize(); + EXPECT_TRUE(StorageSize > LargeDataSize); + + // Close & re-open the DB and check that it reports the same storage size. + DB.reset(); + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType)).moveInto(DB), + Succeeded()); + EXPECT_EQ(DB->getStorageSize(), StorageSize); +} + +TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInSingleNode) { + unittest::TempDir TempUpstream("ondiskcas-upstream", /*Unique=*/true); + std::unique_ptr UpstreamDB; + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(TempUpstream.path(), "blake3", sizeof(HashType)) + .moveInto(UpstreamDB), + Succeeded()); + { + std::optional ID1; + ASSERT_THAT_ERROR(store(*UpstreamDB, "hello", {}).moveInto(ID1), + Succeeded()); + std::optional ID2; + ASSERT_THAT_ERROR(store(*UpstreamDB, "another", {}).moveInto(ID2), + Succeeded()); + std::optional ID3; + ASSERT_THAT_ERROR(store(*UpstreamDB, "world", {*ID1, *ID2}).moveInto(ID3), + Succeeded()); + } + + unittest::TempDir Temp("ondiskcas", /*Unique=*/true); + std::unique_ptr DB; + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType), + std::move(UpstreamDB), + OnDiskGraphDB::FaultInPolicy::SingleNode) + .moveInto(DB), + Succeeded()); + + ObjectID ID1 = digest(*DB, "hello", {}); + ObjectID ID2 = digest(*DB, "another", {}); + ObjectID ID3 = digest(*DB, "world", {ID1, ID2}); + ObjectID ID4 = digest(*DB, "world", 
{}); + + EXPECT_TRUE(DB->containsObject(ID1)); + EXPECT_TRUE(DB->containsObject(ID2)); + EXPECT_TRUE(DB->containsObject(ID3)); + EXPECT_FALSE(DB->containsObject(ID4)); + + EXPECT_TRUE(DB->getExistingReference(digest("hello", {})).has_value()); + EXPECT_TRUE(DB->getExistingReference(DB->getDigest(ID3)).has_value()); + EXPECT_FALSE(DB->getExistingReference(digest("world", {})).has_value()); + + { + std::optional Obj; + ASSERT_THAT_ERROR(DB->load(ID1).moveInto(Obj), Succeeded()); + ASSERT_TRUE(Obj.has_value()); + EXPECT_EQ(toStringRef(DB->getObjectData(*Obj)), "hello"); + auto Refs = DB->getObjectRefs(*Obj); + EXPECT_TRUE(Refs.empty()); + } + { + std::optional Obj; + ASSERT_THAT_ERROR(DB->load(ID3).moveInto(Obj), Succeeded()); + ASSERT_TRUE(Obj.has_value()); + EXPECT_EQ(toStringRef(DB->getObjectData(*Obj)), "world"); + auto Refs = DB->getObjectRefs(*Obj); + ASSERT_EQ(std::distance(Refs.begin(), Refs.end()), 2); + EXPECT_EQ(Refs.begin()[0], ID1); + EXPECT_EQ(Refs.begin()[1], ID2); + } + { + std::optional Obj; + ASSERT_THAT_ERROR(DB->load(ID4).moveInto(Obj), Succeeded()); + EXPECT_FALSE(Obj.has_value()); + } + + // Re-open the primary without chaining, to verify the data were copied from + // the upstream. 
+ ASSERT_THAT_ERROR( + OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType), + /*UpstreamDB=*/nullptr, + OnDiskGraphDB::FaultInPolicy::SingleNode) + .moveInto(DB), + Succeeded()); + ID1 = digest(*DB, "hello", {}); + ID2 = digest(*DB, "another", {}); + ID3 = digest(*DB, "world", {ID1, ID2}); + EXPECT_TRUE(DB->containsObject(ID1)); + EXPECT_FALSE(DB->containsObject(ID2)); + EXPECT_TRUE(DB->containsObject(ID3)); + { + std::optional Obj; + ASSERT_THAT_ERROR(DB->load(ID1).moveInto(Obj), Succeeded()); + ASSERT_TRUE(Obj.has_value()); + EXPECT_EQ(toStringRef(DB->getObjectData(*Obj)), "hello"); + auto Refs = DB->getObjectRefs(*Obj); + EXPECT_TRUE(Refs.empty()); + } +} + +TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInFullTree) { + unittest::TempDir TempUpstream("ondiskcas-upstream", /*Unique=*/true); + std::unique_ptr UpstreamDB; + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(TempUpstream.path(), "blake3", sizeof(HashType)) + .moveInto(UpstreamDB), + Succeeded()); + HashType RootHash; + { + std::optional ID11; + ASSERT_THAT_ERROR(store(*UpstreamDB, "11", {}).moveInto(ID11), Succeeded()); + std::optional ID121; + ASSERT_THAT_ERROR(store(*UpstreamDB, "121", {}).moveInto(ID121), + Succeeded()); + std::optional ID12; + ASSERT_THAT_ERROR(store(*UpstreamDB, "12", {*ID121}).moveInto(ID12), + Succeeded()); + std::optional ID1; + ASSERT_THAT_ERROR(store(*UpstreamDB, "1", {*ID11, *ID12}).moveInto(ID1), + Succeeded()); + std::optional ID21; + ASSERT_THAT_ERROR(store(*UpstreamDB, "21", {}).moveInto(ID21), Succeeded()); + std::optional ID22; + ASSERT_THAT_ERROR(store(*UpstreamDB, "22", {}).moveInto(ID22), Succeeded()); + std::optional ID2; + ASSERT_THAT_ERROR( + store(*UpstreamDB, "2", {*ID12, *ID21, *ID22}).moveInto(ID2), + Succeeded()); + std::optional IDRoot; + ASSERT_THAT_ERROR(store(*UpstreamDB, "root", {*ID1, *ID2}).moveInto(IDRoot), + Succeeded()); + ArrayRef Digest = UpstreamDB->getDigest(*IDRoot); + ASSERT_EQ(Digest.size(), RootHash.size()); + llvm::copy(Digest, RootHash.data()); 
+ } + + unittest::TempDir Temp("ondiskcas", /*Unique=*/true); + std::unique_ptr DB; + ASSERT_THAT_ERROR(OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType), + std::move(UpstreamDB), + OnDiskGraphDB::FaultInPolicy::FullTree) + .moveInto(DB), + Succeeded()); + + { + std::optional IDRoot; + ASSERT_THAT_ERROR(DB->getReference(RootHash).moveInto(IDRoot), Succeeded()); + std::optional Obj; + ASSERT_THAT_ERROR(DB->load(*IDRoot).moveInto(Obj), Succeeded()); + ASSERT_TRUE(Obj.has_value()); + EXPECT_EQ(toStringRef(DB->getObjectData(*Obj)), "root"); + auto Refs = DB->getObjectRefs(*Obj); + ASSERT_EQ(std::distance(Refs.begin(), Refs.end()), 2); + } + + // Re-open the primary without chaining, to verify the data were copied from + // the upstream. + ASSERT_THAT_ERROR(OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType), + /*UpstreamDB=*/nullptr, + OnDiskGraphDB::FaultInPolicy::FullTree) + .moveInto(DB), + Succeeded()); + + std::optional IDRoot; + ASSERT_THAT_ERROR(DB->getReference(RootHash).moveInto(IDRoot), Succeeded()); + std::string PrintedTree; + raw_string_ostream OS(PrintedTree); + ASSERT_THAT_ERROR(printTree(*DB, *IDRoot, OS), Succeeded()); + StringRef Expected = R"(root + 1 + 11 + 12 + 121 + 2 + 12 + 121 + 21 + 22 +)"; + EXPECT_EQ(PrintedTree, Expected); +} + +TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInPolicyConflict) { + auto tryFaultInPolicyConflict = [](OnDiskGraphDB::FaultInPolicy Policy1, + OnDiskGraphDB::FaultInPolicy Policy2) { + unittest::TempDir TempUpstream("ondiskcas-upstream", /*Unique=*/true); + std::unique_ptr UpstreamDB; + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(TempUpstream.path(), "blake3", sizeof(HashType)) + .moveInto(UpstreamDB), + Succeeded()); + + unittest::TempDir Temp("ondiskcas", /*Unique=*/true); + std::unique_ptr DB; + ASSERT_THAT_ERROR(OnDiskGraphDB::open(Temp.path(), "blake3", + sizeof(HashType), + std::move(UpstreamDB), Policy1) + .moveInto(DB), + Succeeded()); + DB.reset(); + 
ASSERT_THAT_ERROR(OnDiskGraphDB::open(Temp.path(), "blake3", + sizeof(HashType), + std::move(UpstreamDB), Policy2) + .moveInto(DB), + Failed()); + }; + // Open as 'single', then as 'full'. + tryFaultInPolicyConflict(OnDiskGraphDB::FaultInPolicy::SingleNode, + OnDiskGraphDB::FaultInPolicy::FullTree); + // Open as 'full', then as 'single'. + tryFaultInPolicyConflict(OnDiskGraphDB::FaultInPolicy::FullTree, + OnDiskGraphDB::FaultInPolicy::SingleNode); +} + +#if defined(EXPENSIVE_CHECKS) +TEST_F(OnDiskCASTest, OnDiskGraphDBSpaceLimit) { + setMaxOnDiskCASMappingSize(); + unittest::TempDir Temp("ondiskcas", /*Unique=*/true); + std::unique_ptr DB; + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType)).moveInto(DB), + Succeeded()); + + std::optional ID; + std::string Data(500, '0'); + auto storeSmallObject = [&]() { + SmallVector Refs; + if (ID) + Refs.push_back(*ID); + ASSERT_THAT_ERROR(store(*DB, Data, Refs).moveInto(ID), Succeeded()); + }; + + // Insert enough small elements to overflow the data pool. + for (unsigned I = 0; I < 1024 * 256; ++I) + storeSmallObject(); + + EXPECT_GE(DB->getHardStorageLimitUtilization(), 99U); +} +#endif diff --git a/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp b/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp new file mode 100644 index 0000000000000..89c03b890a488 --- /dev/null +++ b/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp @@ -0,0 +1,77 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/OnDiskKeyValueDB.h" +#include "CASTestConfig.h" +#include "OnDiskCommonUtils.h" +#include "llvm/Testing/Support/Error.h" +#include "llvm/Testing/Support/SupportHelpers.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; +using namespace llvm::unittest::cas; + +TEST_F(OnDiskCASTest, OnDiskKeyValueDBTest) { + unittest::TempDir Temp("ondiskkv", /*Unique=*/true); + std::unique_ptr DB; + ASSERT_THAT_ERROR(OnDiskKeyValueDB::open(Temp.path(), "blake3", + sizeof(HashType), "test", + sizeof(ValueType)) + .moveInto(DB), + Succeeded()); + { + std::optional> Val; + ASSERT_THAT_ERROR(DB->get(digest("hello")).moveInto(Val), Succeeded()); + EXPECT_FALSE(Val.has_value()); + } + + ValueType ValW = valueFromString("world"); + ArrayRef Val; + ASSERT_THAT_ERROR(DB->put(digest("hello"), ValW).moveInto(Val), Succeeded()); + EXPECT_EQ(Val, ArrayRef(ValW)); + ASSERT_THAT_ERROR( + DB->put(digest("hello"), valueFromString("other")).moveInto(Val), + Succeeded()); + EXPECT_EQ(Val, ArrayRef(ValW)); + + { + std::optional> Val; + ASSERT_THAT_ERROR(DB->get(digest("hello")).moveInto(Val), Succeeded()); + EXPECT_TRUE(Val.has_value()); + EXPECT_EQ(*Val, ArrayRef(ValW)); + } + + // Validate + { + auto ValidateFunc = [](FileOffset Offset, ArrayRef Data) -> Error { + EXPECT_EQ(Data.size(), sizeof(ValueType)); + return Error::success(); + }; + ASSERT_THAT_ERROR(DB->validate(ValidateFunc), Succeeded()); + } + + // Size + { + size_t InitSize = DB->getStorageSize(); + unsigned InitPrecent = DB->getHardStorageLimitUtilization(); + + // Insert a lot of entries. 
+ for (unsigned I = 0; I < 1024 * 100; ++I) { + std::string Index = Twine(I).str(); + ArrayRef Val; + ASSERT_THAT_ERROR( + DB->put(digest(Index), valueFromString(Index)).moveInto(Val), + Succeeded()); + } + + EXPECT_GT(DB->getStorageSize(), InitSize); + EXPECT_GT(DB->getHardStorageLimitUtilization(), InitPrecent); + } +}