@@ -937,18 +937,35 @@ OnDiskGraphDB::IndexProxy OnDiskGraphDB::getIndexProxyFromPointer(
937
937
938
938
/// Returns the external ObjectID for \p Hash.
///
/// The hash is resolved to an index entry through indexHash() and that entry
/// is converted by the IndexProxy overload of getExternalReference().
ObjectID OnDiskGraphDB::getReference(ArrayRef<uint8_t> Hash) {
  return getExternalReference(indexHash(Hash));
}
942
+
943
+ ObjectID OnDiskGraphDB::getExternalReference (const IndexProxy &I) {
940
944
return getExternalReference (makeInternalRef (I.Offset ));
941
945
}
942
946
943
947
std::optional<ObjectID>
944
- OnDiskGraphDB::getExistingReference (ArrayRef<uint8_t > Digest) const {
948
+ OnDiskGraphDB::getExistingReference (ArrayRef<uint8_t > Digest) {
949
+ auto tryUpstream =
950
+ [&](std::optional<IndexProxy> I) -> std::optional<ObjectID> {
951
+ if (!UpstreamDB)
952
+ return std::nullopt;
953
+ std::optional<ObjectID> UpstreamID =
954
+ UpstreamDB->getExistingReference (Digest);
955
+ if (!UpstreamID)
956
+ return std::nullopt;
957
+ if (!I)
958
+ I.emplace (indexHash (Digest));
959
+ return getExternalReference (*I);
960
+ };
961
+
945
962
OnDiskHashMappedTrie::const_pointer P = Index.find (Digest);
946
963
if (!P)
947
- return std::nullopt;
964
+ return tryUpstream ( std::nullopt) ;
948
965
IndexProxy I = getIndexProxyFromPointer (P);
949
966
TrieRecord::Data Obj = I.Ref .load ();
950
967
if (Obj.SK == TrieRecord::StorageKind::Unknown)
951
- return std::nullopt ;
968
+ return tryUpstream (I) ;
952
969
return getExternalReference (makeInternalRef (I.Offset ));
953
970
}
954
971
@@ -991,8 +1008,11 @@ OnDiskGraphDB::load(ObjectID ExternalRef) {
991
1008
IndexProxy I = getIndexProxyFromRef (Ref);
992
1009
TrieRecord::Data Object = I.Ref .load ();
993
1010
994
- if (Object.SK == TrieRecord::StorageKind::Unknown)
995
- return std::nullopt;
1011
+ if (Object.SK == TrieRecord::StorageKind::Unknown) {
1012
+ if (!UpstreamDB)
1013
+ return std::nullopt;
1014
+ return faultInFromUpstream (ExternalRef);
1015
+ }
996
1016
997
1017
auto toObjectHandle = [](InternalHandle H) -> ObjectHandle {
998
1018
return ObjectHandle::fromOpaqueData (H.getRawData ());
@@ -1035,14 +1055,21 @@ OnDiskGraphDB::load(ObjectID ExternalRef) {
1035
1055
->insert (I.Hash , Object.SK , std::move (*OwnedBuffer))));
1036
1056
}
1037
1057
1038
- bool OnDiskGraphDB::containsObject (ObjectID ExternalRef) const {
1058
+ bool OnDiskGraphDB::containsObject (ObjectID ExternalRef,
1059
+ bool CheckUpstream) const {
1039
1060
InternalRef Ref = getInternalRef (ExternalRef);
1040
1061
IndexProxy I = getIndexProxyFromRef (Ref);
1041
1062
TrieRecord::Data Object = I.Ref .load ();
1042
- return Object.SK != TrieRecord::StorageKind::Unknown;
1063
+ if (Object.SK != TrieRecord::StorageKind::Unknown)
1064
+ return true ;
1065
+ if (!CheckUpstream || !UpstreamDB)
1066
+ return false ;
1067
+ std::optional<ObjectID> UpstreamID =
1068
+ UpstreamDB->getExistingReference (getDigest (I));
1069
+ return UpstreamID.has_value ();
1043
1070
}
1044
1071
1045
- InternalRef OnDiskGraphDB::makeInternalRef (FileOffset IndexOffset) const {
1072
+ InternalRef OnDiskGraphDB::makeInternalRef (FileOffset IndexOffset) {
1046
1073
return InternalRef::getFromOffset (IndexOffset);
1047
1074
}
1048
1075
@@ -1260,9 +1287,9 @@ Error OnDiskGraphDB::store(ObjectID ID, ArrayRef<ObjectID> Refs,
1260
1287
return Error::success ();
1261
1288
}
1262
1289
1263
- Expected<std::unique_ptr<OnDiskGraphDB>>
1264
- OnDiskGraphDB::open ( StringRef AbsPath, StringRef HashName,
1265
- unsigned HashByteSize ) {
1290
+ Expected<std::unique_ptr<OnDiskGraphDB>> OnDiskGraphDB::open (
1291
+ StringRef AbsPath, StringRef HashName, unsigned HashByteSize ,
1292
+ std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy ) {
1266
1293
if (std::error_code EC = sys::fs::create_directories (AbsPath))
1267
1294
return createFileError (AbsPath, EC);
1268
1295
@@ -1280,21 +1307,27 @@ OnDiskGraphDB::open(StringRef AbsPath, StringRef HashName,
1280
1307
return std::move (E);
1281
1308
1282
1309
std::optional<OnDiskDataAllocator> DataPool;
1310
+ StringRef PolicyName =
1311
+ Policy == FaultInPolicy::SingleNode ? " single" : " full" ;
1283
1312
if (Error E = OnDiskDataAllocator::create (
1284
1313
AbsPath + Slash + FilePrefix + DataPoolFile,
1285
- DataPoolTableName + " [" + HashName + " ]" ,
1314
+ DataPoolTableName + " [" + HashName + " ]" + PolicyName ,
1286
1315
/* MaxFileSize=*/ 16 * GB, /* MinFileSize=*/ MB)
1287
1316
.moveInto (DataPool))
1288
1317
return std::move (E);
1289
1318
1290
1319
return std::unique_ptr<OnDiskGraphDB>(
1291
- new OnDiskGraphDB (AbsPath, std::move (*Index), std::move (*DataPool)));
1320
+ new OnDiskGraphDB (AbsPath, std::move (*Index), std::move (*DataPool),
1321
+ std::move (UpstreamDB), Policy));
1292
1322
}
1293
1323
1294
1324
OnDiskGraphDB::OnDiskGraphDB (StringRef RootPath, OnDiskHashMappedTrie Index,
1295
- OnDiskDataAllocator DataPool)
1325
+ OnDiskDataAllocator DataPool,
1326
+ std::unique_ptr<OnDiskGraphDB> UpstreamDB,
1327
+ FaultInPolicy Policy)
1296
1328
: Index(std::move(Index)), DataPool(std::move(DataPool)),
1297
- RootPath(RootPath.str()) {
1329
+ RootPath(RootPath.str()), UpstreamDB(std::move(UpstreamDB)),
1330
+ FIPolicy(Policy) {
1298
1331
// / Lifetime for "big" objects not in DataPool.
1299
1332
// /
1300
1333
// / NOTE: Could use ThreadSafeHashMappedTrie here. For now, doing something
@@ -1310,3 +1343,119 @@ OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskHashMappedTrie Index,
1310
1343
/// Destroys the database object, releasing the standalone-data map.
OnDiskGraphDB::~OnDiskGraphDB() {
  // StandaloneData is held type-erased; cast it back to its concrete map type
  // before deleting it.
  auto *Standalone = static_cast<StandaloneDataMapTy *>(StandaloneData);
  delete Standalone;
}
1346
+
1347
+ Error OnDiskGraphDB::importFullTree (ObjectID PrimaryID,
1348
+ ObjectHandle UpstreamNode) {
1349
+ // Copies the full CAS tree from upstream. Uses depth-first copying to protect
1350
+ // against the process dying during importing and leaving the database with an
1351
+ // incomplete tree. Note that if the upstream has missing nodes then the tree
1352
+ // will be copied with missing nodes as well, it won't be considered an error.
1353
+
1354
+ struct UpstreamCursor {
1355
+ ObjectHandle Node;
1356
+ size_t RefsCount;
1357
+ object_refs_iterator RefI;
1358
+ object_refs_iterator RefE;
1359
+ };
1360
+ // / Keeps track of the state of visitation for current node and all of its
1361
+ // / parents.
1362
+ SmallVector<UpstreamCursor, 16 > CursorStack;
1363
+ // / Keeps track of the currently visited nodes as they are imported into
1364
+ // / primary database, from current node and its parents. When a node is
1365
+ // / entered for visitation it appends its own ID, then appends referenced IDs
1366
+ // / as they get imported. When a node is fully imported it removes the
1367
+ // / referenced IDs from the bottom of the stack which leaves its own ID at the
1368
+ // / bottom, adding to the list of referenced IDs for the parent node.
1369
+ SmallVector<ObjectID, 128 > PrimaryNodesStack;
1370
+
1371
+ auto enqueueNode = [&](ObjectID PrimaryID, std::optional<ObjectHandle> Node) {
1372
+ PrimaryNodesStack.push_back (PrimaryID);
1373
+ if (!Node)
1374
+ return ;
1375
+ auto Refs = UpstreamDB->getObjectRefs (*Node);
1376
+ CursorStack.push_back ({*Node,
1377
+ (size_t )std::distance (Refs.begin (), Refs.end ()),
1378
+ Refs.begin (), Refs.end ()});
1379
+ };
1380
+
1381
+ enqueueNode (PrimaryID, UpstreamNode);
1382
+
1383
+ while (!CursorStack.empty ()) {
1384
+ UpstreamCursor &Cur = CursorStack.back ();
1385
+ if (Cur.RefI == Cur.RefE ) {
1386
+ // Copy the node data into the primary store.
1387
+ // FIXME: Use hard-link or cloning if the file-system supports it and data
1388
+ // is stored into a separate file.
1389
+
1390
+ // The bottom of \p PrimaryNodesStack contains the primary ID for the
1391
+ // current node plus the list of imported referenced IDs.
1392
+ assert (PrimaryNodesStack.size () >= Cur.RefsCount + 1 );
1393
+ ObjectID PrimaryID = *(PrimaryNodesStack.end () - Cur.RefsCount - 1 );
1394
+ auto PrimaryRefs = ArrayRef (PrimaryNodesStack)
1395
+ .slice (PrimaryNodesStack.size () - Cur.RefsCount );
1396
+ auto Data = UpstreamDB->getObjectData (Cur.Node );
1397
+ if (Error E = store (PrimaryID, PrimaryRefs, Data))
1398
+ return E;
1399
+ // Remove the current node and its IDs from the stack.
1400
+ PrimaryNodesStack.truncate (PrimaryNodesStack.size () - Cur.RefsCount );
1401
+ CursorStack.pop_back ();
1402
+ continue ;
1403
+ }
1404
+
1405
+ ObjectID UpstreamID = *(Cur.RefI ++);
1406
+ ObjectID PrimaryID = getReference (UpstreamDB->getDigest (UpstreamID));
1407
+ if (containsObject (PrimaryID, /* CheckUpstream=*/ false )) {
1408
+ // This \p ObjectID already exists in the primary. Either it was imported
1409
+ // via \p importFullTree or the client created it, in which case the
1410
+ // client takes responsibility for how it was formed.
1411
+ enqueueNode (PrimaryID, std::nullopt);
1412
+ continue ;
1413
+ }
1414
+ Expected<std::optional<ObjectHandle>> UpstreamNode =
1415
+ UpstreamDB->load (UpstreamID);
1416
+ if (!UpstreamNode)
1417
+ return UpstreamNode.takeError ();
1418
+ enqueueNode (PrimaryID, *UpstreamNode);
1419
+ }
1420
+
1421
+ assert (PrimaryNodesStack.size () == 1 );
1422
+ assert (PrimaryNodesStack.front () == PrimaryID);
1423
+ return Error::success ();
1424
+ }
1425
+
1426
+ Error OnDiskGraphDB::importSingleNode (ObjectID PrimaryID,
1427
+ ObjectHandle UpstreamNode) {
1428
+ // Copies only a single node, it doesn't copy the referenced nodes.
1429
+
1430
+ // Copy the node data into the primary store.
1431
+ // FIXME: Use hard-link or cloning if the file-system supports it and data is
1432
+ // stored into a separate file.
1433
+
1434
+ auto Data = UpstreamDB->getObjectData (UpstreamNode);
1435
+ auto UpstreamRefs = UpstreamDB->getObjectRefs (UpstreamNode);
1436
+ SmallVector<ObjectID, 64 > Refs;
1437
+ Refs.reserve (std::distance (UpstreamRefs.begin (), UpstreamRefs.end ()));
1438
+ for (ObjectID UpstreamRef : UpstreamRefs)
1439
+ Refs.push_back (getReference (UpstreamDB->getDigest (UpstreamRef)));
1440
+
1441
+ return store (PrimaryID, Refs, Data);
1442
+ }
1443
+
1444
+ Expected<std::optional<ObjectHandle>>
1445
+ OnDiskGraphDB::faultInFromUpstream (ObjectID PrimaryID) {
1446
+ assert (UpstreamDB);
1447
+
1448
+ ObjectID UpstreamID = UpstreamDB->getReference (getDigest (PrimaryID));
1449
+ Expected<std::optional<ObjectHandle>> UpstreamNode =
1450
+ UpstreamDB->load (UpstreamID);
1451
+ if (!UpstreamNode)
1452
+ return UpstreamNode.takeError ();
1453
+ if (!*UpstreamNode)
1454
+ return std::nullopt;
1455
+
1456
+ if (Error E = FIPolicy == FaultInPolicy::SingleNode
1457
+ ? importSingleNode (PrimaryID, **UpstreamNode)
1458
+ : importFullTree (PrimaryID, **UpstreamNode))
1459
+ return std::move (E);
1460
+ return load (PrimaryID);
1461
+ }
0 commit comments