|
51 | 51 | #include "OnDiskCommon.h"
|
52 | 52 | #include "llvm/ADT/DenseMap.h"
|
53 | 53 | #include "llvm/ADT/StringExtras.h"
|
| 54 | +#include "llvm/CAS/OnDiskHashMappedTrie.h" |
54 | 55 | #include "llvm/Support/Alignment.h"
|
55 | 56 | #include "llvm/Support/Compiler.h"
|
56 | 57 | #include "llvm/Support/Errc.h"
|
57 | 58 | #include "llvm/Support/Error.h"
|
| 59 | +#include "llvm/Support/ErrorHandling.h" |
| 60 | +#include "llvm/Support/FileSystem.h" |
| 61 | +#include "llvm/Support/Format.h" |
58 | 62 | #include "llvm/Support/MemoryBuffer.h"
|
59 | 63 | #include "llvm/Support/Path.h"
|
60 | 64 | #include "llvm/Support/Process.h"
|
@@ -869,6 +873,129 @@ int64_t DataRecordHandle::getDataRelOffset() const {
|
869 | 873 | return RelOffset;
|
870 | 874 | }
|
871 | 875 |
|
| 876 | +Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { |
| 877 | + return Index.validate([&](FileOffset Offset, |
| 878 | + OnDiskHashMappedTrie::ConstValueProxy Record) |
| 879 | + -> Error { |
| 880 | + auto formatError = [&](Twine Msg) { |
| 881 | + return createStringError( |
| 882 | + llvm::errc::illegal_byte_sequence, |
| 883 | + "bad record at 0x" + |
| 884 | + utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " + |
| 885 | + Msg.str()); |
| 886 | + }; |
| 887 | + |
| 888 | + if (Record.Data.size() != sizeof(TrieRecord)) |
| 889 | + return formatError("wrong data record size"); |
| 890 | + if (!isAligned(Align::Of<TrieRecord>(), Record.Data.size())) |
| 891 | + return formatError("wrong data record alignment"); |
| 892 | + |
| 893 | + auto *R = reinterpret_cast<const TrieRecord *>(Record.Data.data()); |
| 894 | + TrieRecord::Data D = R->load(); |
| 895 | + std::unique_ptr<MemoryBuffer> FileBuffer; |
| 896 | + if ((uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Unknown && |
| 897 | + (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::DataPool && |
| 898 | + (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Standalone && |
| 899 | + (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf && |
| 900 | + (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf0) |
| 901 | + return formatError("invalid record kind value"); |
| 902 | + |
| 903 | + auto Ref = InternalRef::getFromOffset(Offset); |
| 904 | + auto I = getIndexProxyFromRef(Ref); |
| 905 | + |
| 906 | + switch (D.SK) { |
| 907 | + case TrieRecord::StorageKind::Unknown: |
| 908 | + // This could be an abandoned entry due to a termination before updating |
| 909 | + // the record. It can be reused by later insertion so just skip this entry |
| 910 | + // for now. |
| 911 | + return Error::success(); |
| 912 | + case TrieRecord::StorageKind::DataPool: |
| 913 | + // Check offset is a postive value, and large enough to hold the |
| 914 | + // header for the data record. |
| 915 | + if (D.Offset.get() <= 0 || |
| 916 | + (uint64_t)D.Offset.get() + sizeof(DataRecordHandle::Header) >= |
| 917 | + DataPool.size()) |
| 918 | + return formatError("datapool record out of bound"); |
| 919 | + break; |
| 920 | + case TrieRecord::StorageKind::Standalone: |
| 921 | + case TrieRecord::StorageKind::StandaloneLeaf: |
| 922 | + case TrieRecord::StorageKind::StandaloneLeaf0: |
| 923 | + SmallString<256> Path; |
| 924 | + getStandalonePath(TrieRecord::getStandaloneFileSuffix(D.SK), I, Path); |
| 925 | + // If need to validate the content of the file later, just load the |
| 926 | + // buffer here. Otherwise, just check the existance of the file. |
| 927 | + if (Deep) { |
| 928 | + auto File = MemoryBuffer::getFile(Path, /*IsText=*/false, |
| 929 | + /*RequiresNullTerminator=*/false); |
| 930 | + if (!File || !*File) |
| 931 | + return formatError("record file \'" + Path + "\' does not exist"); |
| 932 | + |
| 933 | + FileBuffer = std::move(*File); |
| 934 | + } else if (!llvm::sys::fs::exists(Path)) |
| 935 | + return formatError("record file \'" + Path + "\' does not exist"); |
| 936 | + } |
| 937 | + |
| 938 | + if (!Deep) |
| 939 | + return Error::success(); |
| 940 | + |
| 941 | + auto dataError = [&](Twine Msg) { |
| 942 | + return createStringError(llvm::errc::illegal_byte_sequence, |
| 943 | + "bad data for digest \'" + toHex(I.Hash) + |
| 944 | + "\': " + Msg.str()); |
| 945 | + }; |
| 946 | + SmallVector<ArrayRef<uint8_t>> Refs; |
| 947 | + ArrayRef<char> StoredData; |
| 948 | + |
| 949 | + switch (D.SK) { |
| 950 | + case TrieRecord::StorageKind::Unknown: |
| 951 | + llvm_unreachable("already handled"); |
| 952 | + case TrieRecord::StorageKind::DataPool: { |
| 953 | + auto DataRecord = DataRecordHandle::get(DataPool.beginData(D.Offset)); |
| 954 | + if (DataRecord.getTotalSize() + D.Offset.get() >= DataPool.size()) |
| 955 | + return dataError("data record span passed the end of the data pool"); |
| 956 | + for (auto InternRef : DataRecord.getRefs()) { |
| 957 | + auto Index = getIndexProxyFromRef(InternRef); |
| 958 | + Refs.push_back(Index.Hash); |
| 959 | + } |
| 960 | + StoredData = DataRecord.getData(); |
| 961 | + break; |
| 962 | + } |
| 963 | + case TrieRecord::StorageKind::Standalone: { |
| 964 | + if (FileBuffer->getBufferSize() < sizeof(DataRecordHandle::Header)) |
| 965 | + return dataError("data record is not big enough to read the header"); |
| 966 | + auto DataRecord = DataRecordHandle::get(FileBuffer->getBufferStart()); |
| 967 | + if (DataRecord.getTotalSize() < FileBuffer->getBufferSize()) |
| 968 | + return dataError( |
| 969 | + "data record span passed the end of the standalone file"); |
| 970 | + for (auto InternRef : DataRecord.getRefs()) { |
| 971 | + auto Index = getIndexProxyFromRef(InternRef); |
| 972 | + Refs.push_back(Index.Hash); |
| 973 | + } |
| 974 | + StoredData = DataRecord.getData(); |
| 975 | + break; |
| 976 | + } |
| 977 | + case TrieRecord::StorageKind::StandaloneLeaf: |
| 978 | + case TrieRecord::StorageKind::StandaloneLeaf0: { |
| 979 | + StoredData = arrayRefFromStringRef<char>(FileBuffer->getBuffer()); |
| 980 | + if (D.SK == TrieRecord::StorageKind::StandaloneLeaf0) { |
| 981 | + if (!FileBuffer->getBuffer().ends_with('\0')) |
| 982 | + return dataError("standalone file is not zero terminated"); |
| 983 | + StoredData = StoredData.drop_back(1); |
| 984 | + } |
| 985 | + break; |
| 986 | + } |
| 987 | + } |
| 988 | + |
| 989 | + SmallVector<uint8_t> ComputedHash; |
| 990 | + Hasher(Refs, StoredData, ComputedHash); |
| 991 | + if (I.Hash != ArrayRef(ComputedHash)) |
| 992 | + return dataError("hash mismatch, got \'" + toHex(ComputedHash) + |
| 993 | + "\' instead"); |
| 994 | + |
| 995 | + return Error::success(); |
| 996 | + }); |
| 997 | +} |
| 998 | + |
872 | 999 | void OnDiskGraphDB::print(raw_ostream &OS) const {
|
873 | 1000 | OS << "on-disk-root-path: " << RootPath << "\n";
|
874 | 1001 |
|
|
0 commit comments