diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h
index 2cb4bee8ca5df..c5fdad057e867 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h
@@ -18,9 +18,13 @@ namespace llvm {
 namespace msf {
 class MappedBlockStream;
 }
+namespace codeview {
+class PublicSym32;
+}
 namespace pdb {
 struct PublicsStreamHeader;
 struct SectionOffset;
+class SymbolStream;
 
 class PublicsStream {
 public:
@@ -42,6 +46,20 @@ class PublicsStream {
     return SectionOffsets;
   }
 
+  /// Find a public symbol by its segment and offset.
+  ///
+  /// In case there is more than one symbol (for example due to ICF), the first
+  /// one is returned.
+  ///
+  /// \return If a symbol was found, the symbol at the provided address is
+  ///     returned as well as the index of this symbol in the address map. If
+  ///     the binary was linked with ICF, there might be more symbols with the
+  ///     same address after the returned one. If no symbol is found (or the
+  ///     matching record is not a valid S_PUB32), `std::nullopt` is returned.
+  LLVM_ABI std::optional<std::pair<codeview::PublicSym32, size_t>>
+  findByAddress(const SymbolStream &Symbols, uint16_t Segment,
+                uint32_t Offset) const;
+
 private:
   std::unique_ptr<msf::MappedBlockStream> Stream;
   GSIHashTable PublicsTable;
diff --git a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
index c350e0e0b3e19..0453eea26605b 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
@@ -22,9 +22,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
+#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
 #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
 #include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/Error.h"
 #include <cstdint>
@@ -96,3 +99,50 @@ Error PublicsStream::reload() {
                                 "Corrupted publics stream.");
   return Error::success();
 }
+
+// This is a reimplementation of NearestSym:
+// https://github.com/microsoft/microsoft-pdb/blob/805655a28bd8198004be2ac27e6e0290121a5e89/PDB/dbi/gsi.cpp#L1492-L1581
+std::optional<std::pair<codeview::PublicSym32, size_t>>
+PublicsStream::findByAddress(const SymbolStream &Symbols, uint16_t Segment,
+                             uint32_t Offset) const {
+  // The address map is sorted by address, so we can use lower_bound to find the
+  // position. Each element is an offset into the symbols for a public symbol.
+  auto It = llvm::lower_bound(
+      AddressMap, std::tuple(Segment, Offset),
+      [&](support::ulittle32_t Cur, auto Addr) {
+        auto Sym = Symbols.readRecord(Cur.value());
+        if (Sym.kind() != codeview::S_PUB32)
+          return false; // stop here, this is most likely corrupted debug info
+
+        auto Psym =
+            codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(
+                Sym);
+        if (!Psym) {
+          consumeError(Psym.takeError());
+          return false;
+        }
+
+        return std::tie(Psym->Segment, Psym->Offset) < Addr;
+      });
+
+  if (It == AddressMap.end())
+    return std::nullopt;
+
+  auto Sym = Symbols.readRecord(It->value());
+  if (Sym.kind() != codeview::S_PUB32)
+    return std::nullopt; // this is most likely corrupted debug info
+
+  auto MaybePsym =
+      codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(Sym);
+  if (!MaybePsym) {
+    consumeError(MaybePsym.takeError());
+    return std::nullopt;
+  }
+  codeview::PublicSym32 Psym = std::move(*MaybePsym);
+
+  if (std::tuple(Segment, Offset) != std::tuple(Psym.Segment, Psym.Offset))
+    return std::nullopt;
+
+  std::ptrdiff_t IterOffset = It - AddressMap.begin();
+  return std::pair{Psym, static_cast<size_t>(IterOffset)};
+}
diff --git a/llvm/unittests/DebugInfo/PDB/CMakeLists.txt b/llvm/unittests/DebugInfo/PDB/CMakeLists.txt
index ba2a732848f4d..b1b9d2d98c944 100644
--- a/llvm/unittests/DebugInfo/PDB/CMakeLists.txt
+++ b/llvm/unittests/DebugInfo/PDB/CMakeLists.txt
@@ -11,6 +11,7 @@ add_llvm_unittest_with_input_files(DebugInfoPDBTests
   StringTableBuilderTest.cpp
   PDBApiTest.cpp
   PDBVariantTest.cpp
+  PublicsStreamTest.cpp
   )
 
 target_link_libraries(DebugInfoPDBTests PRIVATE LLVMTestingSupport)
diff --git a/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp
new file mode 100644
index 0000000000000..4b89280cbdb93
--- /dev/null
+++ b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp
@@ -0,0 +1,237 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
+#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/MSF/MSFBuilder.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
+#include "llvm/Support/BinaryByteStream.h"
+
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::pdb;
+
+namespace {
+/// Name and address of a single public symbol used as test input.
+struct PublicSym {
+  llvm::StringRef Name;
+  uint16_t Segment;
+  uint32_t Offset;
+};
+
+/// Builds a publics stream and a symbol record stream inside an in-memory
+/// buffer and hands them out as PublicsStream / SymbolStream readers.
+class MockPublics {
+public:
+  MockPublics(size_t StreamSize, BumpPtrAllocator &Alloc,
+              msf::MSFBuilder Builder);
+  static Expected<std::unique_ptr<MockPublics>>
+  create(BumpPtrAllocator &Allocator, size_t StreamSize);
+
+  void addPublics(ArrayRef<PublicSym> Syms);
+  Error finish();
+
+  PublicsStream *publicsStream();
+  SymbolStream *symbolStream();
+
+  MutableBinaryByteStream &stream() { return Stream; }
+
+private:
+  MutableBinaryByteStream Stream;
+
+  msf::MSFBuilder MsfBuilder;
+  std::optional<msf::MSFLayout> MsfLayout;
+
+  GSIStreamBuilder Gsi;
+
+  std::unique_ptr<PublicsStream> Publics;
+  std::unique_ptr<SymbolStream> Symbols;
+};
+
+MockPublics::MockPublics(size_t StreamSize, BumpPtrAllocator &Allocator,
+                         msf::MSFBuilder Builder)
+    : Stream({Allocator.Allocate<uint8_t>(StreamSize), StreamSize},
+             llvm::endianness::little),
+      MsfBuilder(std::move(Builder)), Gsi(this->MsfBuilder) {}
+
+Expected<std::unique_ptr<MockPublics>>
+MockPublics::create(BumpPtrAllocator &Allocator, size_t StreamSize) {
+  auto ExpectedMsf = msf::MSFBuilder::create(Allocator, 4096);
+  if (!ExpectedMsf)
+    return ExpectedMsf.takeError();
+  return std::make_unique<MockPublics>(StreamSize, Allocator,
+                                       std::move(*ExpectedMsf));
+}
+
+void MockPublics::addPublics(ArrayRef<PublicSym> Publics) {
+  std::vector<BulkPublic> Bulks;
+  for (const auto &Sym : Publics) {
+    BulkPublic BP;
+    BP.Name = Sym.Name.data();
+    BP.NameLen = Sym.Name.size();
+    BP.Offset = Sym.Offset;
+    BP.Segment = Sym.Segment;
+    Bulks.emplace_back(BP);
+  }
+  Gsi.addPublicSymbols(std::move(Bulks));
+}
+
+Error MockPublics::finish() {
+  auto Err = Gsi.finalizeMsfLayout();
+  if (Err)
+    return Err;
+
+  auto ExpectedLayout = MsfBuilder.generateLayout();
+  if (!ExpectedLayout)
+    return ExpectedLayout.takeError();
+  MsfLayout = std::move(*ExpectedLayout);
+
+  return Gsi.commit(*MsfLayout, Stream);
+}
+
+PublicsStream *MockPublics::publicsStream() {
+  if (!Publics) {
+    Publics = std::make_unique<PublicsStream>(
+        msf::MappedBlockStream::createIndexedStream(*MsfLayout, Stream,
+                                                    Gsi.getPublicsStreamIndex(),
+                                                    MsfBuilder.getAllocator()));
+  }
+  return Publics.get();
+}
+
+SymbolStream *MockPublics::symbolStream() {
+  if (!Symbols) {
+    Symbols = std::make_unique<SymbolStream>(
+        msf::MappedBlockStream::createIndexedStream(*MsfLayout, Stream,
+                                                    Gsi.getRecordStreamIndex(),
+                                                    MsfBuilder.getAllocator()));
+  }
+  return Symbols.get();
+}
+
+/// Test input. Several entries share one address, which is what a binary
+/// linked with ICF looks like.
+std::array GSymbols{
+    PublicSym{"??0Base@@QEAA@XZ", /*Segment=*/1, /*Offset=*/0},
+    PublicSym{"??0Derived@@QEAA@XZ", /*Segment=*/1, /*Offset=*/32},
+    PublicSym{"??0Derived2@@QEAA@XZ", /*Segment=*/1, /*Offset=*/32},
+    PublicSym{"??0Derived3@@QEAA@XZ", /*Segment=*/1, /*Offset=*/80},
+    PublicSym{"??1Base@@UEAA@XZ", /*Segment=*/1, /*Offset=*/160},
+    PublicSym{"??1Derived@@UEAA@XZ", /*Segment=*/1, /*Offset=*/176},
+    PublicSym{"??1Derived2@@UEAA@XZ", /*Segment=*/1, /*Offset=*/176},
+    PublicSym{"??1Derived3@@UEAA@XZ", /*Segment=*/1, /*Offset=*/208},
+    PublicSym{"??3@YAXPEAX_K@Z", /*Segment=*/1, /*Offset=*/256},
+    PublicSym{"??_EDerived3@@W7EAAPEAXI@Z", /*Segment=*/1, /*Offset=*/268},
+    PublicSym{"??_GBase@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/288},
+    PublicSym{"??_EBase@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/288},
+    PublicSym{"??_EDerived2@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/352},
+    PublicSym{"??_EDerived@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/352},
+    PublicSym{"??_GDerived@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/352},
+    PublicSym{"??_GDerived2@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/352},
+    PublicSym{"??_EDerived3@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/416},
+    PublicSym{"??_GDerived3@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/416},
+    PublicSym{"?AMethod@AClass@@QEAAXHPEAD@Z", /*Segment=*/1, /*Offset=*/480},
+    PublicSym{"?Something@AClass@@SA_ND@Z", /*Segment=*/1, /*Offset=*/496},
+    PublicSym{"?dup1@@YAHH@Z", /*Segment=*/1, /*Offset=*/544},
+    PublicSym{"?dup3@@YAHH@Z", /*Segment=*/1, /*Offset=*/544},
+    PublicSym{"?dup2@@YAHH@Z", /*Segment=*/1, /*Offset=*/544},
+    PublicSym{"?foobar@@YAHH@Z", /*Segment=*/1, /*Offset=*/560},
+    PublicSym{"main", /*Segment=*/1, /*Offset=*/576},
+    PublicSym{"??_7Base@@6B@", /*Segment=*/2, /*Offset=*/0},
+    PublicSym{"??_7Derived@@6B@", /*Segment=*/2, /*Offset=*/8},
+    PublicSym{"??_7Derived2@@6B@", /*Segment=*/2, /*Offset=*/8},
+    PublicSym{"??_7Derived3@@6BDerived2@@@", /*Segment=*/2, /*Offset=*/16},
+    PublicSym{"??_7Derived3@@6BDerived@@@", /*Segment=*/2, /*Offset=*/24},
+    PublicSym{"?AGlobal@@3HA", /*Segment=*/3, /*Offset=*/0},
+};
+
+} // namespace
+
+/// Returns the (segment, offset) of the N-th entry in the publics address map,
+/// or (0, 0) if that record cannot be deserialized as a public symbol.
+static std::pair<uint32_t, uint32_t>
+nthSymbolAddress(PublicsStream *Publics, SymbolStream *Symbols, size_t N) {
+  auto Index = Publics->getAddressMap()[N].value();
+  codeview::CVSymbol Sym = Symbols->readRecord(Index);
+  auto ExpectedPub =
+      codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(Sym);
+  if (!ExpectedPub) {
+    // An Expected holding an Error must have that Error consumed before it is
+    // destroyed; otherwise builds with ABI-breaking checks abort.
+    consumeError(ExpectedPub.takeError());
+    return std::pair(0, 0);
+  }
+  return std::pair(ExpectedPub->Segment, ExpectedPub->Offset);
+}
+
+TEST(PublicsStreamTest, FindByAddress) {
+  BumpPtrAllocator Allocator;
+  auto ExpectedMock = MockPublics::create(Allocator, 1 << 20);
+  ASSERT_TRUE(bool(ExpectedMock)) << toString(ExpectedMock.takeError());
+  std::unique_ptr<MockPublics> Mock = std::move(*ExpectedMock);
+
+  Mock->addPublics(GSymbols);
+  Error Err = Mock->finish();
+  ASSERT_FALSE(Err) << Err;
+
+  auto *Publics = Mock->publicsStream();
+  ASSERT_NE(Publics, nullptr);
+  Err = Publics->reload();
+  ASSERT_FALSE(Err) << Err;
+
+  auto *Symbols = Mock->symbolStream();
+  ASSERT_NE(Symbols, nullptr);
+  Err = Symbols->reload();
+  ASSERT_FALSE(Err) << Err;
+
+  auto VTableDerived = Publics->findByAddress(*Symbols, 2, 8);
+  ASSERT_TRUE(VTableDerived.has_value());
+  // both derived and derived2 have their vftables there - but derived2 is first
+  // (due to ICF)
+  ASSERT_EQ(VTableDerived->first.Name, "??_7Derived2@@6B@");
+  ASSERT_EQ(VTableDerived->second, 26u);
+
+  // Again, make sure that we find the first symbol
+  auto VectorDtorDerived = Publics->findByAddress(*Symbols, 1, 352);
+  ASSERT_TRUE(VectorDtorDerived.has_value());
+  ASSERT_EQ(VectorDtorDerived->first.Name, "??_EDerived2@@UEAAPEAXI@Z");
+  ASSERT_EQ(VectorDtorDerived->second, 12u);
+  ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 13), std::pair(1u, 352u));
+  ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 14), std::pair(1u, 352u));
+  ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 15), std::pair(1u, 352u));
+  ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 16), std::pair(1u, 416u));
+
+  ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 7).has_value());
+  ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 9).has_value());
+
+  auto GlobalSym = Publics->findByAddress(*Symbols, 3, 0);
+  ASSERT_TRUE(GlobalSym.has_value());
+  ASSERT_EQ(GlobalSym->first.Name, "?AGlobal@@3HA");
+  ASSERT_EQ(GlobalSym->second, 30u);
+
+  // test corrupt debug info
+  codeview::CVSymbol GlobalCVSym =
+      Symbols->readRecord(Publics->getAddressMap()[30]);
+  ASSERT_EQ(GlobalCVSym.kind(), codeview::S_PUB32);
+  // CVSymbol::data returns a pointer to const data, so we modify the backing
+  // data
+  uint8_t *PDBData = Mock->stream().data().data();
+  auto Offset = GlobalCVSym.data().data() - PDBData;
+  reinterpret_cast<codeview::RecordPrefix *>(PDBData + Offset)->RecordKind =
+      codeview::S_GDATA32;
+  ASSERT_EQ(GlobalCVSym.kind(), codeview::S_GDATA32);
+
+  GlobalSym = Publics->findByAddress(*Symbols, 3, 0);
+  ASSERT_FALSE(GlobalSym.has_value());
+}