Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,13 @@ namespace llvm {
namespace msf {
class MappedBlockStream;
}
namespace codeview {
class PublicSym32;
}
namespace pdb {
struct PublicsStreamHeader;
struct SectionOffset;
class SymbolStream;

class PublicsStream {
public:
Expand All @@ -42,6 +46,20 @@ class PublicsStream {
return SectionOffsets;
}

/// Find a public symbol by a segment and offset.
///
/// In case there is more than one symbol (for example due to ICF), the first
/// one is returned.
///
/// \param Symbols The symbol stream used to read the records that the
/// entries of the address map refer to.
/// \param Segment The segment part of the address to look up.
/// \param Offset The offset part of the address to look up.
///
/// \return If a symbol was found, the symbol at the provided address is
/// returned as well as the index of this symbol in the address map. If
/// the binary was linked with ICF, there might be more symbols with the
/// same address after the returned one. If no symbol is found,
/// `std::nullopt` is returned. `std::nullopt` is also returned when the
/// record found at that position is not a public symbol or cannot be
/// deserialized (most likely corrupted debug info).
LLVM_ABI std::optional<std::pair<codeview::PublicSym32, size_t>>
findByAddress(const SymbolStream &Symbols, uint16_t Segment,
uint32_t Offset) const;

private:
std::unique_ptr<msf::MappedBlockStream> Stream;
GSIHashTable PublicsTable;
Expand Down
50 changes: 50 additions & 0 deletions llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,12 @@
//===----------------------------------------------------------------------===//

#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Error.h"
#include <cstdint>
Expand Down Expand Up @@ -96,3 +99,50 @@ Error PublicsStream::reload() {
"Corrupted publics stream.");
return Error::success();
}

// This is a reimplementation of NearestSym:
// https://github.com/microsoft/microsoft-pdb/blob/805655a28bd8198004be2ac27e6e0290121a5e89/PDB/dbi/gsi.cpp#L1492-L1581
//
// Looks up the public symbol located exactly at Segment:Offset. Every entry of
// the address map is resolved to its record through Symbols. On success the
// deserialized symbol and its index in the address map are returned. If there
// is no exact match, or the record found is not a readable S_PUB32,
// std::nullopt is returned.
std::optional<std::pair<codeview::PublicSym32, size_t>>
PublicsStream::findByAddress(const SymbolStream &Symbols, uint16_t Segment,
uint32_t Offset) const {
// The address map is sorted by address, so we can use lower_bound to find the
// position. Each element is an offset into the symbols for a public symbol.
auto It = llvm::lower_bound(
AddressMap, std::tuple(Segment, Offset),
[&](support::ulittle32_t Cur, auto Addr) {
auto Sym = Symbols.readRecord(Cur.value());
if (Sym.kind() != codeview::S_PUB32)
return false; // stop here, this is most likely corrupted debug info
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you able to craft a small test that can exercice this codepath?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a check. I'm not sure if it's fine to just modify the backing data there, but the test works.


auto Psym =
codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(
Sym);
if (!Psym) {
// The error has to be taken out of the Expected before it is dropped.
consumeError(Psym.takeError());
return false;
}

// Lexicographic (segment, offset) comparison against the searched address.
return std::tie(Psym->Segment, Psym->Offset) < Addr;
});

// Every entry compared less than the requested address.
if (It == AddressMap.end())
return std::nullopt;

// Re-read the candidate record; the predicate above does not keep it.
auto Sym = Symbols.readRecord(It->value());
if (Sym.kind() != codeview::S_PUB32)
return std::nullopt; // this is most likely corrupted debug info

auto MaybePsym =
codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(Sym);
if (!MaybePsym) {
consumeError(MaybePsym.takeError());
return std::nullopt;
}
codeview::PublicSym32 Psym = std::move(*MaybePsym);

// lower_bound yields the first entry that does not compare less than the
// requested address, so an exact match still has to be checked explicitly.
if (std::tuple(Segment, Offset) != std::tuple(Psym.Segment, Psym.Offset))
return std::nullopt;

// Report the position of the match within the address map.
std::ptrdiff_t IterOffset = It - AddressMap.begin();
return std::pair{Psym, static_cast<size_t>(IterOffset)};
}
1 change: 1 addition & 0 deletions llvm/unittests/DebugInfo/PDB/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ add_llvm_unittest_with_input_files(DebugInfoPDBTests
StringTableBuilderTest.cpp
PDBApiTest.cpp
PDBVariantTest.cpp
PublicsStreamTest.cpp
)

target_link_libraries(DebugInfoPDBTests PRIVATE LLVMTestingSupport)
226 changes: 226 additions & 0 deletions llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"
#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
#include "llvm/Support/BinaryByteStream.h"

#include "gtest/gtest.h"

using namespace llvm;
using namespace llvm::pdb;

namespace {
/// Minimal description of one public symbol used as test input.
///
/// The field order matters: GSymbols brace-initializes entries positionally as
/// {Name, Segment, Offset}.
struct PublicSym {
// Symbol name; copied into BulkPublic::Name/NameLen by addPublics().
llvm::StringRef Name;
// Segment part of the symbol's address.
uint16_t Segment;
// Offset part of the symbol's address.
uint32_t Offset;
};

/// Test helper that writes a set of public symbols into an in-memory buffer
/// via GSIStreamBuilder and exposes them again as a PublicsStream and a
/// SymbolStream reading from that same buffer.
///
/// Expected call order: create(), addPublics(), finish(), then
/// publicsStream() / symbolStream().
class MockPublics {
public:
// Takes an already constructed MSFBuilder; create() is the usual entry point.
MockPublics(size_t StreamSize, BumpPtrAllocator &Alloc,
msf::MSFBuilder Builder);
// Creates the MSFBuilder and the MockPublics; forwards the builder's error.
static Expected<std::unique_ptr<MockPublics>>
create(BumpPtrAllocator &Allocator, size_t StreamSize);

// Hands the given symbols to the GSI builder as public symbols.
void addPublics(ArrayRef<PublicSym> Syms);
// Finalizes the layout and commits the GSI data into the backing stream.
Error finish();

// Lazily created views over the committed data. Both dereference MsfLayout,
// which is only set by a successful finish().
PublicsStream *publicsStream();
SymbolStream *symbolStream();

// Direct access to the backing buffer (the test uses it to corrupt a record).
MutableBinaryByteStream &stream() { return Stream; }

private:
// Backing storage that the GSI data is committed into and read back from.
MutableBinaryByteStream Stream;

// Declared before Gsi: the constructor initializes Gsi from this member.
msf::MSFBuilder MsfBuilder;
// Empty until finish() has generated the layout.
std::optional<msf::MSFLayout> MsfLayout;

GSIStreamBuilder Gsi;

// Caches for publicsStream() and symbolStream().
std::unique_ptr<PublicsStream> Publics;
std::unique_ptr<SymbolStream> Symbols;
};

/// Sets up the backing stream and the builders.
///
/// The stream wraps \p StreamSize bytes obtained from \p Allocator and is
/// treated as little-endian; the bytes are not explicitly initialized here.
/// \p Builder is moved into the MsfBuilder member.
MockPublics::MockPublics(size_t StreamSize, BumpPtrAllocator &Allocator,
msf::MSFBuilder Builder)
: Stream({Allocator.Allocate<uint8_t>(StreamSize), StreamSize},
llvm::endianness::little),
// Gsi is built from the member (not from the moved-from parameter).
// NOTE(review): presumably GSIStreamBuilder keeps referring to it - confirm.
MsfBuilder(std::move(Builder)), Gsi(this->MsfBuilder) {}

/// Factory for MockPublics.
///
/// Creates an MSF builder with a block size of 4096 on \p Allocator and wraps
/// it, together with a backing buffer of \p StreamSize bytes, in a new
/// MockPublics. If the MSF builder cannot be created, its error is returned.
Expected<std::unique_ptr<MockPublics>>
MockPublics::create(BumpPtrAllocator &Allocator, size_t StreamSize) {
  auto Builder = msf::MSFBuilder::create(Allocator, 4096);
  if (Builder)
    return std::make_unique<MockPublics>(StreamSize, Allocator,
                                         std::move(*Builder));
  return Builder.takeError();
}

/// Converts every entry of \p Publics into a BulkPublic (name pointer, name
/// length, offset and segment) and hands the whole batch to the GSI builder.
void MockPublics::addPublics(ArrayRef<PublicSym> Publics) {
  std::vector<BulkPublic> Records;
  for (const PublicSym &Entry : Publics) {
    // Build the record in place instead of copying a temporary into the vector.
    BulkPublic &Record = Records.emplace_back();
    Record.Name = Entry.Name.data();
    Record.NameLen = Entry.Name.size();
    Record.Offset = Entry.Offset;
    Record.Segment = Entry.Segment;
  }
  Gsi.addPublicSymbols(std::move(Records));
}

/// Finalizes the GSI streams, generates the MSF layout and commits the GSI
/// data into the backing stream.
///
/// \return The first error reported by any of the three steps, or the result
/// of the commit.
Error MockPublics::finish() {
  if (Error FinalizeErr = Gsi.finalizeMsfLayout())
    return FinalizeErr;

  auto Layout = MsfBuilder.generateLayout();
  if (!Layout)
    return Layout.takeError();

  // Keep the layout around; publicsStream()/symbolStream() need it later.
  MsfLayout = std::move(*Layout);
  return Gsi.commit(*MsfLayout, Stream);
}

/// Returns the PublicsStream reading the publics stream index from the backing
/// buffer. The object is created on first use and cached afterwards.
PublicsStream *MockPublics::publicsStream() {
  if (Publics)
    return Publics.get();

  auto Mapped = msf::MappedBlockStream::createIndexedStream(
      *MsfLayout, Stream, Gsi.getPublicsStreamIndex(),
      MsfBuilder.getAllocator());
  Publics = std::make_unique<PublicsStream>(std::move(Mapped));
  return Publics.get();
}

/// Returns the SymbolStream reading the record stream index from the backing
/// buffer. The object is created on first use and cached afterwards.
SymbolStream *MockPublics::symbolStream() {
  if (Symbols)
    return Symbols.get();

  auto Mapped = msf::MappedBlockStream::createIndexedStream(
      *MsfLayout, Stream, Gsi.getRecordStreamIndex(),
      MsfBuilder.getAllocator());
  Symbols = std::make_unique<SymbolStream>(std::move(Mapped));
  return Symbols.get();
}

// Public symbols fed into MockPublics by the FindByAddress test, given as
// {Name, Segment, Offset}. Several entries share the same (segment, offset);
// the test relies on these duplicates to check that findByAddress reports the
// first entry of such a group. The indices asserted in the test refer to the
// positions in the address map of the built stream.
std::array GSymbols{
PublicSym{"??0Base@@QEAA@XZ", /*Segment=*/1, /*Offset=*/0},
PublicSym{"??0Derived@@QEAA@XZ", /*Segment=*/1, /*Offset=*/32},
PublicSym{"??0Derived2@@QEAA@XZ", /*Segment=*/1, /*Offset=*/32},
PublicSym{"??0Derived3@@QEAA@XZ", /*Segment=*/1, /*Offset=*/80},
PublicSym{"??1Base@@UEAA@XZ", /*Segment=*/1, /*Offset=*/160},
PublicSym{"??1Derived@@UEAA@XZ", /*Segment=*/1, /*Offset=*/176},
PublicSym{"??1Derived2@@UEAA@XZ", /*Segment=*/1, /*Offset=*/176},
PublicSym{"??1Derived3@@UEAA@XZ", /*Segment=*/1, /*Offset=*/208},
PublicSym{"??3@YAXPEAX_K@Z", /*Segment=*/1, /*Offset=*/256},
PublicSym{"??_EDerived3@@W7EAAPEAXI@Z", /*Segment=*/1, /*Offset=*/268},
PublicSym{"??_GBase@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/288},
PublicSym{"??_EBase@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/288},
PublicSym{"??_EDerived2@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/352},
PublicSym{"??_EDerived@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/352},
PublicSym{"??_GDerived@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/352},
PublicSym{"??_GDerived2@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/352},
PublicSym{"??_EDerived3@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/416},
PublicSym{"??_GDerived3@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/416},
PublicSym{"?AMethod@AClass@@QEAAXHPEAD@Z", /*Segment=*/1, /*Offset=*/480},
PublicSym{"?Something@AClass@@SA_ND@Z", /*Segment=*/1, /*Offset=*/496},
PublicSym{"?dup1@@YAHH@Z", /*Segment=*/1, /*Offset=*/544},
PublicSym{"?dup3@@YAHH@Z", /*Segment=*/1, /*Offset=*/544},
PublicSym{"?dup2@@YAHH@Z", /*Segment=*/1, /*Offset=*/544},
PublicSym{"?foobar@@YAHH@Z", /*Segment=*/1, /*Offset=*/560},
PublicSym{"main", /*Segment=*/1, /*Offset=*/576},
PublicSym{"??_7Base@@6B@", /*Segment=*/2, /*Offset=*/0},
PublicSym{"??_7Derived@@6B@", /*Segment=*/2, /*Offset=*/8},
PublicSym{"??_7Derived2@@6B@", /*Segment=*/2, /*Offset=*/8},
PublicSym{"??_7Derived3@@6BDerived2@@@", /*Segment=*/2, /*Offset=*/16},
PublicSym{"??_7Derived3@@6BDerived@@@", /*Segment=*/2, /*Offset=*/24},
PublicSym{"?AGlobal@@3HA", /*Segment=*/3, /*Offset=*/0},
};

} // namespace

/// Returns the address of the \p N-th entry of the publics address map.
///
/// The entry is resolved to its record through \p Symbols and deserialized as
/// a PublicSym32.
///
/// \param Publics Stream whose address map is indexed; \p N must be a valid
/// index into it.
/// \param Symbols Stream the record is read from.
/// \param N Position in the address map.
/// \return The (segment, offset) pair of the symbol, or (0, 0) if the record
/// cannot be deserialized as a public symbol.
static std::pair<uint32_t, uint32_t>
nthSymbolAddress(PublicsStream *Publics, SymbolStream *Symbols, size_t N) {
  auto Index = Publics->getAddressMap()[N].value();
  codeview::CVSymbol Sym = Symbols->readRecord(Index);
  auto ExpectedPub =
      codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(Sym);
  if (!ExpectedPub) {
    // An Expected that holds an error must have it taken before destruction;
    // otherwise builds with unchecked-error checking abort here instead of
    // letting the caller's assertion report the mismatch.
    consumeError(ExpectedPub.takeError());
    return std::pair(0, 0);
  }
  return std::pair(ExpectedPub->Segment, ExpectedPub->Offset);
}

// Builds a publics stream from GSymbols and checks findByAddress for exact
// hits, groups of symbols sharing one address, misses, and a record whose kind
// has been overwritten in the backing buffer.
TEST(PublicsStreamTest, FindByAddress) {
BumpPtrAllocator Allocator;
// 1 MiB backing buffer for the serialized streams.
auto ExpectedMock = MockPublics::create(Allocator, 1 << 20);
ASSERT_TRUE(bool(ExpectedMock));
std::unique_ptr<MockPublics> Mock = std::move(*ExpectedMock);

// Serialize the symbols into the mock's in-memory stream.
Mock->addPublics(GSymbols);
Error Err = Mock->finish();
ASSERT_FALSE(Err) << Err;

// Load the publics stream and the symbol record stream back from the buffer.
auto *Publics = Mock->publicsStream();
ASSERT_NE(Publics, nullptr);
Err = Publics->reload();
ASSERT_FALSE(Err) << Err;

auto *Symbols = Mock->symbolStream();
ASSERT_NE(Symbols, nullptr);
Err = Symbols->reload();
ASSERT_FALSE(Err) << Err;

auto VTableDerived = Publics->findByAddress(*Symbols, 2, 8);
ASSERT_TRUE(VTableDerived.has_value());
// Both Derived and Derived2 have their vftables at this address (as happens
// with ICF); Derived2 is the first of the two in the address map.
ASSERT_EQ(VTableDerived->first.Name, "??_7Derived2@@6B@");
ASSERT_EQ(VTableDerived->second, 26u);

// Again, make sure that we find the first symbol of a group sharing an
// address.
auto VectorDtorDerived = Publics->findByAddress(*Symbols, 1, 352);
ASSERT_TRUE(VectorDtorDerived.has_value());
ASSERT_EQ(VectorDtorDerived->first.Name, "??_EDerived2@@UEAAPEAXI@Z");
ASSERT_EQ(VectorDtorDerived->second, 12u);
// Entries 13 to 15 share the address of entry 12; entry 16 is the next
// distinct address.
ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 13), std::pair(1u, 352u));
ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 14), std::pair(1u, 352u));
ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 15), std::pair(1u, 352u));
ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 16), std::pair(1u, 416u));

// Addresses directly before and after an existing symbol must not match.
ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 7).has_value());
ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 9).has_value());

auto GlobalSym = Publics->findByAddress(*Symbols, 3, 0);
ASSERT_TRUE(GlobalSym.has_value());
ASSERT_EQ(GlobalSym->first.Name, "?AGlobal@@3HA");
ASSERT_EQ(GlobalSym->second, 30u);

// Test corrupt debug info: change the kind of the record found above.
codeview::CVSymbol GlobalCVSym =
Symbols->readRecord(Publics->getAddressMap()[30]);
ASSERT_EQ(GlobalCVSym.kind(), codeview::S_PUB32);
// CVSymbol::data returns a pointer to const data, so we modify the backing
// data instead: locate the record inside the mock's buffer and overwrite the
// kind stored in its record prefix.
uint8_t *PDBData = Mock->stream().data().data();
auto Offset = GlobalCVSym.data().data() - PDBData;
reinterpret_cast<codeview::RecordPrefix *>(PDBData + Offset)->RecordKind =
codeview::S_GDATA32;
// GlobalCVSym views the same bytes, so it reports the new kind.
ASSERT_EQ(GlobalCVSym.kind(), codeview::S_GDATA32);

// The record is no longer an S_PUB32, so the lookup must report no symbol.
GlobalSym = Publics->findByAddress(*Symbols, 3, 0);
ASSERT_FALSE(GlobalSym.has_value());
}