Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,13 @@ namespace llvm {
namespace msf {
class MappedBlockStream;
}
namespace codeview {
class PublicSym32;
}
namespace pdb {
struct PublicsStreamHeader;
struct SectionOffset;
class SymbolStream;

class PublicsStream {
public:
Expand All @@ -42,6 +46,20 @@ class PublicsStream {
return SectionOffsets;
}

/// Find a public symbol by a segment and offset.
///
/// Only an exact match of both \p Segment and \p Offset counts as a hit; a
/// symbol that merely precedes the address is not returned.
///
/// In case there is more than one symbol (for example due to ICF), the first
/// one is returned.
///
/// \param Symbols The symbol stream the address map entries refer to; the
/// public symbol records are read from it.
/// \param Segment The segment of the address to look up.
/// \param Offset The offset of the address inside \p Segment.
///
/// \return If a symbol was found, the symbol at the provided address is
/// returned as well as the index of this symbol in the address map. If
/// the binary was linked with ICF, there might be more symbols with the
/// same address after the returned one. If no symbol is found,
/// `std::nullopt` is returned. `std::nullopt` is also returned if a
/// record visited during the search is not an `S_PUB32` record or can
/// not be deserialized.
LLVM_ABI std::optional<std::pair<codeview::PublicSym32, size_t>>
findByAddress(const SymbolStream &Symbols, uint16_t Segment,
uint32_t Offset) const;

private:
std::unique_ptr<msf::MappedBlockStream> Stream;
GSIHashTable PublicsTable;
Expand Down
91 changes: 91 additions & 0 deletions llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,12 @@
//===----------------------------------------------------------------------===//

#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Error.h"
#include <cstdint>
Expand Down Expand Up @@ -96,3 +99,91 @@ Error PublicsStream::reload() {
"Corrupted publics stream.");
return Error::success();
}

/// Three-way comparison of two (segment, offset) addresses, ordered by segment
/// first and by offset second.
///
/// \returns a negative value if Lhs is ordered before Rhs, zero if both
/// addresses are equal and a positive value if Lhs is ordered after Rhs.
///
/// The result is computed with explicit comparisons instead of a subtraction:
/// an unsigned difference can never be negative, and a 32-bit difference can
/// not represent the ordering of two arbitrary 32-bit offsets.
static int compareSegmentOffset(uint16_t LhsSegment, uint32_t LhsOffset,
                                uint16_t RhsSegment, uint32_t RhsOffset) {
  if (LhsSegment != RhsSegment)
    return LhsSegment < RhsSegment ? -1 : 1;
  if (LhsOffset != RhsOffset)
    return LhsOffset < RhsOffset ? -1 : 1;
  return 0;
}

/// Convenience overload comparing an address against the address of a public
/// symbol record. See the overload above for the meaning of the result.
static int compareSegmentOffset(uint16_t LhsSegment, uint32_t LhsOffset,
                                const codeview::PublicSym32 &Rhs) {
  return compareSegmentOffset(LhsSegment, LhsOffset, Rhs.Segment, Rhs.Offset);
}

// This is a reimplementation of NearestSym:
// https://github.com/microsoft/microsoft-pdb/blob/805655a28bd8198004be2ac27e6e0290121a5e89/PDB/dbi/gsi.cpp#L1492-L1581
//
// Returns the first public symbol located exactly at Segment:Offset together
// with its index in the address map, or std::nullopt if there is no such
// symbol or if a record visited during the search is malformed.
std::optional<std::pair<codeview::PublicSym32, size_t>>
PublicsStream::findByAddress(const SymbolStream &Symbols, uint16_t Segment,
                             uint32_t Offset) const {
  // The address map is sorted by address, so we do binary search.
  // Each element is an offset into the symbols for a public symbol.
  auto Lo = AddressMap.begin();
  auto Hi = AddressMap.end();
  // Without any entries there is nothing to find, and stepping back from
  // end() below would not be valid.
  if (Lo == Hi)
    return std::nullopt;
  Hi -= 1;

  // Reads the record at the given offset of the symbol stream as a public
  // symbol. Yields std::nullopt for anything that is not a valid S_PUB32
  // record.
  auto ReadPublic =
      [&Symbols](
          uint32_t RecordOffset) -> std::optional<codeview::PublicSym32> {
    auto Sym = Symbols.readRecord(RecordOffset);
    if (Sym.kind() != codeview::S_PUB32)
      return std::nullopt; // this is most likely corrupted debug info

    auto Psym =
        codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(Sym);
    if (!Psym) {
      consumeError(Psym.takeError());
      return std::nullopt;
    }
    return std::move(*Psym);
  };

  // Narrow [Lo, Hi] down to a single candidate. The midpoint is rounded up so
  // that `Lo = Cur` always makes progress.
  while (Lo < Hi) {
    auto Cur = Lo + ((Hi - Lo + 1) / 2);
    std::optional<codeview::PublicSym32> Psym = ReadPublic(Cur->value());
    if (!Psym)
      return std::nullopt;

    int Cmp = compareSegmentOffset(Segment, Offset, *Psym);
    if (Cmp < 0) {
      // The requested address is before Cur. Cur > Lo holds here, so stepping
      // back by one stays inside the map.
      Cur -= 1;
      Hi = Cur;
    } else if (Cmp == 0)
      Lo = Hi = Cur;
    else
      Lo = Cur;
  }

  // Only an exact match counts as a hit.
  std::optional<codeview::PublicSym32> Found = ReadPublic(Lo->value());
  if (!Found || compareSegmentOffset(Segment, Offset, *Found) != 0)
    return std::nullopt;

  // We found a symbol. Due to ICF, multiple symbols can have the same
  // address, so return the first one
  while (Lo != AddressMap.begin()) {
    auto Prev = Lo;
    --Prev;
    std::optional<codeview::PublicSym32> PrevSym = ReadPublic(Prev->value());
    if (!PrevSym)
      return std::nullopt;
    if (compareSegmentOffset(Segment, Offset, *PrevSym) != 0)
      break;

    Found = std::move(PrevSym);
    Lo = Prev;
  }

  std::ptrdiff_t IterOffset = Lo - AddressMap.begin();
  return std::pair{std::move(*Found), static_cast<size_t>(IterOffset)};
}
1 change: 1 addition & 0 deletions llvm/unittests/DebugInfo/PDB/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ add_llvm_unittest_with_input_files(DebugInfoPDBTests
StringTableBuilderTest.cpp
PDBApiTest.cpp
PDBVariantTest.cpp
PublicsStreamTest.cpp
)

target_link_libraries(DebugInfoPDBTests PRIVATE LLVMTestingSupport)
46 changes: 46 additions & 0 deletions llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// clang-format off

// Compile with
// cl /Z7 /GR- /GS- PublicSymbols.cpp -c /Gy
// link .\PublicSymbols.obj /DEBUG /NODEFAULTLIB /out:PublicSymbols.exe /ENTRY:main /OPT:ICF
// llvm-pdbutil pdb2yaml --publics-stream PublicSymbols.pdb > PublicSymbols.yaml
// llvm-pdbutil yaml2pdb PublicSymbols.yaml
//
// rm PublicSymbols.exe && rm PublicSymbols.obj && rm PublicSymbols.yaml

// Stand-alone function whose body differs from the dup* functions below, so
// it is not a candidate for being folded together with them.
int foobar(int i){ return i + 1; }
// These three functions have identical bodies and should be merged by ICF
// (the link command in the build instructions of this file passes /OPT:ICF),
// so that several public symbols end up at the same address.
int dup1(int i){ return i + 2; }
int dup2(int i){ return i + 2; }
int dup3(int i){ return i + 2; }

// Class with one inline instance method and one inline static method. Both
// are called from main().
class AClass {
public:
// Instance method with an empty body.
void AMethod(int, char*) {}
// Static method; returns true if `c` is a space character.
static bool Something(char c) {
return c == ' ';
}
};

// Polymorphic hierarchy: Base has a virtual destructor, Derived and Derived2
// each derive from Base, and Derived3 derives from both Derived2 and Derived.
// NOTE(review): PublicsStreamTest.cpp looks up the vftable symbol
// ??_7Derived2@@6B@ at a fixed address and index, so changing this hierarchy
// presumably requires regenerating PublicSymbols.pdb and updating that test.
struct Base {
virtual ~Base() = default;
};
struct Derived : public Base {};
struct Derived2 : public Base {};
struct Derived3 : public Derived2, public Derived {};

// Global variable that is read by main().
// NOTE(review): PublicsStreamTest.cpp looks up the public ?AGlobal@@3HA, which
// looks like the mangled name of this variable -- confirm before renaming.
int AGlobal;

// Sized global operator delete with an empty body.
// NOTE(review): presumably needed because the file is linked with
// /NODEFAULTLIB while Base declares a virtual destructor -- confirm.
void operator delete(void *,unsigned __int64) {}

// Entry point (the link command uses /ENTRY:main). Calls every free function
// and both AClass methods, creates a Derived3 object and returns AGlobal.
int main() {
foobar(1);
dup1(1);
dup2(1);
dup3(1);
AClass a;
a.AMethod(1, nullptr);
AClass::Something(' ');
// Instantiates the class hierarchy declared above.
Derived3 d3;
return AGlobal;
}
Binary file not shown.
62 changes: 62 additions & 0 deletions llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/MemoryBuffer.h"

#include "llvm/Testing/Support/SupportHelpers.h"

#include "gtest/gtest.h"

using namespace llvm;
using namespace llvm::pdb;

extern const char *TestMainArgv0;

static std::string getExePath() {
SmallString<128> InputsDir = unittest::getInputFileDirectory(TestMainArgv0);
llvm::sys::path::append(InputsDir, "PublicSymbols.pdb");
return std::string(InputsDir);
}

// Loads PublicSymbols.pdb and checks PublicsStream::findByAddress for an
// address shared by several symbols, for two addresses without a symbol and
// for the address of a global variable.
TEST(PublicsStreamTest, FindByAddress) {
  // Read the PDB into memory and parse its headers and stream data.
  const std::string PdbPath = getExePath();
  auto FileBuffer = MemoryBuffer::getFile(PdbPath, /*IsText=*/false,
                                          /*RequiresNullTerminator=*/false);
  ASSERT_TRUE(static_cast<bool>(FileBuffer));
  auto ByteStream = std::make_unique<MemoryBufferByteStream>(
      std::move(*FileBuffer), llvm::endianness::little);

  BumpPtrAllocator Allocator;
  PDBFile Pdb(PdbPath, std::move(ByteStream), Allocator);
  ASSERT_FALSE(static_cast<bool>(Pdb.parseFileHeaders()));
  ASSERT_FALSE(static_cast<bool>(Pdb.parseStreamData()));

  auto PublicsOrErr = Pdb.getPDBPublicsStream();
  ASSERT_TRUE(static_cast<bool>(PublicsOrErr));
  auto SymbolsOrErr = Pdb.getPDBSymbolStream();
  ASSERT_TRUE(static_cast<bool>(SymbolsOrErr));

  // Both Derived and Derived2 have their vftables at 2:8, but Derived2 comes
  // first (due to ICF).
  auto VFTable = PublicsOrErr->findByAddress(*SymbolsOrErr, 2, 8);
  ASSERT_TRUE(VFTable.has_value());
  ASSERT_EQ(VFTable->first.Name, "??_7Derived2@@6B@");
  ASSERT_EQ(VFTable->second, 26u);

  // The addresses right before and right after 2:8 have no public symbol.
  auto BeforeVFTable = PublicsOrErr->findByAddress(*SymbolsOrErr, 2, 7);
  ASSERT_FALSE(BeforeVFTable.has_value());
  auto AfterVFTable = PublicsOrErr->findByAddress(*SymbolsOrErr, 2, 9);
  ASSERT_FALSE(AfterVFTable.has_value());

  auto Global = PublicsOrErr->findByAddress(*SymbolsOrErr, 3, 0);
  ASSERT_TRUE(Global.has_value());
  ASSERT_EQ(Global->first.Name, "?AGlobal@@3HA");
  ASSERT_EQ(Global->second, 30u);
}