Skip to content

Commit dee0afa

Browse files
authored
[BOLT][DWARF] Slice .debug_str from the DWP for each CU (#159540)
Slice .debug_str from the DWP for each CU using .debug_str_offsets and emit it, instead of directly copying the global .debug_str, in order to address the bloat issue of DWO after updates. (more details here - #155766 )
1 parent ae50366 commit dee0afa

File tree

6 files changed

+980
-2
lines changed

6 files changed

+980
-2
lines changed

bolt/include/bolt/Core/DebugData.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,12 @@ class DebugStrOffsetsWriter {
471471
return std::move(StrOffsetsBuffer);
472472
}
473473

474+
/// Returns strings of .debug_str_offsets.
475+
StringRef getBufferStr() {
476+
return StringRef(reinterpret_cast<const char *>(StrOffsetsBuffer->data()),
477+
StrOffsetsBuffer->size());
478+
}
479+
474480
/// Initializes Buffer and Stream.
475481
void initialize(DWARFUnit &Unit);
476482

@@ -507,6 +513,12 @@ class DebugStrWriter {
507513
return std::move(StrBuffer);
508514
}
509515

516+
/// Returns strings of .debug_str.
517+
StringRef getBufferStr() {
518+
return StringRef(reinterpret_cast<const char *>(StrBuffer->data()),
519+
StrBuffer->size());
520+
}
521+
510522
/// Adds string to .debug_str.
511523
/// On first invocation it initializes internal data structures.
512524
uint32_t addString(StringRef Str);

bolt/lib/Rewrite/DWARFRewriter.cpp

Lines changed: 118 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1723,7 +1723,76 @@ StringRef getSectionName(const SectionRef &Section) {
17231723
return Name;
17241724
}
17251725

1726-
// Extracts an appropriate slice if input is DWP.
1726+
/// Extracts the slice of the .debug_str.dwo section for a given CU from a DWP
1727+
/// file, based on the .debug_str_offsets.dwo section. This helps address DWO
1728+
/// bloat that may occur after updates.
1729+
///
1730+
/// A slice of .debug_str.dwo may be composed of several non-contiguous
1731+
/// fragments. These non-contiguous string views will be written out
1732+
/// sequentially, avoiding the copying overhead caused by assembling them.
1733+
///
1734+
/// The .debug_str_offsets for the first CU often does not need to be updated,
1735+
/// so copying is only performed when .debug_str_offsets requires updating.
1736+
static void UpdateStrAndStrOffsets(StringRef StrDWOContent,
1737+
StringRef StrOffsetsContent,
1738+
SmallVectorImpl<StringRef> &StrDWOOutData,
1739+
std::string &StrOffsetsOutData,
1740+
unsigned DwarfVersion, bool IsLittleEndian) {
1741+
const llvm::endianness Endian =
1742+
IsLittleEndian ? llvm::endianness::little : llvm::endianness::big;
1743+
const uint64_t HeaderOffset = (DwarfVersion >= 5) ? 8 : 0;
1744+
constexpr size_t SizeOfOffset = sizeof(int32_t);
1745+
const uint64_t NumOffsets =
1746+
(StrOffsetsContent.size() - HeaderOffset) / SizeOfOffset;
1747+
1748+
DataExtractor Extractor(StrOffsetsContent, IsLittleEndian, 0);
1749+
uint64_t ExtractionOffset = HeaderOffset;
1750+
1751+
using StringFragment = DWARFUnitIndex::Entry::SectionContribution;
1752+
const auto getStringLength = [](StringRef Content,
1753+
uint64_t Offset) -> uint64_t {
1754+
size_t NullPos = Content.find('\0', Offset);
1755+
return (NullPos != StringRef::npos) ? (NullPos - Offset + 1) : 0;
1756+
};
1757+
const auto isContiguous = [](const StringFragment &Fragment,
1758+
uint64_t NextOffset) -> bool {
1759+
return NextOffset == Fragment.getOffset() + Fragment.getLength();
1760+
};
1761+
std::optional<StringFragment> CurrentFragment;
1762+
uint64_t AccumulatedStrLen = 0;
1763+
for (uint64_t I = 0; I < NumOffsets; ++I) {
1764+
const uint64_t StrOffset = Extractor.getU32(&ExtractionOffset);
1765+
const uint64_t StringLength = getStringLength(StrDWOContent, StrOffset);
1766+
if (!CurrentFragment) {
1767+
// First init.
1768+
CurrentFragment = StringFragment(StrOffset, StringLength);
1769+
} else {
1770+
if (isContiguous(*CurrentFragment, StrOffset)) {
1771+
// Expanding the current fragment.
1772+
CurrentFragment->setLength(CurrentFragment->getLength() + StringLength);
1773+
} else {
1774+
// Saving the current fragment and start a new one.
1775+
StrDWOOutData.push_back(StrDWOContent.substr(
1776+
CurrentFragment->getOffset(), CurrentFragment->getLength()));
1777+
CurrentFragment = StringFragment(StrOffset, StringLength);
1778+
}
1779+
}
1780+
if (AccumulatedStrLen != StrOffset) {
1781+
// Updating str offsets.
1782+
if (StrOffsetsOutData.empty())
1783+
StrOffsetsOutData = StrOffsetsContent.str();
1784+
llvm::support::endian::write32(
1785+
&StrOffsetsOutData[HeaderOffset + I * SizeOfOffset],
1786+
static_cast<uint32_t>(AccumulatedStrLen), Endian);
1787+
}
1788+
AccumulatedStrLen += StringLength;
1789+
}
1790+
if (CurrentFragment)
1791+
StrDWOOutData.push_back(StrDWOContent.substr(CurrentFragment->getOffset(),
1792+
CurrentFragment->getLength()));
1793+
}
1794+
1795+
// Extracts an appropriate slice if input is DWP.
17271796
// Applies patches or overwrites the section.
17281797
std::optional<StringRef> updateDebugData(
17291798
DWARFContext &DWCtx, StringRef SectionName, StringRef SectionContents,
@@ -1772,6 +1841,8 @@ std::optional<StringRef> updateDebugData(
17721841
errs() << "BOLT-WARNING: unsupported debug section: " << SectionName
17731842
<< "\n";
17741843
if (StrWriter.isInitialized()) {
1844+
if (CUDWOEntry)
1845+
return StrWriter.getBufferStr();
17751846
OutputBuffer = StrWriter.releaseBuffer();
17761847
return StringRef(reinterpret_cast<const char *>(OutputBuffer->data()),
17771848
OutputBuffer->size());
@@ -1786,6 +1857,8 @@ std::optional<StringRef> updateDebugData(
17861857
}
17871858
case DWARFSectionKind::DW_SECT_STR_OFFSETS: {
17881859
if (StrOffstsWriter.isFinalized()) {
1860+
if (CUDWOEntry)
1861+
return StrOffstsWriter.getBufferStr();
17891862
OutputBuffer = StrOffstsWriter.releaseBuffer();
17901863
return StringRef(reinterpret_cast<const char *>(OutputBuffer->data()),
17911864
OutputBuffer->size());
@@ -1888,18 +1961,61 @@ void DWARFRewriter::writeDWOFiles(
18881961
}
18891962
}
18901963

1964+
StringRef StrDWOContent;
1965+
StringRef StrOffsetsContent;
1966+
llvm::SmallVector<StringRef, 3> StrDWOOutData;
1967+
std::string StrOffsetsOutData;
18911968
for (const SectionRef &Section : File->sections()) {
18921969
std::unique_ptr<DebugBufferVector> OutputData;
18931970
StringRef SectionName = getSectionName(Section);
18941971
if (SectionName == "debug_rnglists.dwo")
18951972
continue;
18961973
Expected<StringRef> ContentsExp = Section.getContents();
18971974
assert(ContentsExp && "Invalid contents.");
1975+
if (IsDWP && SectionName == "debug_str.dwo") {
1976+
if (StrWriter.isInitialized())
1977+
StrDWOContent = StrWriter.getBufferStr();
1978+
else
1979+
StrDWOContent = *ContentsExp;
1980+
continue;
1981+
}
18981982
if (std::optional<StringRef> OutData = updateDebugData(
18991983
(*DWOCU)->getContext(), SectionName, *ContentsExp, KnownSections,
19001984
*Streamer, *this, CUDWOEntry, DWOId, OutputData, RangeListssWriter,
1901-
LocWriter, StrOffstsWriter, StrWriter, OverridenSections))
1985+
LocWriter, StrOffstsWriter, StrWriter, OverridenSections)) {
1986+
if (IsDWP && SectionName == "debug_str_offsets.dwo") {
1987+
StrOffsetsContent = *OutData;
1988+
continue;
1989+
}
19021990
Streamer->emitBytes(*OutData);
1991+
}
1992+
}
1993+
1994+
if (IsDWP) {
1995+
// Handling both .debug_str.dwo and .debug_str_offsets.dwo concurrently. In
1996+
// the original DWP, .debug_str is a deduplicated global table, and the
1997+
// .debug_str.dwo slice for a single CU needs to be extracted according to
1998+
// .debug_str_offsets.dwo.
1999+
UpdateStrAndStrOffsets(StrDWOContent, StrOffsetsContent, StrDWOOutData,
2000+
StrOffsetsOutData, CU.getVersion(),
2001+
(*DWOCU)->getContext().isLittleEndian());
2002+
auto SectionIter = KnownSections.find("debug_str.dwo");
2003+
if (SectionIter != KnownSections.end()) {
2004+
Streamer->switchSection(SectionIter->second.first);
2005+
for (size_t i = 0; i < StrDWOOutData.size(); ++i) {
2006+
StringRef OutData = StrDWOOutData[i];
2007+
if (!OutData.empty())
2008+
Streamer->emitBytes(OutData);
2009+
}
2010+
}
2011+
SectionIter = KnownSections.find("debug_str_offsets.dwo");
2012+
if (SectionIter != KnownSections.end()) {
2013+
Streamer->switchSection(SectionIter->second.first);
2014+
if (!StrOffsetsOutData.empty())
2015+
Streamer->emitBytes(StrOffsetsOutData);
2016+
else
2017+
Streamer->emitBytes(StrOffsetsContent);
2018+
}
19032019
}
19042020
Streamer->finish();
19052021
TempOut->keep();

0 commit comments

Comments
 (0)