Skip to content

Commit 8809cdf

Browse files
vext01 and ptersilie committed
Store all corresponding BasicBlocks in the block address map.
LLVM's codegen can merge multiple BasicBlocks into a single MachineBasicBlock. Unfortunately, MachineBasicBlock::getBasicBlock() only returns the first BasicBlock in the merged sequence, so we have to find the other corresponding BasicBlock(s) (if any) in the merged sequence another way. We do so in two steps: 1. We create a set, MergedBBs, which is the set of BasicBlocks that are *not* returned by MachineBasicBlock::getBasicBlock(MBB) for any MachineBasicBlock, MBB, in the parent MachineFunction -- in other words, it's the set of BasicBlocks that have been merged into a predecessor during codegen. 2. For each BasicBlock BBX returned by MachineBasicBlock::getBasicBlock() we check if it is terminated by an unconditional branch. If so and that unconditional branch transfers to a block BBY, and BBY is a member of MergedBBs, then we know that BBX and BBY were merged during codegen. [Note that we then see if another BBZ was also merged into BBY and so on] Co-authored-by: Lukas Diekmann <[email protected]>
1 parent b9092ed commit 8809cdf

File tree

1 file changed

+61
-20
lines changed

1 file changed

+61
-20
lines changed

llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp

Lines changed: 61 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -156,10 +156,6 @@ const char PPTimerDescription[] = "Pseudo Probe Emission";
156156
const char PPGroupName[] = "pseudo probe";
157157
const char PPGroupDescription[] = "Pseudo Probe Emission";
158158

159-
// A basic block index value used in the bb_addr_map to indicate that there
160-
// is no corresponding IR block for the given machine basic block.
161-
const uint64_t NO_BB = UINT64_MAX;
162-
163159
STATISTIC(EmittedInsts, "Number of machine instrs printed");
164160

165161
char AsmPrinter::ID = 0;
@@ -1156,6 +1152,35 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
11561152
// Emit the total number of basic blocks in this function.
11571153
OutStreamer->emitULEB128IntValue(MF.size());
11581154
const Function &F = MF.getFunction();
1155+
1156+
// LLVM's codegen can merge multiple BasicBlocks into a single
1157+
// MachineBasicBlock. Unfortunately, MachineBasicBlock::getBasicBlock() only
1158+
// returns the first BasicBlock in the merged sequence, so we have to find
1159+
// the other corresponding BasicBlock(s) (if any) in the merged sequence
1160+
// another way. We do so in two steps:
1161+
//
1162+
// 1. We create a set, MergedBBs, which is the set of BasicBlocks that are
1163+
// *not* returned by MachineBasicBlock::getBasicBlock(MBB) for any
1164+
// MachineBasicBlock, MBB, in the parent MachineFunction -- in other words,
1165+
// it's the set of BasicBlocks that have been merged into a predecessor
1166+
// during codegen.
1167+
//
1168+
// 2. For each BasicBlock BBX returned by
1169+
// MachineBasicBlock::getBasicBlock() we check if it is terminated by an
1170+
// unconditional branch. If so and that unconditional branch transfers to a
1171+
// block BBY, and BBY is a member of MergedBBs, then we know that BBX and
1172+
// BBY were merged during codegen. [Note that we then see if another BBZ
1173+
// was also merged into BBY and so on]
1174+
std::set<const BasicBlock *> MergedBBs;
1175+
for (const BasicBlock &BB : F) {
1176+
MergedBBs.insert(&BB);
1177+
}
1178+
for (const MachineBasicBlock &MBB : MF) {
1179+
const BasicBlock *BB = MBB.getBasicBlock();
1180+
if (BB != nullptr) {
1181+
MergedBBs.erase(BB);
1182+
}
1183+
}
11591184
// Emit BB Information for each basic block in the function.
11601185
for (const MachineBasicBlock &MBB : MF) {
11611186
const MCSymbol *MBBSymbol =
@@ -1166,27 +1191,43 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
11661191
// always be computed from their offsets.
11671192
emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), MBBSymbol);
11681193
OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB));
1169-
// Emit the index of the corresponding LLVMIR basic block.
1170-
size_t BBIdx = 0;
1171-
bool found = false;
1172-
const BasicBlock *FindBB = MBB.getBasicBlock();
1173-
if (FindBB == nullptr) {
1174-
found = true;
1175-
BBIdx = NO_BB;
1176-
} else {
1194+
// Find BBs corresponding with this MBB as described above.
1195+
const BasicBlock *CorrBB = MBB.getBasicBlock();
1196+
std::vector<const BasicBlock *> CorrBBs;
1197+
while (CorrBB != nullptr) {
1198+
CorrBBs.push_back(CorrBB);
1199+
const Instruction *Term = CorrBB->getTerminator();
1200+
assert(Term != nullptr);
1201+
if ((isa<BranchInst>(Term)) &&
1202+
(!(dyn_cast<const BranchInst>(Term))->isConditional()))
1203+
{
1204+
CorrBB = CorrBB->getUniqueSuccessor();
1205+
assert(CorrBB != nullptr);
1206+
if (MergedBBs.count(CorrBB) == 0) {
1207+
CorrBB = nullptr;
1208+
}
1209+
} else {
1210+
CorrBB = nullptr;
1211+
}
1212+
}
1213+
// Emit the number of corresponding BasicBlocks.
1214+
OutStreamer->emitULEB128IntValue(CorrBBs.size());
1215+
// Emit the corresponding block indices.
1216+
for (auto CorrBB : CorrBBs) {
1217+
size_t I = 0;
1218+
bool Found = false;
11771219
for (auto It = F.begin(); It != F.end(); It++) {
11781220
const BasicBlock *BB = &*It;
1179-
if (BB == FindBB) {
1180-
found = true;
1181-
break;
1221+
if (BB == CorrBB) {
1222+
Found = true;
1223+
break;
11821224
}
1183-
BBIdx++;
1184-
assert(BBIdx != NO_BB); // Or we are out of encoding space.
1225+
I++;
11851226
}
1227+
if (!Found)
1228+
OutContext.reportError(SMLoc(), "Couldn't find the block's index");
1229+
OutStreamer->emitULEB128IntValue(I);
11861230
}
1187-
if (!found)
1188-
OutContext.reportError(SMLoc(), "Couldn't find the block's index");
1189-
OutStreamer->emitULEB128IntValue(BBIdx);
11901231
}
11911232
OutStreamer->PopSection();
11921233
}

0 commit comments

Comments
 (0)