From dba155e7d07e716435811f6f8d30c4491ae6b72e Mon Sep 17 00:00:00 2001 From: Yashas Andaluri Date: Tue, 21 Jan 2025 11:23:48 +0530 Subject: [PATCH] [RDF] Create phi nodes for clobbering defs When a def in a block A reaches another block B that is in A's iterated dominance frontier, a phi node is added to B for the def register. A clobbering def can be created at a call instruction, for a register clobbered by a call. However, phi nodes are not created for a register, when one of the reaching defs of the register is a clobbering def. This patch adds phi nodes for registers that have a clobbering reaching def. These additional phis help in checking reaching defs for an instruction in RDF based copy propagation and addressing mode optimizations. --- llvm/include/llvm/CodeGen/RDFGraph.h | 7 +- llvm/lib/CodeGen/RDFGraph.cpp | 62 +++++++- .../test/CodeGen/Hexagon/rdf-copy-clobber.mir | 143 ++++++++++++++++++ llvm/test/CodeGen/Hexagon/rdf-phi-clobber.mir | 102 +++++++++++++ 4 files changed, 304 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/rdf-copy-clobber.mir create mode 100644 llvm/test/CodeGen/Hexagon/rdf-phi-clobber.mir diff --git a/llvm/include/llvm/CodeGen/RDFGraph.h b/llvm/include/llvm/CodeGen/RDFGraph.h index cf7344e8c3e74..8a93afbcb5491 100644 --- a/llvm/include/llvm/CodeGen/RDFGraph.h +++ b/llvm/include/llvm/CodeGen/RDFGraph.h @@ -865,8 +865,9 @@ struct DataFlowGraph { using BlockRefsMap = RegisterAggrMap; void buildStmt(Block BA, MachineInstr &In); - void recordDefsForDF(BlockRefsMap &PhiM, Block BA); - void buildPhis(BlockRefsMap &PhiM, Block BA); + void recordDefsForDF(BlockRefsMap &PhiM, BlockRefsMap &PhiClobberM, Block BA); + void buildPhis(BlockRefsMap &PhiM, Block BA, + const DefStackMap &DefM = DefStackMap()); void removeUnusedPhis(); void pushClobbers(Instr IA, DefStackMap &DM); @@ -874,7 +875,7 @@ struct DataFlowGraph { template void linkRefUp(Instr IA, NodeAddr TA, DefStack &DS); template void linkStmtRefs(DefStackMap &DefM, Stmt SA, Predicate P); - void linkBlockRefs(DefStackMap &DefM, Block BA); + void linkBlockRefs(DefStackMap &DefM, BlockRefsMap &PhiClobberM, Block BA); void unlinkUseDF(Use UA); void unlinkDefDF(Def DA); diff --git a/llvm/lib/CodeGen/RDFGraph.cpp b/llvm/lib/CodeGen/RDFGraph.cpp index 483e61db788f4..805b0ee7be0bc 100644 --- a/llvm/lib/CodeGen/RDFGraph.cpp +++ b/llvm/lib/CodeGen/RDFGraph.cpp @@ -966,15 +966,18 @@ void DataFlowGraph::build(const Config &config) { // Build a map "PhiM" which will contain, for each block, the set // of references that will require phi definitions in that block. + // "PhiClobberM" map contains references that require phis for clobbering defs BlockRefsMap PhiM(getPRI()); + BlockRefsMap PhiClobberM(getPRI()); for (Block BA : Blocks) - recordDefsForDF(PhiM, BA); + recordDefsForDF(PhiM, PhiClobberM, BA); for (Block BA : Blocks) buildPhis(PhiM, BA); // Link all the refs. This will recursively traverse the dominator tree. + // Phis for clobbering defs are added here. DefStackMap DM; - linkBlockRefs(DM, EA); + linkBlockRefs(DM, PhiClobberM, EA); // Finally, remove all unused phi nodes. if (!(BuildCfg.Options & BuildOptions::KeepDeadPhis)) @@ -1378,7 +1381,9 @@ void DataFlowGraph::buildStmt(Block BA, MachineInstr &In) { // Scan all defs in the block node BA and record in PhiM the locations of // phi nodes corresponding to these defs. -void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, Block BA) { +// Clobbering defs in BA are recorded in PhiClobberM +void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, + BlockRefsMap &PhiClobberM, Block BA) { // Check all defs from block BA and record them in each block in BA's // iterated dominance frontier. This information will later be used to // create phi nodes. @@ -1394,11 +1399,17 @@ void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, Block BA) { // This is done to make sure that each defined reference gets only one // phi node, even if it is defined multiple times. RegisterAggr Defs(getPRI()); + RegisterAggr ClobberDefs(getPRI()); for (Instr IA : BA.Addr->members(*this)) { for (Ref RA : IA.Addr->members_if(IsDef, *this)) { RegisterRef RR = RA.Addr->getRegRef(*this); - if (RR.isReg() && isTracked(RR)) + if (!isTracked(RR)) + continue; + if (RR.isReg()) Defs.insert(RR); + // Clobbering def + else if (RR.isMask()) + ClobberDefs.insert(RR); } } @@ -1416,12 +1427,14 @@ void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, Block BA) { for (auto *DB : IDF) { Block DBA = findBlock(DB); PhiM[DBA.Id].insert(Defs); + PhiClobberM[DBA.Id].insert(ClobberDefs); } } // Given the locations of phi nodes in the map PhiM, create the phi nodes // that are located in the block node BA. -void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, Block BA) { +void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, Block BA, + const DefStackMap &DefM) { // Check if this blocks has any DF defs, i.e. if there are any defs // that this block is in the iterated dominance frontier of. auto HasDF = PhiM.find(BA.Id); @@ -1434,10 +1447,37 @@ void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, Block BA) { for (MachineBasicBlock *PB : MBB->predecessors()) Preds.push_back(findBlock(PB)); + RegisterAggr PhiDefs(getPRI()); + // DefM will be non empty when we are building phis + // for clobbering defs + if (!DefM.empty()) { + for (Instr IA : BA.Addr->members_if(IsPhi, *this)) { + for (Def DA : IA.Addr->members_if(IsDef, *this)) { + auto DR = DA.Addr->getRegRef(*this); + PhiDefs.insert(DR); + } + } + } + + MachineRegisterInfo &MRI = MF.getRegInfo(); const RegisterAggr &Defs = PhiM[BA.Id]; uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving; for (RegisterRef RR : Defs.refs()) { + if (!DefM.empty()) { + auto F = DefM.find(RR.Reg); + // Do not create a phi for unallocatable registers, or for registers + // that are never livein to BA. + // If a phi exists for RR, do not create another. + if (!MRI.isAllocatable(RR.Reg) || PhiDefs.hasCoverOf(RR) || + F == DefM.end() || F->second.empty()) + continue; + // Do not create a phi, if all reaching defs are clobbering + auto RDef = F->second.top(); + if (RDef->Addr->getFlags() & NodeAttrs::Clobbering) + continue; + PhiDefs.insert(RR); + } Phi PA = newPhi(BA); PA.Addr->addMember(newDef(PA, RR, PhiFlags), *this); @@ -1576,7 +1616,15 @@ void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, Stmt SA, Predicate P) { // Create data-flow links for all instructions in the block node BA. This // will include updating any phi nodes in BA. -void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, Block BA) { +void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, BlockRefsMap &PhiClobberM, + Block BA) { + // Create phi nodes for clobbering defs. + // Since a huge number of registers can get clobbered, it would result in many + // phi nodes being created in the graph. Only create phi nodes that have a non + // clobbering reaching def. Use DefM to get not clobbering defs reaching a + // block. + buildPhis(PhiClobberM, BA, DefM); + // Push block delimiters. markBlock(BA.Id, DefM); @@ -1613,7 +1661,7 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, Block BA) { for (auto *I : *N) { MachineBasicBlock *SB = I->getBlock(); Block SBA = findBlock(SB); - linkBlockRefs(DefM, SBA); + linkBlockRefs(DefM, PhiClobberM, SBA); } // Link the phi uses from the successor blocks. diff --git a/llvm/test/CodeGen/Hexagon/rdf-copy-clobber.mir b/llvm/test/CodeGen/Hexagon/rdf-copy-clobber.mir new file mode 100644 index 0000000000000..e0676a143eefe --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/rdf-copy-clobber.mir @@ -0,0 +1,143 @@ +# RUN: llc -march=hexagon -run-pass=hexagon-rdf-opt -hexagon-rdf-dump -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s + +# Check that RDF graph has a phi node for R28 register in bb.3 and bb.4 +# R28 is clobbered by memcpy call. The clobbering def must be present in bb.4's IDF +# This phi node should prevent $r27 from being replaced by $r28 by RDF copy propagation + +#CHECK-LABEL: Starting copy propagation on: foo + +#CHECK-LABEL: --- %bb.3 --- +#CHECK: p{{[0-9]+}}: phi [+d{{[0-9]+}} + +#CHECK-LABEL: --- %bb.4 --- +#CHECK: p{{[0-9]+}}: phi [+d{{[0-9]+}} + +#CHECK-LABEL: After Hexagon RDF optimizations +#CHECK-LABEL: bb.3: +#CHECK: renamable $r0 = A2_add renamable $r27 + +--- | + define internal fastcc void @foo() unnamed_addr { + entry: + ret void + } + + declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) + +--- +name: foo +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: true +registers: [] +liveins: + - { reg: '$d0', virtual-reg: '' } + - { reg: '$d3', virtual-reg: '' } + - { reg: '$r23', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: true + hasCalls: true + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + isCalleeSavedInfoValid: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: + - { id: 0, type: default, offset: 40, size: 8, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.1 + liveins: $d0, $d3, $r23 + + J2_jump %bb.1, implicit-def dead $pc + + bb.1: + successors: %bb.2 + liveins: $d0:0x0000000000000003, $d3:0x0000000000000003, $r23 + + renamable $r28 = L2_loadri_io %fixed-stack.0, 0 :: (load (s32) from %fixed-stack.0) + renamable $r27 = COPY killed renamable $r28 + + bb.2: + successors: %bb.3 + liveins: $d0:0x0000000000000003, $d3:0x0000000000000003, $r23, $r27 + + renamable $d10 = L2_loadrd_io %stack.0, 0 :: (load (s64) from %stack.0) + renamable $d11 = L2_loadrd_io %stack.1, 0 :: (load (s64) from %stack.1) + + bb.3: + successors: %bb.4, %bb.3 + liveins: $d0:0x0000000000000003, $d3:0x0000000000000003, $d10:0x0000000000000003, $d11:0x0000000000000002, $r23, $r27 + + ADJCALLSTACKDOWN 0, 0, implicit-def $r29, implicit-def dead $r30, implicit $r31, implicit $r30, implicit $r29 + renamable $r1 = A2_add renamable $r23, killed renamable $r0 + $r2 = COPY renamable $r22 + renamable $r0 = A2_add renamable $r27, killed renamable $r6 + J2_call &memcpy, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit $r0, implicit $r1, implicit $r2, implicit-def $r29, implicit-def dead $r0 + renamable $p0 = C2_cmpgtp renamable $d11, renamable $d10 + ADJCALLSTACKUP 0, 0, implicit-def dead $r29, implicit-def dead $r30, implicit-def dead $r31, implicit $r29 + J2_jumpt killed renamable $p0, %bb.3, implicit-def dead $pc + J2_jump %bb.4, implicit-def dead $pc + + bb.4: + successors: %bb.5, %bb.2 + liveins: $d10:0x0000000000000003, $d11:0x0000000000000002, $r23, $r27 + + renamable $d0 = L2_loadrd_io %stack.2, 0 :: (load (s64) from %stack.2) + renamable $d3 = L2_loadrd_io %stack.3, 0 :: (load (s64) from %stack.3) + renamable $p0 = C2_cmpgtp killed renamable $d0, killed renamable $d3 + J2_jumpt killed renamable $p0, %bb.2, implicit-def dead $pc + J2_jump %bb.5, implicit-def dead $pc + + bb.5: + PS_jmpret $r31, implicit-def dead $pc + +... diff --git a/llvm/test/CodeGen/Hexagon/rdf-phi-clobber.mir b/llvm/test/CodeGen/Hexagon/rdf-phi-clobber.mir new file mode 100644 index 0000000000000..d49cc3403d644 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/rdf-phi-clobber.mir @@ -0,0 +1,102 @@ +# RUN: llc -march=hexagon -run-pass=hexagon-rdf-opt \ +# RUN: -hexagon-rdf-dump -verify-machineinstrs -o /dev/null %s 2>&1 \ +# RUN: | FileCheck %s + +# Check that phi nodes that only have clobbering reaching defs are not created +# during graph construction. Check that there are no phi nodes for HVX registers + +#CHECK-LABEL: --- %bb.1 --- +#CHECK-NOT: p{{[0-9]+}}: phi [+d{{[0-9]+}} + +--- | + @.str.3 = private unnamed_addr constant [2 x i8] c"%d", align 8 + @.str.4 = private unnamed_addr constant [2 x i8] c"%d", align 8 + + define internal fastcc void @foo() unnamed_addr { + entry: + ret void + } + + declare dso_local noundef i32 @printf(ptr nocapture noundef readonly, ...) local_unnamed_addr + +--- +name: foo +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: true +registers: [] +liveins: + - { reg: '$d0', virtual-reg: '' } + - { reg: '$d3', virtual-reg: '' } + - { reg: '$r23', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: true + hasCalls: true + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + isCalleeSavedInfoValid: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.1 + liveins: $r25, $r26, $d11 + + renamable $r16 = A2_tfrsi 0 + S2_storerd_io $r29, 0, renamable $d11 :: (store (s64) into stack) + $r0 = A2_tfrsi @.str.3 + J2_call @printf, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit $r0, implicit-def $r29, implicit-def dead $r0 + J2_jump %bb.1, implicit-def dead $pc + + bb.1: + successors: %bb.2, %bb.1 + liveins: $r16, $r25, $r26 + + S2_storeri_io $r29, 0, killed renamable $r25 :: (store (s32) into stack) + $r0 = A2_tfrsi @.str.4 + S2_storeri_io $r29, 8, killed renamable $r26 :: (store (s64) into stack + 8) + J2_call @printf, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit $r0, implicit-def $r29, implicit-def dead $r0 + renamable $p0 = C2_cmpgti renamable $r16, 4 + renamable $r16 = nsw A2_addi killed renamable $r16, 1 + J2_jumpf killed renamable $p0, %bb.2, implicit-def dead $pc + J2_jump %bb.1, implicit-def dead $pc + + bb.2: + liveins: $r16, $r25, $r26 + + PS_jmpret $r31, implicit-def dead $pc + +...