Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/ModuloSchedule.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,9 @@ class ModuloScheduleExpander {
/// Instructions to change when emitting the final schedule.
InstrChangesTy InstrChanges;

/// Record the registers that need to compute live intervals.
SmallVector<Register> NoIntervalRegs;

void generatePipelinedLoop();
void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
Expand All @@ -211,6 +214,7 @@ class ModuloScheduleExpander {
void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
ValueMapTy *VRMap);
void calculateIntervals();
bool computeDelta(MachineInstr &MI, unsigned &Delta);
void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
unsigned Num);
Expand Down
72 changes: 52 additions & 20 deletions llvm/lib/CodeGen/ModuloSchedule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
KernelBB->push_back(NewMI);
LIS.InsertMachineInstrInMaps(*NewMI);
InstrMap[NewMI] = CI;
}

Expand All @@ -150,6 +151,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
KernelBB->push_back(NewMI);
LIS.InsertMachineInstrInMaps(*NewMI);
InstrMap[NewMI] = &MI;
}

Expand Down Expand Up @@ -179,6 +181,10 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
// Add branches between prolog and epilog blocks.
addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);

// The intervals of newly created virtual registers are calculated after the
// kernel expansion.
calculateIntervals();

delete[] VRMap;
delete[] VRMapPhi;
}
Expand Down Expand Up @@ -226,6 +232,7 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage,
cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
NewBB->push_back(NewMI);
LIS.InsertMachineInstrInMaps(*NewMI);
InstrMap[NewMI] = &*BBI;
}
}
Expand Down Expand Up @@ -303,6 +310,7 @@ void ModuloScheduleExpander::generateEpilog(
MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
NewBB->push_back(NewMI);
LIS.InsertMachineInstrInMaps(*NewMI);
InstrMap[NewMI] = In;
}
}
Expand Down Expand Up @@ -343,14 +351,11 @@ void ModuloScheduleExpander::generateEpilog(
/// basic block with ToReg.
static void replaceRegUsesAfterLoop(Register FromReg, Register ToReg,
MachineBasicBlock *MBB,
MachineRegisterInfo &MRI,
LiveIntervals &LIS) {
MachineRegisterInfo &MRI) {
for (MachineOperand &O :
llvm::make_early_inc_range(MRI.use_operands(FromReg)))
if (O.getParent()->getParent() != MBB)
O.setReg(ToReg);
if (!LIS.hasInterval(ToReg))
LIS.createEmptyInterval(ToReg);
}

/// Return true if the register has a use that occurs outside the
Expand Down Expand Up @@ -541,8 +546,10 @@ void ModuloScheduleExpander::generateExistingPhis(
if (VRMap[LastStageNum - np - 1].count(LoopVal))
PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];

if (IsLast && np == NumPhis - 1)
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
if (IsLast && np == NumPhis - 1) {
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
NoIntervalRegs.push_back(NewReg);
}
continue;
}
}
Expand All @@ -560,6 +567,7 @@ void ModuloScheduleExpander::generateExistingPhis(
TII->get(TargetOpcode::PHI), NewReg);
NewPhi.addReg(PhiOp1).addMBB(BB1);
NewPhi.addReg(PhiOp2).addMBB(BB2);
LIS.InsertMachineInstrInMaps(*NewPhi);
if (np == 0)
InstrMap[NewPhi] = &*BBI;

Expand All @@ -581,8 +589,10 @@ void ModuloScheduleExpander::generateExistingPhis(
// Check if we need to rename any uses that occurs after the loop. The
// register to replace depends on whether the Phi is scheduled in the
// epilog.
if (IsLast && np == NumPhis - 1)
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
if (IsLast && np == NumPhis - 1) {
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
NoIntervalRegs.push_back(NewReg);
}

// In the kernel, a dependent Phi uses the value from this Phi.
if (InKernel)
Expand All @@ -601,8 +611,10 @@ void ModuloScheduleExpander::generateExistingPhis(
// scheduling.
if (NumStages == 0 && IsLast) {
auto It = VRMap[CurStageNum].find(LoopVal);
if (It != VRMap[CurStageNum].end())
replaceRegUsesAfterLoop(Def, It->second, BB, MRI, LIS);
if (It != VRMap[CurStageNum].end()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Double map lookup on VRMap[CurStageNum]

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that VRMap is a pointer to a contiguous block of memory, and VRMap[CurStageNum] does not involve hash lookup. The naming of VRMap is somewhat misleading.
image

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I understand that this can remain unchanged here. What is your opinion on this? @arsenm

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's still a DenseMap, just use the temporary reference to avoid it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, do you see any other modifications needed for this patch? @arsenm

replaceRegUsesAfterLoop(Def, It->second, BB, MRI);
NoIntervalRegs.push_back(It->second);
}
}
}
}
Expand Down Expand Up @@ -702,6 +714,7 @@ void ModuloScheduleExpander::generatePhis(
TII->get(TargetOpcode::PHI), NewReg);
NewPhi.addReg(PhiOp1).addMBB(BB1);
NewPhi.addReg(PhiOp2).addMBB(BB2);
LIS.InsertMachineInstrInMaps(*NewPhi);
if (np == 0)
InstrMap[NewPhi] = &*BBI;

Expand All @@ -721,8 +734,10 @@ void ModuloScheduleExpander::generatePhis(
rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
NewReg);
}
if (IsLast && np == NumPhis - 1)
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
if (IsLast && np == NumPhis - 1) {
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
NoIntervalRegs.push_back(NewReg);
}
}
}
}
Expand Down Expand Up @@ -831,9 +846,11 @@ void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
// We split the lifetime when we find the first use.
if (!SplitReg) {
SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
BuildMI(*KernelBB, MI, MI->getDebugLoc(),
TII->get(TargetOpcode::COPY), SplitReg)
.addReg(Def);
MachineInstr *newCopy =
BuildMI(*KernelBB, MI, MI->getDebugLoc(),
TII->get(TargetOpcode::COPY), SplitReg)
.addReg(Def);
LIS.InsertMachineInstrInMaps(*newCopy);
}
BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
}
Expand Down Expand Up @@ -901,13 +918,17 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
removePhis(Epilog, LastEpi);
// Remove the blocks that are no longer referenced.
if (LastPro != LastEpi) {
for (auto &MI : *LastEpi)
LIS.RemoveMachineInstrFromMaps(MI);
LastEpi->clear();
LastEpi->eraseFromParent();
}
if (LastPro == KernelBB) {
LoopInfo->disposed(&LIS);
NewKernel = nullptr;
}
for (auto &MI : *LastPro)
LIS.RemoveMachineInstrFromMaps(MI);
LastPro->clear();
LastPro->eraseFromParent();
} else {
Expand All @@ -928,6 +949,14 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
}
}

/// Some registers are generated during the kernel expansion. We calculate the
/// live intervals of these registers after the expansion.
void ModuloScheduleExpander::calculateIntervals() {
for (Register Reg : NoIntervalRegs)
LIS.createAndComputeVirtRegInterval(Reg);
NoIntervalRegs.clear();
}

/// Return true if we can compute the amount the instruction changes
/// during each iteration. Set Delta to the amount of the change.
bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
Expand Down Expand Up @@ -1048,8 +1077,10 @@ void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
Register NewReg = MRI.createVirtualRegister(RC);
MO.setReg(NewReg);
VRMap[CurStageNum][reg] = NewReg;
if (LastDef)
replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
if (LastDef) {
replaceRegUsesAfterLoop(reg, NewReg, BB, MRI);
NoIntervalRegs.push_back(NewReg);
}
} else if (MO.isUse()) {
MachineInstr *Def = MRI.getVRegDef(reg);
// Compute the stage that contains the last definition for instruction.
Expand Down Expand Up @@ -1198,10 +1229,11 @@ void ModuloScheduleExpander::rewriteScheduledInstr(
UseOp.setReg(ReplaceReg);
else {
Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
SplitReg)
.addReg(ReplaceReg);
MachineInstr *newCopy = BuildMI(*BB, UseMI, UseMI->getDebugLoc(),
TII->get(TargetOpcode::COPY), SplitReg)
.addReg(ReplaceReg);
UseOp.setReg(SplitReg);
LIS.InsertMachineInstrInMaps(*newCopy);
}
}
}
Expand Down
157 changes: 157 additions & 0 deletions llvm/test/CodeGen/Hexagon/swp-ws-live-intervals-issue128714.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc --mtriple=hexagon %s -run-pass=pipeliner -o -| FileCheck %s

--- |
define void @test_swp_ws_live_intervals(i32 %.pre) {
entry:
%cgep9 = bitcast ptr null to ptr
br label %for.body147

for.body147: ; preds = %for.body170, %entry
%add11.i526 = or i32 %.pre, 1
br label %for.body158

for.body158: ; preds = %for.body158, %for.body147
%lsr.iv = phi i32 [ %lsr.iv.next, %for.body158 ], [ -1, %for.body147 ]
%add11.i536602603 = phi i32 [ %add11.i526, %for.body147 ], [ 0, %for.body158 ]
%and8.i534 = and i32 %add11.i536602603, 1
%cgep7 = getelementptr [64 x i32], ptr %cgep9, i32 0, i32 %and8.i534
store i32 0, ptr %cgep7, align 4
%lsr.iv.next = add nsw i32 %lsr.iv, 1
%cmp157.3 = icmp ult i32 %lsr.iv.next, 510
br i1 %cmp157.3, label %for.body158, label %for.body170

for.body170: ; preds = %for.body170, %for.body158
%lsr.iv3 = phi ptr [ %cgep6, %for.body170 ], [ inttoptr (i32 4 to ptr), %for.body158 ]
%lsr.iv1 = phi i32 [ %lsr.iv.next2, %for.body170 ], [ -1, %for.body158 ]
%add11.i556606607 = phi i32 [ 0, %for.body170 ], [ 1, %for.body158 ]
%cgep5 = getelementptr i8, ptr %lsr.iv3, i32 -4
store i32 0, ptr %cgep5, align 8
%sub.i547.1 = add i32 %add11.i556606607, 1
%and.i548.1 = and i32 %sub.i547.1, 1
%cgep8 = getelementptr [64 x i32], ptr %cgep9, i32 0, i32 %and.i548.1
%0 = load i32, ptr %cgep8, align 4
store i32 %0, ptr %lsr.iv3, align 4
%lsr.iv.next2 = add nsw i32 %lsr.iv1, 1
%cmp169.1 = icmp ult i32 %lsr.iv.next2, 254
%cgep6 = getelementptr i8, ptr %lsr.iv3, i32 2
br i1 %cmp169.1, label %for.body170, label %for.body147
}

...
---
name: test_swp_ws_live_intervals
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: test_swp_ws_live_intervals
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:intregs = COPY $r0
; CHECK-NEXT: [[S2_setbit_i:%[0-9]+]]:intregs = S2_setbit_i [[COPY]], 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.6(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[A2_andir:%[0-9]+]]:intregs = A2_andir [[S2_setbit_i]], 1
; CHECK-NEXT: [[S2_asl_i_r:%[0-9]+]]:intregs = S2_asl_i_r [[A2_andir]], 2
; CHECK-NEXT: [[A2_tfrsi:%[0-9]+]]:intregs = A2_tfrsi 1
; CHECK-NEXT: [[A2_tfrsi1:%[0-9]+]]:intregs = A2_tfrsi 4
; CHECK-NEXT: [[A2_tfrsi2:%[0-9]+]]:intregs = A2_tfrsi 0
; CHECK-NEXT: J2_loop0i %bb.6, 510, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
; CHECK-NEXT: J2_jump %bb.6, implicit-def $pc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.6(0x7c000000), %bb.7(0x04000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI:%[0-9]+]]:intregs = PHI [[A2_tfrsi2]], %bb.5, %24, %bb.6
; CHECK-NEXT: [[PHI1:%[0-9]+]]:intregs = PHI [[S2_asl_i_r]], %bb.5, %23, %bb.6
; CHECK-NEXT: S4_storeiri_io [[PHI1]], 0, 0 :: (store (s32) into %ir.cgep7)
; CHECK-NEXT: [[A2_andir1:%[0-9]+]]:intregs = A2_andir [[PHI]], 1
; CHECK-NEXT: [[A2_tfrsi3:%[0-9]+]]:intregs = A2_tfrsi 1
; CHECK-NEXT: [[A2_tfrsi4:%[0-9]+]]:intregs = A2_tfrsi 4
; CHECK-NEXT: [[S2_asl_i_r1:%[0-9]+]]:intregs = S2_asl_i_r [[A2_andir1]], 2
; CHECK-NEXT: [[A2_tfrsi5:%[0-9]+]]:intregs = A2_tfrsi 0
; CHECK-NEXT: ENDLOOP0 %bb.6, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
; CHECK-NEXT: J2_jump %bb.7, implicit-def $pc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI2:%[0-9]+]]:intregs = PHI [[S2_asl_i_r1]], %bb.6
; CHECK-NEXT: [[PHI3:%[0-9]+]]:intregs = PHI [[A2_tfrsi3]], %bb.6
; CHECK-NEXT: [[PHI4:%[0-9]+]]:intregs = PHI [[A2_tfrsi4]], %bb.6
; CHECK-NEXT: S4_storeiri_io [[PHI2]], 0, 0 :: (store unknown-size into %ir.cgep7, align 4)
; CHECK-NEXT: J2_jump %bb.3, implicit-def $pc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: J2_loop0i %bb.4, 255, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
; CHECK-NEXT: J2_jump %bb.4, implicit-def $pc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.1(0x04000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI5:%[0-9]+]]:intregs = PHI [[PHI4]], %bb.3, %9, %bb.4
; CHECK-NEXT: [[PHI6:%[0-9]+]]:intregs = PHI [[PHI3]], %bb.3, %11, %bb.4
; CHECK-NEXT: [[A2_tfrsi6:%[0-9]+]]:intregs = A2_tfrsi 0
; CHECK-NEXT: S2_storeri_io [[PHI5]], -4, [[A2_tfrsi6]] :: (store (s32) into %ir.cgep5, align 8)
; CHECK-NEXT: [[A2_addi:%[0-9]+]]:intregs = A2_addi [[PHI6]], 1
; CHECK-NEXT: [[S2_insert:%[0-9]+]]:intregs = S2_insert [[PHI2]], [[A2_addi]], 1, 2
; CHECK-NEXT: [[L2_loadri_io:%[0-9]+]]:intregs = L2_loadri_io [[S2_insert]], 0 :: (load (s32) from %ir.cgep8)
; CHECK-NEXT: S2_storeri_io [[PHI5]], 0, [[L2_loadri_io]] :: (store (s32) into %ir.lsr.iv3)
; CHECK-NEXT: [[A2_addi1:%[0-9]+]]:intregs = A2_addi [[PHI5]], 2
; CHECK-NEXT: ENDLOOP0 %bb.4, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
; CHECK-NEXT: J2_jump %bb.1, implicit-def dead $pc
bb.0.entry:
successors: %bb.1(0x80000000)
liveins: $r0

%0:intregs = COPY $r0
%1:intregs = S2_setbit_i %0, 0

bb.1:
successors: %bb.2(0x80000000)

J2_loop0i %bb.2, 511, implicit-def $lc0, implicit-def $sa0, implicit-def $usr

bb.2:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)

%2:intregs = PHI %1, %bb.1, %3, %bb.2
%4:intregs = A2_andir %2, 1
%5:intregs = S2_asl_i_r %4, 2
S4_storeiri_io %5, 0, 0 :: (store (s32) into %ir.cgep7)
%6:intregs = A2_tfrsi 1
%7:intregs = A2_tfrsi 4
%3:intregs = A2_tfrsi 0
ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
J2_jump %bb.3, implicit-def dead $pc

bb.3:
successors: %bb.4(0x80000000)

J2_loop0i %bb.4, 255, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
J2_jump %bb.4, implicit-def $pc

bb.4:
successors: %bb.4(0x7c000000), %bb.1(0x04000000)

%8:intregs = PHI %7, %bb.3, %9, %bb.4
%10:intregs = PHI %6, %bb.3, %11, %bb.4
%11:intregs = A2_tfrsi 0
S2_storeri_io %8, -4, %11 :: (store (s32) into %ir.cgep5, align 8)
%12:intregs = A2_addi %10, 1
%13:intregs = S2_insert %5, %12, 1, 2
%14:intregs = L2_loadri_io %13, 0 :: (load (s32) from %ir.cgep8)
S2_storeri_io %8, 0, %14 :: (store (s32) into %ir.lsr.iv3)
%9:intregs = A2_addi %8, 2
ENDLOOP0 %bb.4, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
J2_jump %bb.1, implicit-def dead $pc

...