Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/ModuloSchedule.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,9 @@ class ModuloScheduleExpander {
/// Instructions to change when emitting the final schedule.
InstrChangesTy InstrChanges;

/// Record the registers that need to compute live intervals.
SmallVector<Register> NoIntervalRegs;

void generatePipelinedLoop();
void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
Expand All @@ -211,6 +214,7 @@ class ModuloScheduleExpander {
void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
ValueMapTy *VRMap);
void calculateIntervals();
bool computeDelta(MachineInstr &MI, unsigned &Delta);
void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
unsigned Num);
Expand Down
67 changes: 54 additions & 13 deletions llvm/lib/CodeGen/ModuloSchedule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
KernelBB->push_back(NewMI);
LIS.InsertMachineInstrInMaps(*NewMI);
InstrMap[NewMI] = CI;
}

Expand All @@ -150,6 +151,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
KernelBB->push_back(NewMI);
LIS.InsertMachineInstrInMaps(*NewMI);
InstrMap[NewMI] = &MI;
}

Expand Down Expand Up @@ -179,6 +181,10 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
// Add branches between prolog and epilog blocks.
addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);

// The intervals of newly created virtual registers are calculated after the
// kernel expansion.
calculateIntervals();

delete[] VRMap;
delete[] VRMapPhi;
}
Expand Down Expand Up @@ -226,6 +232,7 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage,
cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
NewBB->push_back(NewMI);
LIS.InsertMachineInstrInMaps(*NewMI);
InstrMap[NewMI] = &*BBI;
}
}
Expand Down Expand Up @@ -303,6 +310,7 @@ void ModuloScheduleExpander::generateEpilog(
MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
NewBB->push_back(NewMI);
LIS.InsertMachineInstrInMaps(*NewMI);
InstrMap[NewMI] = In;
}
}
Expand Down Expand Up @@ -344,13 +352,16 @@ void ModuloScheduleExpander::generateEpilog(
static void replaceRegUsesAfterLoop(Register FromReg, Register ToReg,
MachineBasicBlock *MBB,
MachineRegisterInfo &MRI,
LiveIntervals &LIS) {
LiveIntervals &LIS,
SmallVector<Register> &NoIntervalRegs) {
for (MachineOperand &O :
llvm::make_early_inc_range(MRI.use_operands(FromReg)))
if (O.getParent()->getParent() != MBB)
O.setReg(ToReg);
// The interval will be calculated after the kernel expansion in
// calculateIntervals().
if (!LIS.hasInterval(ToReg))
LIS.createEmptyInterval(ToReg);
NoIntervalRegs.push_back(ToReg);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd expect this to be unconditionally added as well, you just created the new register

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

}

/// Return true if the register has a use that occurs outside the
Expand Down Expand Up @@ -542,7 +553,8 @@ void ModuloScheduleExpander::generateExistingPhis(
PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];

if (IsLast && np == NumPhis - 1)
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS,
NoIntervalRegs);
continue;
}
}
Expand All @@ -560,6 +572,7 @@ void ModuloScheduleExpander::generateExistingPhis(
TII->get(TargetOpcode::PHI), NewReg);
NewPhi.addReg(PhiOp1).addMBB(BB1);
NewPhi.addReg(PhiOp2).addMBB(BB2);
LIS.InsertMachineInstrInMaps(*NewPhi);
if (np == 0)
InstrMap[NewPhi] = &*BBI;

Expand All @@ -582,7 +595,7 @@ void ModuloScheduleExpander::generateExistingPhis(
// register to replace depends on whether the Phi is scheduled in the
// epilog.
if (IsLast && np == NumPhis - 1)
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS, NoIntervalRegs);

// In the kernel, a dependent Phi uses the value from this Phi.
if (InKernel)
Expand All @@ -602,7 +615,7 @@ void ModuloScheduleExpander::generateExistingPhis(
if (NumStages == 0 && IsLast) {
auto It = VRMap[CurStageNum].find(LoopVal);
if (It != VRMap[CurStageNum].end())
replaceRegUsesAfterLoop(Def, It->second, BB, MRI, LIS);
replaceRegUsesAfterLoop(Def, It->second, BB, MRI, LIS, NoIntervalRegs);
}
}
}
Expand Down Expand Up @@ -702,6 +715,7 @@ void ModuloScheduleExpander::generatePhis(
TII->get(TargetOpcode::PHI), NewReg);
NewPhi.addReg(PhiOp1).addMBB(BB1);
NewPhi.addReg(PhiOp2).addMBB(BB2);
LIS.InsertMachineInstrInMaps(*NewPhi);
if (np == 0)
InstrMap[NewPhi] = &*BBI;

Expand All @@ -722,7 +736,7 @@ void ModuloScheduleExpander::generatePhis(
NewReg);
}
if (IsLast && np == NumPhis - 1)
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS, NoIntervalRegs);
}
}
}
Expand Down Expand Up @@ -831,9 +845,11 @@ void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
// We split the lifetime when we find the first use.
if (!SplitReg) {
SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
BuildMI(*KernelBB, MI, MI->getDebugLoc(),
TII->get(TargetOpcode::COPY), SplitReg)
.addReg(Def);
MachineInstr *newCopy =
BuildMI(*KernelBB, MI, MI->getDebugLoc(),
TII->get(TargetOpcode::COPY), SplitReg)
.addReg(Def);
LIS.InsertMachineInstrInMaps(*newCopy);
}
BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
}
Expand Down Expand Up @@ -901,13 +917,17 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
removePhis(Epilog, LastEpi);
// Remove the blocks that are no longer referenced.
if (LastPro != LastEpi) {
for (auto &MI : *LastEpi)
LIS.RemoveMachineInstrFromMaps(MI);
LastEpi->clear();
LastEpi->eraseFromParent();
}
if (LastPro == KernelBB) {
LoopInfo->disposed(&LIS);
NewKernel = nullptr;
}
for (auto &MI : *LastPro)
LIS.RemoveMachineInstrFromMaps(MI);
LastPro->clear();
LastPro->eraseFromParent();
} else {
Expand All @@ -928,6 +948,26 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
}
}

/// Some registers are generated during the kernel expansion. We calculate the
/// live intervals of these registers after the expansion.
void ModuloScheduleExpander::calculateIntervals() {
// The interval can be computed if all the register's non-debug users have
// slot indexes.
auto CanCalculateInterval = [this](Register Reg) -> bool {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You shouldn't need to pre-filter anything. Your function is broken if there are instructions without slot indexes

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

for (auto &Opnd : this->MRI.reg_nodbg_operands(Reg))
if (this->LIS.isNotInMIMap(*Opnd.getParent()))
return false;
return true;
};
for (auto Reg : NoIntervalRegs) {
if (CanCalculateInterval(Reg))
LIS.createAndComputeVirtRegInterval(Reg);
else
LIS.createEmptyInterval(Reg);
}
NoIntervalRegs.clear();
}

/// Return true if we can compute the amount the instruction changes
/// during each iteration. Set Delta to the amount of the change.
bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
Expand Down Expand Up @@ -1049,7 +1089,7 @@ void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
MO.setReg(NewReg);
VRMap[CurStageNum][reg] = NewReg;
if (LastDef)
replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS, NoIntervalRegs);
} else if (MO.isUse()) {
MachineInstr *Def = MRI.getVRegDef(reg);
// Compute the stage that contains the last definition for instruction.
Expand Down Expand Up @@ -1198,10 +1238,11 @@ void ModuloScheduleExpander::rewriteScheduledInstr(
UseOp.setReg(ReplaceReg);
else {
Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
SplitReg)
.addReg(ReplaceReg);
MachineInstr *newCopy = BuildMI(*BB, UseMI, UseMI->getDebugLoc(),
TII->get(TargetOpcode::COPY), SplitReg)
.addReg(ReplaceReg);
UseOp.setReg(SplitReg);
LIS.InsertMachineInstrInMaps(*newCopy);
}
}
}
Expand Down
157 changes: 157 additions & 0 deletions llvm/test/CodeGen/Hexagon/swp-ws-live-intervals-issue128714.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc --mtriple=hexagon %s -run-pass=pipeliner -o -| FileCheck %s

--- |
define void @test_swp_ws_live_intervals(i32 %.pre) {
entry:
%cgep9 = bitcast ptr null to ptr
br label %for.body147

for.body147: ; preds = %for.body170, %entry
%add11.i526 = or i32 %.pre, 1
br label %for.body158

for.body158: ; preds = %for.body158, %for.body147
%lsr.iv = phi i32 [ %lsr.iv.next, %for.body158 ], [ -1, %for.body147 ]
%add11.i536602603 = phi i32 [ %add11.i526, %for.body147 ], [ 0, %for.body158 ]
%and8.i534 = and i32 %add11.i536602603, 1
%cgep7 = getelementptr [64 x i32], ptr %cgep9, i32 0, i32 %and8.i534
store i32 0, ptr %cgep7, align 4
%lsr.iv.next = add nsw i32 %lsr.iv, 1
%cmp157.3 = icmp ult i32 %lsr.iv.next, 510
br i1 %cmp157.3, label %for.body158, label %for.body170

for.body170: ; preds = %for.body170, %for.body158
%lsr.iv3 = phi ptr [ %cgep6, %for.body170 ], [ inttoptr (i32 4 to ptr), %for.body158 ]
%lsr.iv1 = phi i32 [ %lsr.iv.next2, %for.body170 ], [ -1, %for.body158 ]
%add11.i556606607 = phi i32 [ 0, %for.body170 ], [ 1, %for.body158 ]
%cgep5 = getelementptr i8, ptr %lsr.iv3, i32 -4
store i32 0, ptr %cgep5, align 8
%sub.i547.1 = add i32 %add11.i556606607, 1
%and.i548.1 = and i32 %sub.i547.1, 1
%cgep8 = getelementptr [64 x i32], ptr %cgep9, i32 0, i32 %and.i548.1
%0 = load i32, ptr %cgep8, align 4
store i32 %0, ptr %lsr.iv3, align 4
%lsr.iv.next2 = add nsw i32 %lsr.iv1, 1
%cmp169.1 = icmp ult i32 %lsr.iv.next2, 254
%cgep6 = getelementptr i8, ptr %lsr.iv3, i32 2
br i1 %cmp169.1, label %for.body170, label %for.body147
}

...
---
name: test_swp_ws_live_intervals
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: test_swp_ws_live_intervals
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:intregs = COPY $r0
; CHECK-NEXT: [[S2_setbit_i:%[0-9]+]]:intregs = S2_setbit_i [[COPY]], 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.6(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[A2_andir:%[0-9]+]]:intregs = A2_andir [[S2_setbit_i]], 1
; CHECK-NEXT: [[S2_asl_i_r:%[0-9]+]]:intregs = S2_asl_i_r [[A2_andir]], 2
; CHECK-NEXT: [[A2_tfrsi:%[0-9]+]]:intregs = A2_tfrsi 1
; CHECK-NEXT: [[A2_tfrsi1:%[0-9]+]]:intregs = A2_tfrsi 4
; CHECK-NEXT: [[A2_tfrsi2:%[0-9]+]]:intregs = A2_tfrsi 0
; CHECK-NEXT: J2_loop0i %bb.6, 510, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
; CHECK-NEXT: J2_jump %bb.6, implicit-def $pc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.6(0x7c000000), %bb.7(0x04000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI:%[0-9]+]]:intregs = PHI [[A2_tfrsi2]], %bb.5, %24, %bb.6
; CHECK-NEXT: [[PHI1:%[0-9]+]]:intregs = PHI [[S2_asl_i_r]], %bb.5, %23, %bb.6
; CHECK-NEXT: S4_storeiri_io [[PHI1]], 0, 0 :: (store (s32) into %ir.cgep7)
; CHECK-NEXT: [[A2_andir1:%[0-9]+]]:intregs = A2_andir [[PHI]], 1
; CHECK-NEXT: [[A2_tfrsi3:%[0-9]+]]:intregs = A2_tfrsi 1
; CHECK-NEXT: [[A2_tfrsi4:%[0-9]+]]:intregs = A2_tfrsi 4
; CHECK-NEXT: [[S2_asl_i_r1:%[0-9]+]]:intregs = S2_asl_i_r [[A2_andir1]], 2
; CHECK-NEXT: [[A2_tfrsi5:%[0-9]+]]:intregs = A2_tfrsi 0
; CHECK-NEXT: ENDLOOP0 %bb.6, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
; CHECK-NEXT: J2_jump %bb.7, implicit-def $pc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI2:%[0-9]+]]:intregs = PHI [[S2_asl_i_r1]], %bb.6
; CHECK-NEXT: [[PHI3:%[0-9]+]]:intregs = PHI [[A2_tfrsi3]], %bb.6
; CHECK-NEXT: [[PHI4:%[0-9]+]]:intregs = PHI [[A2_tfrsi4]], %bb.6
; CHECK-NEXT: S4_storeiri_io [[PHI2]], 0, 0 :: (store unknown-size into %ir.cgep7, align 4)
; CHECK-NEXT: J2_jump %bb.3, implicit-def $pc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: J2_loop0i %bb.4, 255, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
; CHECK-NEXT: J2_jump %bb.4, implicit-def $pc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.1(0x04000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI5:%[0-9]+]]:intregs = PHI [[PHI4]], %bb.3, %9, %bb.4
; CHECK-NEXT: [[PHI6:%[0-9]+]]:intregs = PHI [[PHI3]], %bb.3, %11, %bb.4
; CHECK-NEXT: [[A2_tfrsi6:%[0-9]+]]:intregs = A2_tfrsi 0
; CHECK-NEXT: S2_storeri_io [[PHI5]], -4, [[A2_tfrsi6]] :: (store (s32) into %ir.cgep5, align 8)
; CHECK-NEXT: [[A2_addi:%[0-9]+]]:intregs = A2_addi [[PHI6]], 1
; CHECK-NEXT: [[S2_insert:%[0-9]+]]:intregs = S2_insert [[PHI2]], [[A2_addi]], 1, 2
; CHECK-NEXT: [[L2_loadri_io:%[0-9]+]]:intregs = L2_loadri_io [[S2_insert]], 0 :: (load (s32) from %ir.cgep8)
; CHECK-NEXT: S2_storeri_io [[PHI5]], 0, [[L2_loadri_io]] :: (store (s32) into %ir.lsr.iv3)
; CHECK-NEXT: [[A2_addi1:%[0-9]+]]:intregs = A2_addi [[PHI5]], 2
; CHECK-NEXT: ENDLOOP0 %bb.4, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
; CHECK-NEXT: J2_jump %bb.1, implicit-def dead $pc
bb.0.entry:
successors: %bb.1(0x80000000)
liveins: $r0

%0:intregs = COPY $r0
%1:intregs = S2_setbit_i %0, 0

bb.1:
successors: %bb.2(0x80000000)

J2_loop0i %bb.2, 511, implicit-def $lc0, implicit-def $sa0, implicit-def $usr

bb.2:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)

%2:intregs = PHI %1, %bb.1, %3, %bb.2
%4:intregs = A2_andir %2, 1
%5:intregs = S2_asl_i_r %4, 2
S4_storeiri_io %5, 0, 0 :: (store (s32) into %ir.cgep7)
%6:intregs = A2_tfrsi 1
%7:intregs = A2_tfrsi 4
%3:intregs = A2_tfrsi 0
ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
J2_jump %bb.3, implicit-def dead $pc

bb.3:
successors: %bb.4(0x80000000)

J2_loop0i %bb.4, 255, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
J2_jump %bb.4, implicit-def $pc

bb.4:
successors: %bb.4(0x7c000000), %bb.1(0x04000000)

%8:intregs = PHI %7, %bb.3, %9, %bb.4
%10:intregs = PHI %6, %bb.3, %11, %bb.4
%11:intregs = A2_tfrsi 0
S2_storeri_io %8, -4, %11 :: (store (s32) into %ir.cgep5, align 8)
%12:intregs = A2_addi %10, 1
%13:intregs = S2_insert %5, %12, 1, 2
%14:intregs = L2_loadri_io %13, 0 :: (load (s32) from %ir.cgep8)
S2_storeri_io %8, 0, %14 :: (store (s32) into %ir.lsr.iv3)
%9:intregs = A2_addi %8, 2
ENDLOOP0 %bb.4, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
J2_jump %bb.1, implicit-def dead $pc

...
Loading