Skip to content

Commit d13e191

Browse files
committed
[HEXAGON] Fix corner cases for hwloops pass
Add checks to the Hexagon hardware loops pass to make sure that Dist > 0 or Dist < 0 holds for the appropriate comparison cases. The change modifies the HexagonHardwareLoops pass to emit runtime checks that end_value > initial_value for less-than comparisons and that end_value < initial_value for greater-than comparisons. Change-Id: Ie4b3666ecf69b7aebeb6cfaa48535063677f929c
1 parent 3f62718 commit d13e191

File tree

3 files changed

+329
-3
lines changed

3 files changed

+329
-3
lines changed

llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,11 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
731731
Register IVReg,
732732
int64_t IVBump,
733733
Comparison::Kind Cmp) const {
734+
LLVM_DEBUG(llvm::dbgs() << "Loop: " << *Loop << "\n");
735+
LLVM_DEBUG(llvm::dbgs() << "Initial Value: " << *Start << "\n");
736+
LLVM_DEBUG(llvm::dbgs() << "End Value: " << *End << "\n");
737+
LLVM_DEBUG(llvm::dbgs() << "Inc/Dec Value: " << IVBump << "\n");
738+
LLVM_DEBUG(llvm::dbgs() << "Comparison: " << Cmp << "\n");
734739
// Cannot handle comparison EQ, i.e. while (A == B).
735740
if (Cmp == Comparison::EQ)
736741
return nullptr;
@@ -846,6 +851,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
846851
if (IVBump < 0) {
847852
std::swap(Start, End);
848853
IVBump = -IVBump;
854+
std::swap(CmpLess, CmpGreater);
849855
}
850856
// Cmp may now have a wrong direction, e.g. LEs may now be GEs.
851857
// Signedness, and "including equality" are preserved.
@@ -989,7 +995,45 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
989995
CountSR = 0;
990996
}
991997

992-
return new CountValue(CountValue::CV_Register, CountR, CountSR);
998+
const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass;
999+
Register MuxR = CountR;
1000+
unsigned MuxSR = CountSR;
1001+
// For the loop count to be a valid unsigned number, CmpLess should imply
1002+
// Dist >= 0. Similarly, CmpGreater should imply Dist < 0. We can skip the
1003+
// check if the initial value is zero and the comparison is LTu or LEu.
1004+
if (!(Start->isImm() && StartV == 0 && Comparison::isUnsigned(Cmp) &&
1005+
CmpLess) &&
1006+
(CmpLess || CmpGreater)) {
1007+
// Generate:
1008+
// DistCheck = CMP_GT DistR, 0 --> CmpLess
1009+
// DistCheck = CMP_GT DistR, -1 --> CmpGreater
1010+
Register DistCheckR = MRI->createVirtualRegister(PredRC);
1011+
const MCInstrDesc &DistCheckD = TII->get(Hexagon::C2_cmpgti);
1012+
BuildMI(*PH, InsertPos, DL, DistCheckD, DistCheckR)
1013+
.addReg(DistR, 0, DistSR)
1014+
.addImm((CmpLess) ? 0 : -1);
1015+
1016+
// Generate:
1017+
// MUXR = MUX DistCheck, CountR, 1 --> CmpLess
1018+
// MUXR = MUX DistCheck, 1, CountR --> CmpGreater
1019+
MuxR = MRI->createVirtualRegister(IntRC);
1020+
if (CmpLess) {
1021+
const MCInstrDesc &MuxD = TII->get(Hexagon::C2_muxir);
1022+
BuildMI(*PH, InsertPos, DL, MuxD, MuxR)
1023+
.addReg(DistCheckR)
1024+
.addReg(CountR, 0, CountSR)
1025+
.addImm(1);
1026+
} else {
1027+
const MCInstrDesc &MuxD = TII->get(Hexagon::C2_muxri);
1028+
BuildMI(*PH, InsertPos, DL, MuxD, MuxR)
1029+
.addReg(DistCheckR)
1030+
.addImm(1)
1031+
.addReg(CountR, 0, CountSR);
1032+
}
1033+
MuxSR = 0;
1034+
}
1035+
1036+
return new CountValue(CountValue::CV_Register, MuxR, MuxSR);
9931037
}
9941038

9951039
/// Return true if the operation is invalid within hardware loop.
Lines changed: 281 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,281 @@
1+
# RUN: llc -run-pass=hwloops %s -o - | FileCheck %s
2+
3+
# CHECK-LABEL: name: f
4+
# CHECK: [[R1:%[0-9]+]]:predregs = C2_cmpgti [[R0:%[0-9]+]], 0
5+
# CHECK: [[R3:%[0-9]+]]:intregs = C2_muxir [[R1:%[0-9]+]], [[R2:%[0-9]+]], 1
6+
# CHECK-LABEL: name: g
7+
# CHECK: [[R1:%[0-9]+]]:predregs = C2_cmpgti [[R0:%[0-9]+]], 0
8+
# CHECK: [[R3:%[0-9]+]]:intregs = C2_muxir [[R1:%[0-9]+]], [[R2:%[0-9]+]], 1
9+
--- |
10+
@a = dso_local global [255 x ptr] zeroinitializer, align 8
11+
12+
; Function Attrs: minsize nofree norecurse nosync nounwind optsize memory(write, argmem: none, inaccessiblemem: none)
13+
define dso_local void @f(i32 noundef %m) local_unnamed_addr #0 {
14+
entry:
15+
%cond = tail call i32 @llvm.smax.i32(i32 %m, i32 2)
16+
%0 = add nsw i32 %cond, -4
17+
%1 = shl i32 %cond, 3
18+
%cgep = getelementptr i8, ptr @a, i32 %1
19+
%cgep36 = bitcast ptr @a to ptr
20+
br label %do.body
21+
22+
do.body: ; preds = %do.body, %entry
23+
%lsr.iv1 = phi ptr [ %cgep4, %do.body ], [ %cgep, %entry ]
24+
%lsr.iv = phi i32 [ %lsr.iv.next, %do.body ], [ %0, %entry ]
25+
%sh.0 = phi i32 [ 256, %entry ], [ %shr, %do.body ]
26+
%shr = lshr i32 %sh.0, 1
27+
%cgep5 = getelementptr inbounds [255 x ptr], ptr %cgep36, i32 0, i32 %shr
28+
store ptr %lsr.iv1, ptr %cgep5, align 4, !tbaa !5
29+
%lsr.iv.next = add nsw i32 %lsr.iv, 4
30+
%cmp1 = icmp samesign ult i32 %lsr.iv.next, 1073741836
31+
%cgep4 = getelementptr i8, ptr %lsr.iv1, i32 32
32+
br i1 %cmp1, label %do.body, label %do.end, !llvm.loop !9
33+
34+
do.end: ; preds = %do.body
35+
ret void
36+
}
37+
38+
; Function Attrs: minsize nofree norecurse nosync nounwind optsize memory(write, argmem: none, inaccessiblemem: none)
39+
define dso_local void @g(i32 noundef %m) local_unnamed_addr #0 {
40+
entry:
41+
%0 = add i32 %m, -4
42+
%1 = shl i32 %m, 3
43+
%cgep = getelementptr i8, ptr @a, i32 %1
44+
%cgep36 = bitcast ptr @a to ptr
45+
br label %do.body
46+
47+
do.body: ; preds = %do.body, %entry
48+
%lsr.iv1 = phi ptr [ %cgep4, %do.body ], [ %cgep, %entry ]
49+
%lsr.iv = phi i32 [ %lsr.iv.next, %do.body ], [ %0, %entry ]
50+
%sh.0 = phi i32 [ 256, %entry ], [ %shr, %do.body ]
51+
%shr = lshr i32 %sh.0, 1
52+
%cgep5 = getelementptr inbounds [255 x ptr], ptr %cgep36, i32 0, i32 %shr
53+
store ptr %lsr.iv1, ptr %cgep5, align 4, !tbaa !5
54+
%lsr.iv.next = add i32 %lsr.iv, 4
55+
%cmp = icmp slt i32 %lsr.iv.next, 1073741836
56+
%cgep4 = getelementptr i8, ptr %lsr.iv1, i32 32
57+
br i1 %cmp, label %do.body, label %do.end, !llvm.loop !11
58+
59+
do.end: ; preds = %do.body
60+
ret void
61+
}
62+
63+
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
64+
declare i32 @llvm.smax.i32(i32, i32) #1
65+
66+
attributes #0 = { minsize nofree norecurse nosync nounwind optsize memory(write, argmem: none, inaccessiblemem: none) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv68" "target-features"="+v68,-long-calls" }
67+
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
68+
69+
!llvm.module.flags = !{!0, !1, !2, !3}
70+
71+
!0 = !{i32 1, !"wchar_size", i32 4}
72+
!1 = !{i32 8, !"PIC Level", i32 2}
73+
!2 = !{i32 7, !"PIE Level", i32 2}
74+
!3 = !{i32 7, !"frame-pointer", i32 2}
75+
!5 = !{!6, !6, i64 0}
76+
!6 = !{!"any pointer", !7, i64 0}
77+
!7 = !{!"omnipotent char", !8, i64 0}
78+
!8 = !{!"Simple C/C++ TBAA"}
79+
!9 = distinct !{!9, !10}
80+
!10 = !{!"llvm.loop.mustprogress"}
81+
!11 = distinct !{!11, !10}
82+
83+
...
84+
---
85+
name: f
86+
alignment: 4
87+
exposesReturnsTwice: false
88+
legalized: false
89+
regBankSelected: false
90+
selected: false
91+
failedISel: false
92+
tracksRegLiveness: true
93+
hasWinCFI: false
94+
noPhis: false
95+
isSSA: true
96+
noVRegs: false
97+
hasFakeUses: false
98+
callsEHReturn: false
99+
callsUnwindInit: false
100+
hasEHScopes: false
101+
hasEHFunclets: false
102+
isOutlined: false
103+
debugInstrRef: false
104+
failsVerification: false
105+
tracksDebugUserValues: false
106+
registers:
107+
- { id: 0, class: intregs, preferred-register: '', flags: [ ] }
108+
- { id: 1, class: intregs, preferred-register: '', flags: [ ] }
109+
- { id: 2, class: intregs, preferred-register: '', flags: [ ] }
110+
- { id: 3, class: intregs, preferred-register: '', flags: [ ] }
111+
- { id: 4, class: intregs, preferred-register: '', flags: [ ] }
112+
- { id: 5, class: intregs, preferred-register: '', flags: [ ] }
113+
- { id: 6, class: intregs, preferred-register: '', flags: [ ] }
114+
- { id: 7, class: intregs, preferred-register: '', flags: [ ] }
115+
- { id: 8, class: intregs, preferred-register: '', flags: [ ] }
116+
- { id: 9, class: intregs, preferred-register: '', flags: [ ] }
117+
- { id: 10, class: intregs, preferred-register: '', flags: [ ] }
118+
- { id: 11, class: intregs, preferred-register: '', flags: [ ] }
119+
- { id: 12, class: intregs, preferred-register: '', flags: [ ] }
120+
- { id: 13, class: predregs, preferred-register: '', flags: [ ] }
121+
- { id: 14, class: predregs, preferred-register: '', flags: [ ] }
122+
- { id: 15, class: intregs, preferred-register: '', flags: [ ] }
123+
liveins:
124+
- { reg: '$r0', virtual-reg: '%9' }
125+
frameInfo:
126+
isFrameAddressTaken: false
127+
isReturnAddressTaken: false
128+
hasStackMap: false
129+
hasPatchPoint: false
130+
stackSize: 0
131+
offsetAdjustment: 0
132+
maxAlignment: 1
133+
adjustsStack: false
134+
hasCalls: false
135+
stackProtector: ''
136+
functionContext: ''
137+
maxCallFrameSize: 4294967295
138+
cvBytesOfCalleeSavedRegisters: 0
139+
hasOpaqueSPAdjustment: false
140+
hasVAStart: false
141+
hasMustTailInVarArgFunc: false
142+
hasTailCall: false
143+
isCalleeSavedInfoValid: false
144+
localFrameSize: 0
145+
savePoint: ''
146+
restorePoint: ''
147+
fixedStack: []
148+
stack: []
149+
entry_values: []
150+
callSites: []
151+
debugValueSubstitutions: []
152+
constants: []
153+
machineFunctionInfo: {}
154+
body: |
155+
bb.0.entry:
156+
successors: %bb.1(0x80000000)
157+
liveins: $r0
158+
159+
%9:intregs = COPY $r0
160+
%11:intregs = A2_tfrsi 2
161+
%12:intregs = A2_max %9, %11
162+
%0:intregs = nsw A2_addi %12, -4
163+
%1:intregs = S4_addi_asl_ri @a, %12, 3
164+
%2:intregs = A2_tfrsi @a
165+
%10:intregs = A2_tfrsi 256
166+
167+
bb.1.do.body:
168+
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
169+
170+
%3:intregs = PHI %1, %bb.0, %8, %bb.1
171+
%4:intregs = PHI %0, %bb.0, %7, %bb.1
172+
%5:intregs = PHI %10, %bb.0, %15, %bb.1
173+
%15:intregs = S2_extractu %5, 8, 1
174+
S4_storeri_rr %2, %15, 2, %3 :: (store (s32) into %ir.cgep5, !tbaa !5)
175+
%7:intregs = nsw A2_addi %4, 4
176+
%13:predregs = C2_cmpgtui %7, 1073741835
177+
%8:intregs = A2_addi %3, 32
178+
J2_jumpf %13, %bb.1, implicit-def dead $pc
179+
J2_jump %bb.2, implicit-def dead $pc
180+
181+
bb.2.do.end:
182+
PS_jmpret $r31, implicit-def dead $pc
183+
184+
...
185+
---
186+
name: g
187+
alignment: 4
188+
exposesReturnsTwice: false
189+
legalized: false
190+
regBankSelected: false
191+
selected: false
192+
failedISel: false
193+
tracksRegLiveness: true
194+
hasWinCFI: false
195+
noPhis: false
196+
isSSA: true
197+
noVRegs: false
198+
hasFakeUses: false
199+
callsEHReturn: false
200+
callsUnwindInit: false
201+
hasEHScopes: false
202+
hasEHFunclets: false
203+
isOutlined: false
204+
debugInstrRef: false
205+
failsVerification: false
206+
tracksDebugUserValues: false
207+
registers:
208+
- { id: 0, class: intregs, preferred-register: '', flags: [ ] }
209+
- { id: 1, class: intregs, preferred-register: '', flags: [ ] }
210+
- { id: 2, class: intregs, preferred-register: '', flags: [ ] }
211+
- { id: 3, class: intregs, preferred-register: '', flags: [ ] }
212+
- { id: 4, class: intregs, preferred-register: '', flags: [ ] }
213+
- { id: 5, class: intregs, preferred-register: '', flags: [ ] }
214+
- { id: 6, class: intregs, preferred-register: '', flags: [ ] }
215+
- { id: 7, class: intregs, preferred-register: '', flags: [ ] }
216+
- { id: 8, class: intregs, preferred-register: '', flags: [ ] }
217+
- { id: 9, class: intregs, preferred-register: '', flags: [ ] }
218+
- { id: 10, class: intregs, preferred-register: '', flags: [ ] }
219+
- { id: 11, class: predregs, preferred-register: '', flags: [ ] }
220+
- { id: 12, class: predregs, preferred-register: '', flags: [ ] }
221+
- { id: 13, class: intregs, preferred-register: '', flags: [ ] }
222+
liveins:
223+
- { reg: '$r0', virtual-reg: '%9' }
224+
frameInfo:
225+
isFrameAddressTaken: false
226+
isReturnAddressTaken: false
227+
hasStackMap: false
228+
hasPatchPoint: false
229+
stackSize: 0
230+
offsetAdjustment: 0
231+
maxAlignment: 1
232+
adjustsStack: false
233+
hasCalls: false
234+
stackProtector: ''
235+
functionContext: ''
236+
maxCallFrameSize: 4294967295
237+
cvBytesOfCalleeSavedRegisters: 0
238+
hasOpaqueSPAdjustment: false
239+
hasVAStart: false
240+
hasMustTailInVarArgFunc: false
241+
hasTailCall: false
242+
isCalleeSavedInfoValid: false
243+
localFrameSize: 0
244+
savePoint: ''
245+
restorePoint: ''
246+
fixedStack: []
247+
stack: []
248+
entry_values: []
249+
callSites: []
250+
debugValueSubstitutions: []
251+
constants: []
252+
machineFunctionInfo: {}
253+
body: |
254+
bb.0.entry:
255+
successors: %bb.1(0x80000000)
256+
liveins: $r0
257+
258+
%9:intregs = COPY $r0
259+
%0:intregs = A2_addi %9, -4
260+
%1:intregs = S4_addi_asl_ri @a, %9, 3
261+
%2:intregs = A2_tfrsi @a
262+
%10:intregs = A2_tfrsi 256
263+
264+
bb.1.do.body:
265+
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
266+
267+
%3:intregs = PHI %1, %bb.0, %8, %bb.1
268+
%4:intregs = PHI %0, %bb.0, %7, %bb.1
269+
%5:intregs = PHI %10, %bb.0, %13, %bb.1
270+
%13:intregs = S2_extractu %5, 8, 1
271+
S4_storeri_rr %2, %13, 2, %3 :: (store (s32) into %ir.cgep5, !tbaa !5)
272+
%7:intregs = A2_addi %4, 4
273+
%11:predregs = C2_cmpgti %7, 1073741835
274+
%8:intregs = A2_addi %3, 32
275+
J2_jumpf %11, %bb.1, implicit-def dead $pc
276+
J2_jump %bb.2, implicit-def dead $pc
277+
278+
bb.2.do.end:
279+
PS_jmpret $r31, implicit-def dead $pc
280+
281+
...

llvm/test/CodeGen/Hexagon/swp-phi-start.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55
; the same stage.
66

77
; CHECK-DAG: [[REG3:(r[0-9]+)]] = add([[REG1:(r[0-9]+)]],#-1)
8-
; CHECK-DAG: [[REG2:(r[0-9]+)]] = add([[REG1]],#-1)
9-
; CHECK-DAG: loop0(.LBB0_[[LOOP:.]],[[REG3]])
8+
; CHECK-DAG: [[REG2:(r[0-9]+)]] = add([[REG4:(r[0-9]+)]],#-1)
9+
; CHECK-DAG: loop0(.LBB0_[[LOOP:.]],[[REG2]])
10+
; CHECK-NOT: = [[REG3]]
1011
; CHECK-NOT: = [[REG2]]
1112
; CHECK: .LBB0_[[LOOP]]:
1213
; CHECK: }{{[ \t]*}}:endloop

0 commit comments

Comments
 (0)