Skip to content

Commit ec32c6c

Browse files
committed
Mitigate regressions
1 parent 60dcc09 commit ec32c6c

File tree

2 files changed

+211
-2
lines changed

2 files changed

+211
-2
lines changed
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
# RUN: llc -mtriple=hexagon -mcpu=hexagonv73 -O2 -mattr=+hvxv73,hvx-length64b \
2+
# RUN: -run-pass=pipeliner -debug-only=pipeliner 2>&1 \
3+
# RUN: %s -o - | FileCheck %s
4+
# REQUIRES: asserts
5+
6+
# Check that the inner loop (bb.4.bb28) is software pipelined and that the pipeliner reports a schedule with II=4.
7+
8+
# CHECK: Schedule Found? 1 (II=4)
9+
10+
--- |
11+
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
12+
target triple = "hexagon"
13+
14+
; Function Attrs: nounwind
; IR for the pipeliner test: a two-level loop nest. The inner loop (bb28) loads
; four <16 x i32> vectors through pointers derived from %arg, combines them with
; the llvm.hexagon.V6.vaddh/vsubh/vavgh/vnavgh/vsathub intrinsics, and stores
; two result vectors through pointers derived from %arg4.
15+
define void @ham(ptr noalias nocapture readonly %arg, i32 %arg1, i32 %arg2, i32 %arg3, ptr noalias nocapture %arg4, i32 %arg5) #0 {
16+
bb:
17+
%ashr = ashr i32 %arg3, 2
18+
%ashr6 = ashr i32 %arg3, 1
19+
%add = add nsw i32 %ashr6, %ashr
20+
; The whole loop nest is skipped unless %arg2 > 0.
%icmp = icmp sgt i32 %arg2, 0
21+
br i1 %icmp, label %bb7, label %bb61
22+
23+
bb7: ; preds = %bb
24+
; Upper bound for the inner-loop counter (compared against %add54 in bb28).
%sdiv = sdiv i32 %arg1, 64
25+
br label %bb9
26+
27+
bb9: ; preds = %bb57, %bb7
28+
; Outer loop header: %phi starts at 0 and is advanced by 2 in bb57.
%phi = phi i32 [ 0, %bb7 ], [ %add58, %bb57 ]
29+
; The inner loop is only entered when %arg1 > 63.
%0 = icmp sgt i32 %arg1, 63
30+
%ashr10 = ashr exact i32 %phi, 1
31+
%mul = mul nsw i32 %ashr10, %arg3
32+
br i1 %0, label %bb11, label %bb57
33+
34+
bb11: ; preds = %bb9
35+
; Inner-loop preheader: compute the two store base pointers (%cgep, %cgep1)
; and the four load base pointers (%cgep2..%cgep5) for this outer iteration.
%add12 = add nsw i32 %phi, 1
36+
%mul13 = mul nsw i32 %add12, %arg5
37+
%mul14 = mul nsw i32 %phi, %arg5
38+
%add15 = add i32 %add, %mul
39+
%add16 = add i32 %mul, %ashr
40+
%add17 = add i32 %mul, %ashr6
41+
%cgep = getelementptr inbounds i8, ptr %arg4, i32 %mul13
42+
%cgep1 = getelementptr inbounds i8, ptr %arg4, i32 %mul14
43+
%cgep2 = getelementptr inbounds i16, ptr %arg, i32 %add15
44+
%cgep3 = getelementptr inbounds i16, ptr %arg, i32 %add16
45+
%cgep4 = getelementptr inbounds i16, ptr %arg, i32 %add17
46+
%cgep5 = getelementptr inbounds i16, ptr %arg, i32 %mul
47+
br label %bb28
48+
49+
bb28: ; preds = %bb28, %bb11
50+
; Inner loop: %phi29 counts iterations; each pointer phi is advanced by one
; <16 x i32> element per iteration (see %cgep6..%cgep11 below).
%phi29 = phi i32 [ 0, %bb11 ], [ %add54, %bb28 ]
51+
%phi30 = phi ptr [ %cgep5, %bb11 ], [ %cgep6, %bb28 ]
52+
%phi31 = phi ptr [ %cgep4, %bb11 ], [ %cgep7, %bb28 ]
53+
%phi32 = phi ptr [ %cgep3, %bb11 ], [ %cgep8, %bb28 ]
54+
%phi33 = phi ptr [ %cgep2, %bb11 ], [ %cgep9, %bb28 ]
55+
%phi34 = phi ptr [ %cgep, %bb11 ], [ %cgep11, %bb28 ]
56+
%phi35 = phi ptr [ %cgep1, %bb11 ], [ %cgep10, %bb28 ]
57+
%load = load <16 x i32>, ptr %phi30, align 64
58+
%load38 = load <16 x i32>, ptr %phi31, align 64
59+
%load40 = load <16 x i32>, ptr %phi32, align 64
60+
%load42 = load <16 x i32>, ptr %phi33, align 64
61+
%call = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %load, <16 x i32> %load38)
62+
%call43 = tail call <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32> %load, <16 x i32> %load38)
63+
%call44 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %load40, <16 x i32> %load42)
64+
%call45 = tail call <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32> %load40, <16 x i32> %load42)
65+
%call46 = tail call <16 x i32> @llvm.hexagon.V6.vavgh(<16 x i32> %call, <16 x i32> %call44)
66+
%call47 = tail call <16 x i32> @llvm.hexagon.V6.vnavgh(<16 x i32> %call, <16 x i32> %call44)
67+
%call48 = tail call <16 x i32> @llvm.hexagon.V6.vavgh(<16 x i32> %call43, <16 x i32> %call45)
68+
%call49 = tail call <16 x i32> @llvm.hexagon.V6.vnavgh(<16 x i32> %call43, <16 x i32> %call45)
69+
%call50 = tail call <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32> %call47, <16 x i32> %call46)
70+
%call51 = tail call <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32> %call49, <16 x i32> %call48)
71+
store <16 x i32> %call50, ptr %phi35, align 64
72+
store <16 x i32> %call51, ptr %phi34, align 64
73+
%add54 = add nsw i32 %phi29, 1
74+
; Loop back while the incremented counter is still below %sdiv.
%icmp55 = icmp slt i32 %add54, %sdiv
75+
%cgep6 = getelementptr inbounds <16 x i32>, ptr %phi30, i32 1
76+
%cgep7 = getelementptr inbounds <16 x i32>, ptr %phi31, i32 1
77+
%cgep8 = getelementptr inbounds <16 x i32>, ptr %phi32, i32 1
78+
%cgep9 = getelementptr inbounds <16 x i32>, ptr %phi33, i32 1
79+
%cgep10 = getelementptr inbounds <16 x i32>, ptr %phi35, i32 1
80+
%cgep11 = getelementptr inbounds <16 x i32>, ptr %phi34, i32 1
81+
br i1 %icmp55, label %bb28, label %bb57
82+
83+
bb57: ; preds = %bb28, %bb9
84+
; Outer loop latch: step %phi by 2 and continue while it is below %arg2.
%add58 = add nsw i32 %phi, 2
85+
%icmp59 = icmp slt i32 %add58, %arg2
86+
br i1 %icmp59, label %bb9, label %bb61
87+
88+
bb61: ; preds = %bb57, %bb
89+
ret void
90+
}
91+
92+
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
93+
declare <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32>, <16 x i32>) #1
94+
95+
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
96+
declare <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32>, <16 x i32>) #1
97+
98+
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
99+
declare <16 x i32> @llvm.hexagon.V6.vavgh(<16 x i32>, <16 x i32>) #1
100+
101+
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
102+
declare <16 x i32> @llvm.hexagon.V6.vnavgh(<16 x i32>, <16 x i32>) #1
103+
104+
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
105+
declare <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32>, <16 x i32>) #1
106+
107+
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length64b" "unsafe-fp-math"="false" "use-soft-float"="false" }
108+
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length64b" }
109+
110+
...
111+
---
# Machine function for @ham; the RUN line at the top of this file runs only the
# pipeliner pass (-run-pass=pipeliner) over it.
# Block layout:
#   bb.0        - copies the six live-in argument registers and skips to bb.6
#                 when the compare of %29 against 0 fails.
#   bb.1        - loop-invariant setup; J2_loop1r starts the outer loop at bb.2.
#   bb.2/bb.5   - outer loop header and latch (ENDLOOP1 branches back to bb.2).
#   bb.3        - inner-loop preheader; J2_loop0r starts the inner loop at bb.4.
#   bb.4        - single-block inner loop closed by ENDLOOP0, with four
#                 V6_vL32b_pi loads and two V6_vS32b_pi stores that each step
#                 their pointer register by 64.
# NOTE(review): the CHECK for "Schedule Found? 1 (II=4)" presumably refers to
# bb.4, the only single-block loop here - confirm against the debug output.
112+
name: ham
113+
alignment: 16
114+
tracksRegLiveness: true
115+
body: |
116+
bb.0.bb:
117+
successors: %bb.1(0x50000000), %bb.6(0x30000000)
118+
liveins: $r0, $r1, $r2, $r3, $r4, $r5
119+
120+
%32:intregs = COPY $r5
121+
%31:intregs = COPY $r4
122+
%30:intregs = COPY $r3
123+
%29:intregs = COPY $r2
124+
%28:intregs = COPY $r1
125+
%27:intregs = COPY $r0
126+
%33:predregs = C2_cmpgti %29, 0
127+
J2_jumpf %33, %bb.6, implicit-def dead $pc
128+
J2_jump %bb.1, implicit-def dead $pc
129+
130+
bb.1.bb7:
131+
successors: %bb.2(0x80000000)
132+
133+
%0:intregs = S2_asr_i_r %30, 2
134+
%1:intregs = S2_asr_i_r %30, 1
135+
%2:intregs = nsw A2_add %1, %0
136+
%36:intregs = S2_asr_i_r %28, 31
137+
%37:intregs = S2_lsr_i_r_acc %28, killed %36, 26
138+
%3:intregs = S2_asr_i_r killed %37, 6
139+
%35:intregs = A2_tfrsi 0
140+
%38:predregs = C2_cmpgti %28, 63
141+
%63:intregs = A2_addi %29, 1
142+
%64:intregs = S2_lsr_i_r %63, 1
143+
%65:intregs = COPY %64
144+
J2_loop1r %bb.2, %65, implicit-def $lc1, implicit-def $sa1
145+
146+
bb.2.bb9 (machine-block-address-taken):
147+
successors: %bb.3(0x40000000), %bb.5(0x40000000)
148+
149+
%4:intregs = PHI %35, %bb.1, %26, %bb.5
150+
J2_jumpf %38, %bb.5, implicit-def dead $pc
151+
J2_jump %bb.3, implicit-def dead $pc
152+
153+
bb.3.bb11:
154+
successors: %bb.4(0x80000000)
155+
156+
%40:intregs = exact S2_asr_i_r %4, 1
157+
%5:intregs = nsw M2_mpyi %40, %30
158+
%42:intregs = nsw A2_addi %4, 1
159+
%43:intregs = A2_add %2, %5
160+
%44:intregs = A2_add %5, %0
161+
%45:intregs = A2_add %5, %1
162+
%6:intregs = M2_maci %31, killed %42, %32
163+
%7:intregs = M2_maci %31, %4, %32
164+
%8:intregs = S2_addasl_rrri %27, killed %43, 1
165+
%9:intregs = S2_addasl_rrri %27, killed %44, 1
166+
%10:intregs = S2_addasl_rrri %27, killed %45, 1
167+
%11:intregs = S2_addasl_rrri %27, %5, 1
168+
%62:intregs = COPY %3
169+
J2_loop0r %bb.4, %62, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
170+
171+
bb.4.bb28 (machine-block-address-taken):
172+
successors: %bb.4(0x7c000000), %bb.5(0x04000000)
173+
174+
%13:intregs = PHI %11, %bb.3, %20, %bb.4
175+
%14:intregs = PHI %10, %bb.3, %21, %bb.4
176+
%15:intregs = PHI %9, %bb.3, %22, %bb.4
177+
%16:intregs = PHI %8, %bb.3, %23, %bb.4
178+
%17:intregs = PHI %6, %bb.3, %25, %bb.4
179+
%18:intregs = PHI %7, %bb.3, %24, %bb.4
180+
%46:hvxvr, %20:intregs = V6_vL32b_pi %13, 64 :: (load (s512) from %ir.phi30)
181+
%47:hvxvr, %21:intregs = V6_vL32b_pi %14, 64 :: (load (s512) from %ir.phi31)
182+
%48:hvxvr, %22:intregs = V6_vL32b_pi %15, 64 :: (load (s512) from %ir.phi32)
183+
%49:hvxvr, %23:intregs = V6_vL32b_pi %16, 64 :: (load (s512) from %ir.phi33)
184+
%50:hvxvr = V6_vaddh %46, %47
185+
%51:hvxvr = V6_vsubh %46, %47
186+
%52:hvxvr = V6_vaddh %48, %49
187+
%53:hvxvr = V6_vsubh %48, %49
188+
%54:hvxvr = V6_vavgh %50, %52
189+
%55:hvxvr = V6_vnavgh %50, %52
190+
%56:hvxvr = V6_vavgh %51, %53
191+
%57:hvxvr = V6_vnavgh %51, %53
192+
%58:hvxvr = V6_vsathub killed %55, killed %54
193+
%59:hvxvr = V6_vsathub killed %57, killed %56
194+
%24:intregs = V6_vS32b_pi %18, 64, killed %58 :: (store (s512) into %ir.phi35)
195+
%25:intregs = V6_vS32b_pi %17, 64, killed %59 :: (store (s512) into %ir.phi34)
196+
ENDLOOP0 %bb.4, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
197+
J2_jump %bb.5, implicit-def dead $pc
198+
199+
bb.5.bb57:
200+
successors: %bb.2(0x7c000000), %bb.6(0x04000000)
201+
202+
%26:intregs = nsw A2_addi %4, 2
203+
ENDLOOP1 %bb.2, implicit-def $pc, implicit-def $lc1, implicit $sa1, implicit $lc1
204+
J2_jump %bb.6, implicit-def dead $pc
205+
206+
bb.6.bb61:
207+
PS_jmpret $r31, implicit-def dead $pc
208+
209+
...

llvm/test/CodeGen/Hexagon/v6-haar-balign32.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
; CHECK: .p2align{{.*}}5
33

44
; Function Attrs: nounwind
5-
define void @wobble(ptr noalias nocapture readonly %arg, i32 %arg1, i32 %arg2, i32 %arg3, ptr noalias nocapture %arg4, i32 %arg5, ptr noalias nocapture %arg6) #0 {
5+
define void @wobble(ptr noalias nocapture readonly %arg, i32 %arg1, i32 %arg2, i32 %arg3, ptr noalias nocapture %arg4, i32 %arg5) #0 {
66
bb:
77
%ashr = ashr i32 %arg3, 2
88
%ashr6 = ashr i32 %arg3, 1
@@ -29,7 +29,7 @@ bb11: ; preds = %bb9
2929
%add16 = add i32 %mul, %ashr
3030
%add17 = add i32 %mul, %ashr6
3131
%getelementptr = getelementptr inbounds i8, ptr %arg4, i32 %mul13
32-
%getelementptr18 = getelementptr inbounds i8, ptr %arg6, i32 %mul14
32+
%getelementptr18 = getelementptr inbounds i8, ptr %arg4, i32 %mul14
3333
%getelementptr19 = getelementptr inbounds i16, ptr %arg, i32 %add15
3434
%getelementptr20 = getelementptr inbounds i16, ptr %arg, i32 %add16
3535
%getelementptr21 = getelementptr inbounds i16, ptr %arg, i32 %add17

0 commit comments

Comments
 (0)