Skip to content

Commit a0aa287

Browse files
fhahn authored and Debadri Basak committed
[LV] Add tests with hoist-able invariant loads.
1 parent 63da162 commit a0aa287

File tree

2 files changed

+293
-0
lines changed

2 files changed

+293
-0
lines changed
Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
2+
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s
3+
4+
; Invariant load without static no-alias information: every iteration reloads
; %invariant_ptr and stores the loaded value to %dst[%iv]. Neither pointer
; argument carries a noalias attribute, and the expected output guards the
; vector loop with a runtime overlap test between %dst and %invariant_ptr.
; The autogenerated assertions record the scalar load of %invariant_ptr inside
; the vector body (tagged with !alias.scope), followed by a broadcast and a
; <4 x i32> store.
define void @hoist_invariant_load_noalias_due_to_memchecks(ptr %dst, ptr %invariant_ptr, i32 %n) {
5+
; CHECK-LABEL: define void @hoist_invariant_load_noalias_due_to_memchecks(
6+
; CHECK-SAME: ptr [[DST:%.*]], ptr [[INVARIANT_PTR:%.*]], i32 [[N:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*]]:
8+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
9+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
10+
; CHECK: [[VECTOR_MEMCHECK]]:
11+
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
12+
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
13+
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
14+
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 4
15+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
16+
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[INVARIANT_PTR]], i64 4
17+
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
18+
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[INVARIANT_PTR]], [[SCEVGEP]]
19+
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
20+
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
21+
; CHECK: [[VECTOR_PH]]:
22+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4
23+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
24+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
25+
; CHECK: [[VECTOR_BODY]]:
26+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
27+
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META0:![0-9]+]]
28+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
29+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
30+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
31+
; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
32+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
33+
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
34+
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
35+
; CHECK: [[MIDDLE_BLOCK]]:
36+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
37+
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
38+
; CHECK: [[SCALAR_PH]]:
39+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
40+
; CHECK-NEXT: br label %[[LOOP:.*]]
41+
; CHECK: [[LOOP]]:
42+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
43+
; CHECK-NEXT: [[INV_VAL:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4
44+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]]
45+
; CHECK-NEXT: store i32 [[INV_VAL]], ptr [[GEP]], align 4
46+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
47+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
48+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
49+
; CHECK: [[EXIT]]:
50+
; CHECK-NEXT: ret void
51+
;
52+
entry:
53+
br label %loop
54+
55+
loop:
56+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; The address is the unmodified function argument, so the same location is
; read on every iteration.
57+
%inv_val = load i32, ptr %invariant_ptr, align 4
58+
%gep = getelementptr inbounds i32, ptr %dst, i32 %iv
; The store goes through %dst; nothing in this IR rules out an overlap with
; %invariant_ptr.
59+
store i32 %inv_val, ptr %gep, align 4
60+
%iv.next = add nuw nsw i32 %iv, 1
61+
%ec = icmp eq i32 %iv.next, %n
62+
br i1 %ec, label %exit, label %loop
63+
64+
exit:
65+
ret void
66+
}
67+
68+
; Test that loads with non-invariant addresses are not hoisted: the load
; address %gep.src is indexed by the induction variable %iv, and the expected
; output keeps the access in the vector body as a <4 x i32> wide load that
; feeds a <4 x i32> store to %dst.
69+
define void @dont_hoist_variant_address(ptr %dst, ptr %src, i32 %n) {
70+
; CHECK-LABEL: define void @dont_hoist_variant_address(
71+
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 [[N:%.*]]) {
72+
; CHECK-NEXT: [[ENTRY:.*]]:
73+
; CHECK-NEXT: [[SRC2:%.*]] = ptrtoint ptr [[SRC]] to i64
74+
; CHECK-NEXT: [[A1:%.*]] = ptrtoint ptr [[DST]] to i64
75+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
76+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
77+
; CHECK: [[VECTOR_MEMCHECK]]:
78+
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[SRC2]]
79+
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
80+
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
81+
; CHECK: [[VECTOR_PH]]:
82+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4
83+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
84+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
85+
; CHECK: [[VECTOR_BODY]]:
86+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
87+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
88+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
89+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
90+
; CHECK-NEXT: store <4 x i32> [[WIDE_LOAD]], ptr [[TMP2]], align 4
91+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
92+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
93+
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
94+
; CHECK: [[MIDDLE_BLOCK]]:
95+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
96+
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
97+
; CHECK: [[SCALAR_PH]]:
98+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
99+
; CHECK-NEXT: br label %[[LOOP:.*]]
100+
; CHECK: [[LOOP]]:
101+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
102+
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]]
103+
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP_SRC]], align 4
104+
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]]
105+
; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP_DST]], align 4
106+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
107+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
108+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
109+
; CHECK: [[EXIT]]:
110+
; CHECK-NEXT: ret void
111+
;
112+
entry:
113+
br label %loop
114+
115+
loop:
116+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; The load address is indexed by %iv, so it changes on every iteration.
117+
%gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
118+
%val = load i32, ptr %gep.src, align 4
119+
%gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
120+
store i32 %val, ptr %gep.dst, align 4
121+
%iv.next = add nuw nsw i32 %iv, 1
122+
%ec = icmp eq i32 %iv.next, %n
123+
br i1 %ec, label %exit, label %loop
124+
125+
exit:
126+
ret void
127+
}
128+
129+
; Test that predicated loads are not hoisted: the load of %invariant_ptr sits
; in %if.then and is only reached when the value loaded from %cond_ptr[%iv] is
; greater than zero. The expected output keeps one scalar load of
; %invariant_ptr per lane, inside the conditionally executed store blocks of
; the vector body.
130+
define void @dont_hoist_predicated_load(ptr %dst, ptr %invariant_ptr, ptr %cond_ptr, i32 %n) {
131+
; CHECK-LABEL: define void @dont_hoist_predicated_load(
132+
; CHECK-SAME: ptr [[DST:%.*]], ptr [[INVARIANT_PTR:%.*]], ptr [[COND_PTR:%.*]], i32 [[N:%.*]]) {
133+
; CHECK-NEXT: [[ENTRY:.*]]:
134+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
135+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
136+
; CHECK: [[VECTOR_MEMCHECK]]:
137+
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[N]], -1
138+
; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP5]] to i64
139+
; CHECK-NEXT: [[TMP22:%.*]] = shl nuw nsw i64 [[TMP20]], 2
140+
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP22]], 4
141+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
142+
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[COND_PTR]], i64 [[TMP3]]
143+
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[INVARIANT_PTR]], i64 4
144+
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
145+
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[COND_PTR]], [[SCEVGEP]]
146+
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
147+
; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]]
148+
; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[INVARIANT_PTR]], [[SCEVGEP]]
149+
; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
150+
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
151+
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
152+
; CHECK: [[VECTOR_PH]]:
153+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4
154+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
155+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
156+
; CHECK: [[VECTOR_BODY]]:
157+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ]
158+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[COND_PTR]], i32 [[INDEX]]
159+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !alias.scope [[META11:![0-9]+]]
160+
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], zeroinitializer
161+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
162+
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
163+
; CHECK: [[PRED_STORE_IF]]:
164+
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META14:![0-9]+]]
165+
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
166+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
167+
; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP9]], align 4, !alias.scope [[META16:![0-9]+]], !noalias [[META18:![0-9]+]]
168+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
169+
; CHECK: [[PRED_STORE_CONTINUE]]:
170+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
171+
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]]
172+
; CHECK: [[PRED_STORE_IF6]]:
173+
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META14]]
174+
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 1
175+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP8]]
176+
; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP13]], align 4, !alias.scope [[META16]], !noalias [[META18]]
177+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]]
178+
; CHECK: [[PRED_STORE_CONTINUE7]]:
179+
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
180+
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]]
181+
; CHECK: [[PRED_STORE_IF8]]:
182+
; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META14]]
183+
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 2
184+
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP12]]
185+
; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP17]], align 4, !alias.scope [[META16]], !noalias [[META18]]
186+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]]
187+
; CHECK: [[PRED_STORE_CONTINUE9]]:
188+
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
189+
; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11]]
190+
; CHECK: [[PRED_STORE_IF10]]:
191+
; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META14]]
192+
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 3
193+
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP16]]
194+
; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP21]], align 4, !alias.scope [[META16]], !noalias [[META18]]
195+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE11]]
196+
; CHECK: [[PRED_STORE_CONTINUE11]]:
197+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
198+
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
199+
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
200+
; CHECK: [[MIDDLE_BLOCK]]:
201+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
202+
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
203+
; CHECK: [[SCALAR_PH]]:
204+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
205+
; CHECK-NEXT: br label %[[LOOP:.*]]
206+
; CHECK: [[LOOP]]:
207+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
208+
; CHECK-NEXT: [[GEP_COND:%.*]] = getelementptr inbounds i32, ptr [[COND_PTR]], i32 [[IV]]
209+
; CHECK-NEXT: [[COND:%.*]] = load i32, ptr [[GEP_COND]], align 4
210+
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[COND]], 0
211+
; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[LOOP_LATCH]]
212+
; CHECK: [[IF_THEN]]:
213+
; CHECK-NEXT: [[INV_VAL:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4
214+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]]
215+
; CHECK-NEXT: store i32 [[INV_VAL]], ptr [[GEP]], align 4
216+
; CHECK-NEXT: br label %[[LOOP_LATCH]]
217+
; CHECK: [[LOOP_LATCH]]:
218+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
219+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
220+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP20:![0-9]+]]
221+
; CHECK: [[EXIT]]:
222+
; CHECK-NEXT: ret void
223+
;
224+
entry:
225+
br label %loop
226+
227+
loop:
228+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
229+
%gep.cond = getelementptr inbounds i32, ptr %cond_ptr, i32 %iv
230+
%cond = load i32, ptr %gep.cond, align 4
; A signed compare against zero decides whether %if.then runs this iteration.
231+
%cmp = icmp sgt i32 %cond, 0
232+
br i1 %cmp, label %if.then, label %loop.latch
233+
234+
if.then:
; The address is loop-invariant, but the load is only reached when %cmp is true.
235+
%inv_val = load i32, ptr %invariant_ptr, align 4
236+
%gep = getelementptr inbounds i32, ptr %dst, i32 %iv
237+
store i32 %inv_val, ptr %gep, align 4
238+
br label %loop.latch
239+
240+
loop.latch:
241+
%iv.next = add nuw nsw i32 %iv, 1
242+
%ec = icmp eq i32 %iv.next, %n
243+
br i1 %ec, label %exit, label %loop
244+
245+
exit:
246+
ret void
247+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
2+
; RUN: opt -passes='default<O3>' -S %s | FileCheck %s
3+
4+
target triple = "arm64-apple-macosx"
5+
6+
%"class.dealii::VectorizedArray" = type { [4 x double] }
7+
8+
; O3 pipeline test: for each %i below %num_elements, the loop adds the double
; loaded from %invariant_ptr to the double at the start of element %i of
; %array (elements are %"class.dealii::VectorizedArray") and stores the sum
; back to the same location. The autogenerated assertions record a scalar loop
; in which the load of %invariant_ptr remains inside the loop body.
define void @hoist_invariant_load(ptr %invariant_ptr, i64 %num_elements, ptr %array) {
9+
; CHECK-LABEL: define void @hoist_invariant_load(
10+
; CHECK-SAME: ptr readonly captures(none) [[INVARIANT_PTR:%.*]], i64 [[NUM_ELEMENTS:%.*]], ptr captures(none) [[ARRAY:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
11+
; CHECK-NEXT: [[ENTRY:.*]]:
12+
; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUM_ELEMENTS]], 0
13+
; CHECK-NEXT: br i1 [[CMP1_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH:.*]]
14+
; CHECK: [[LOOP_LATCH]]:
15+
; CHECK-NEXT: [[I2:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP_LATCH]] ], [ 0, %[[ENTRY]] ]
16+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr nusw %"class.dealii::VectorizedArray", ptr [[ARRAY]], i64 [[I2]]
17+
; CHECK-NEXT: [[INVARIANT_VAL:%.*]] = load double, ptr [[INVARIANT_PTR]], align 8
18+
; CHECK-NEXT: [[ARRAY_VAL:%.*]] = load double, ptr [[GEP]], align 8
19+
; CHECK-NEXT: [[SUM:%.*]] = fadd double [[INVARIANT_VAL]], [[ARRAY_VAL]]
20+
; CHECK-NEXT: store double [[SUM]], ptr [[GEP]], align 8
21+
; CHECK-NEXT: [[I_NEXT]] = add nuw i64 [[I2]], 1
22+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[I_NEXT]], [[NUM_ELEMENTS]]
23+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP_LATCH]]
24+
; CHECK: [[EXIT]]:
25+
; CHECK-NEXT: ret void
26+
;
27+
entry:
28+
br label %loop.header
29+
30+
loop.header: ; preds = %loop.latch, %entry
31+
%i = phi i64 [ 0, %entry ], [ %i.next, %loop.latch ]
; The unsigned bound is tested before the body, so %num_elements == 0 runs no
; iterations.
32+
%cmp = icmp ult i64 %i, %num_elements
33+
br i1 %cmp, label %loop.latch, label %exit
34+
35+
loop.latch: ; preds = %loop.header
36+
%gep = getelementptr nusw %"class.dealii::VectorizedArray", ptr %array, i64 %i
; This load reads the same address on every iteration, unlike %gep, which is
; indexed by %i.
37+
%invariant_val = load double, ptr %invariant_ptr, align 8
38+
%array_val = load double, ptr %gep, align 8
39+
%sum = fadd double %array_val, %invariant_val
40+
store double %sum, ptr %gep, align 8
41+
%i.next = add i64 %i, 1
42+
br label %loop.header
43+
44+
exit: ; preds = %loop.header
45+
ret void
46+
}

0 commit comments

Comments
 (0)