@@ -187,4 +187,48 @@ end:
187
187
ret void ;
188
188
}
189
189
190
; Test function @invariant_add: a hand-written vector loop that gathers four
; i32 values from %data per iteration and stores them contiguously to %dst.
;   %data  - base pointer for the gather (declared readonly).
;   %dst   - base pointer for the contiguous <4 x i32> stores.
;   %n.vec - value of the scalar counter at which the loop exits.
; The gather offsets are %vec.ind * 3 + %vec.ind, so both operands of the add
; depend on the vector induction variable.
; The expectations below require the mul and the add to stay in the loop body,
; and the getelementptr + masked gather to become a single call to
; @llvm.arm.mve.vldr.gather.offset that uses %data as base and %l1 as offsets.
; NOTE(review): the RUN line is outside this view, so which pass produces the
; expected output cannot be confirmed from here.
+ define arm_aapcs_vfpcc void @invariant_add (i32* noalias nocapture readonly %data , i32* noalias nocapture %dst , i32 %n.vec ) {
191
+ ; CHECK-LABEL: @invariant_add(
192
+ ; CHECK-NEXT: vector.ph:
193
+ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
194
+ ; CHECK: vector.body:
195
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
196
+ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
197
+ ; CHECK-NEXT: [[L0:%.*]] = mul <4 x i32> [[VEC_IND]], <i32 3, i32 3, i32 3, i32 3>
198
+ ; CHECK-NEXT: [[L1:%.*]] = add <4 x i32> [[L0]], [[VEC_IND]]
199
+ ; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* [[DATA:%.*]], <4 x i32> [[L1]], i32 32, i32 2, i32 1)
200
+ ; CHECK-NEXT: [[L3:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[INDEX]]
201
+ ; CHECK-NEXT: [[L4:%.*]] = bitcast i32* [[L3]] to <4 x i32>*
202
+ ; CHECK-NEXT: store <4 x i32> [[TMP0]], <4 x i32>* [[L4]], align 4
203
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
204
+ ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
205
+ ; CHECK-NEXT: [[L5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC:%.*]]
206
+ ; CHECK-NEXT: br i1 [[L5]], label [[END:%.*]], label [[VECTOR_BODY]]
207
+ ; CHECK: end:
208
+ ; CHECK-NEXT: ret void
209
+ ;
210
+
211
+ vector.ph:
212
+ br label %vector.body
213
+
214
+ vector.body: ; preds = %vector.body, %vector.ph
215
; Scalar counter: starts at 0 and advances by 4 each iteration.
+ %index = phi i32 [ 0 , %vector.ph ], [ %index.next , %vector.body ]
216
; Vector induction variable: lanes start at <0, 2, 4, 6>; each lane advances by 8.
+ %vec.ind = phi <4 x i32 > [ <i32 0 , i32 2 , i32 4 , i32 6 >, %vector.ph ], [ %vec.ind.next , %vector.body ]
217
; Gather offsets: %l1 = %vec.ind * 3 + %vec.ind.
+ %l0 = mul <4 x i32 > %vec.ind , <i32 3 , i32 3 , i32 3 , i32 3 >
218
+ %l1 = add <4 x i32 > %l0 , %vec.ind
219
; One address per lane: %data indexed by the i32 offsets in %l1.
+ %l2 = getelementptr inbounds i32 , i32* %data , <4 x i32 > %l1
220
; Gather with an all-true mask, alignment operand 4 and an undef passthru.
+ %wide.masked.gather = call <4 x i32 > @llvm.masked.gather.v4i32.v4p0i32 (<4 x i32* > %l2 , i32 4 , <4 x i1 > <i1 true , i1 true , i1 true , i1 true >, <4 x i32 > undef )
221
; Store the four gathered values to %dst starting at element %index.
+ %l3 = getelementptr inbounds i32 , i32* %dst , i32 %index
222
+ %l4 = bitcast i32* %l3 to <4 x i32 >*
223
+ store <4 x i32 > %wide.masked.gather , <4 x i32 >* %l4 , align 4
224
+ %index.next = add i32 %index , 4
225
+ %vec.ind.next = add <4 x i32 > %vec.ind , <i32 8 , i32 8 , i32 8 , i32 8 >
226
; Leave the loop once the advanced scalar counter equals %n.vec.
+ %l5 = icmp eq i32 %index.next , %n.vec
227
+ br i1 %l5 , label %end , label %vector.body
228
+
229
+ end:
230
+ ret void ;
231
+ }
232
+
233
+
190
234
; Masked gather intrinsic called by @invariant_add. Operands: a vector of four
; i32 pointers, an i32 alignment (4 at the visible call site), a <4 x i1> mask
; and a <4 x i32> passthru; the result is <4 x i32>.
declare <4 x i32 > @llvm.masked.gather.v4i32.v4p0i32 (<4 x i32* >, i32 , <4 x i1 >, <4 x i32 >)
0 commit comments