@@ -153,3 +153,75 @@ loop:
153153exit:
154154 ret void
155155}
156+
; Test input: a counted loop whose two stored values do not depend on the
; induction variable. Every iteration writes %x + 1 to A[iv] and (%x + 1) * 3
; to B[iv] for iv = 0 .. 1023; %A and %B are declared noalias.
; The autogenerated assertions below expect the add/mul to be computed once on
; a splat of %x in the vector preheader, the vector body to hold the address
; computations and the wide stores (advancing the index by 4 per iteration),
; and the middle block to branch straight to the exit.
; NOTE(review): the function name suggests this covers narrowing when a widened
; value feeds the stores -- confirm against the RUN lines and the pass under
; test, which are outside this excerpt.
157+ define void @narrow_widen_store_user (i32 %x , ptr noalias %A , ptr noalias %B ) {
158+ ; VF4IC1-LABEL: define void @narrow_widen_store_user(
159+ ; VF4IC1-SAME: i32 [[X:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
160+ ; VF4IC1-NEXT: [[ENTRY:.*:]]
161+ ; VF4IC1-NEXT: br label %[[VECTOR_PH:.*]]
162+ ; VF4IC1: [[VECTOR_PH]]:
163+ ; VF4IC1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0
164+ ; VF4IC1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
165+ ; VF4IC1-NEXT: [[TMP0:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
166+ ; VF4IC1-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[TMP0]], splat (i32 3)
167+ ; VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
168+ ; VF4IC1: [[VECTOR_BODY]]:
169+ ; VF4IC1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
170+ ; VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]]
171+ ; VF4IC1-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]]
172+ ; VF4IC1-NEXT: store <4 x i32> [[TMP0]], ptr [[TMP2]], align 4
173+ ; VF4IC1-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP3]], align 4
174+ ; VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
175+ ; VF4IC1-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
176+ ; VF4IC1-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
177+ ; VF4IC1: [[MIDDLE_BLOCK]]:
178+ ; VF4IC1-NEXT: br label %[[EXIT:.*]]
179+ ; VF4IC1: [[EXIT]]:
180+ ; VF4IC1-NEXT: ret void
181+ ;
182+ ; VF2IC2-LABEL: define void @narrow_widen_store_user(
183+ ; VF2IC2-SAME: i32 [[X:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
184+ ; VF2IC2-NEXT: [[ENTRY:.*:]]
185+ ; VF2IC2-NEXT: br label %[[VECTOR_PH:.*]]
186+ ; VF2IC2: [[VECTOR_PH]]:
187+ ; VF2IC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i64 0
188+ ; VF2IC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
189+ ; VF2IC2-NEXT: [[TMP0:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
190+ ; VF2IC2-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[TMP0]], splat (i32 3)
191+ ; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]]
192+ ; VF2IC2: [[VECTOR_BODY]]:
193+ ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
194+ ; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]]
195+ ; VF2IC2-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]]
196+ ; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP2]], i32 2
197+ ; VF2IC2-NEXT: store <2 x i32> [[TMP0]], ptr [[TMP2]], align 4
198+ ; VF2IC2-NEXT: store <2 x i32> [[TMP0]], ptr [[TMP4]], align 4
199+ ; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP3]], i32 2
200+ ; VF2IC2-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP3]], align 4
201+ ; VF2IC2-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP5]], align 4
202+ ; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
203+ ; VF2IC2-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
204+ ; VF2IC2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
205+ ; VF2IC2: [[MIDDLE_BLOCK]]:
206+ ; VF2IC2-NEXT: br label %[[EXIT:.*]]
207+ ; VF2IC2: [[EXIT]]:
208+ ; VF2IC2-NEXT: ret void
209+ ;
; Scalar input IR follows; the assertion lines above describe the expected
; output for two separate check prefixes and must stay in sync with it.
210+ entry:
211+ br label %loop
212+
213+ loop:
214+ %iv = phi i32 [ 0 , %entry ], [ %iv.next , %loop ]
; Per-iteration element addresses A[iv] and B[iv] (i32 elements).
215+ %gep.A = getelementptr i32 , ptr %A , i32 %iv
216+ %gep.B = getelementptr i32 , ptr %B , i32 %iv
; Both values are derived from %x only, so they are the same in every iteration.
217+ %wide.add = add i32 %x , 1
218+ %wide.mul = mul i32 %wide.add , 3
; No explicit alignment is written on these stores; the expected vector stores
; above carry align 4.
219+ store i32 %wide.add , ptr %gep.A
220+ store i32 %wide.mul , ptr %gep.B
221+ %iv.next = add i32 %iv , 1
; Stay in the loop while %iv.next != 1024, i.e. the body runs for iv = 0 .. 1023.
222+ %ec = icmp ne i32 %iv.next , 1024
223+ br i1 %ec , label %loop , label %exit
224+
225+ exit:
226+ ret void
227+ }
0 commit comments