@@ -1168,3 +1168,84 @@ loop:
1168
1168
exit:
1169
1169
ret void
1170
1170
}
1171
+
1172
; FIXME: Currently incorrectly narrows the load,
; https://github.com/llvm/llvm-project/issues/156190.
;
; Each of the 1000 iterations of the scalar loop loads both fields of the
; { double, double } element of %A at index %iv, adds 20.0 to each field, and
; stores the two sums to the matching fields of the elements of both %B and %C
; at the same index. The same pair of fadd results therefore feeds two separate
; groups of stores.
;
; The VF2 expectations below record the current output: only one scalar double
; is loaded from %A and broadcast to both lanes, so the second field of the %A
; element is never read. NOTE(review): this is presumably the narrowing the
; FIXME refers to; confirm against the linked issue. The VF4 expectations load
; all eight doubles and de-interleave them into the two fields.
define void @multiple_store_groups_storing_same_wide_bin_op(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
; VF2-LABEL: define void @multiple_store_groups_storing_same_wide_bin_op(
; VF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
; VF2-NEXT:  [[ENTRY:.*:]]
; VF2-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VF2:       [[VECTOR_PH]]:
; VF2-NEXT:    br label %[[VECTOR_BODY:.*]]
; VF2:       [[VECTOR_BODY]]:
; VF2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF2-NEXT:    [[TMP0:%.*]] = getelementptr { double, double }, ptr [[A]], i64 [[INDEX]]
; VF2-NEXT:    [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8
; VF2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i64 0
; VF2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
; VF2-NEXT:    [[TMP2:%.*]] = fadd contract <2 x double> [[BROADCAST_SPLAT]], splat (double 2.000000e+01)
; VF2-NEXT:    [[TMP3:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[INDEX]]
; VF2-NEXT:    store <2 x double> [[TMP2]], ptr [[TMP3]], align 8
; VF2-NEXT:    [[TMP4:%.*]] = getelementptr { double, double }, ptr [[C]], i64 [[INDEX]]
; VF2-NEXT:    store <2 x double> [[TMP2]], ptr [[TMP4]], align 8
; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
; VF2-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; VF2-NEXT:    br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; VF2:       [[MIDDLE_BLOCK]]:
; VF2-NEXT:    br [[EXIT:label %.*]]
; VF2:       [[SCALAR_PH]]:
;
; VF4-LABEL: define void @multiple_store_groups_storing_same_wide_bin_op(
; VF4-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
; VF4-NEXT:  [[ENTRY:.*:]]
; VF4-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VF4:       [[VECTOR_PH]]:
; VF4-NEXT:    br label %[[VECTOR_BODY:.*]]
; VF4:       [[VECTOR_BODY]]:
; VF4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF4-NEXT:    [[TMP0:%.*]] = getelementptr { double, double }, ptr [[A]], i64 [[INDEX]]
; VF4-NEXT:    [[WIDE_VEC:%.*]] = load <8 x double>, ptr [[TMP0]], align 8
; VF4-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; VF4-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; VF4-NEXT:    [[TMP1:%.*]] = fadd contract <4 x double> [[STRIDED_VEC]], splat (double 2.000000e+01)
; VF4-NEXT:    [[TMP2:%.*]] = fadd contract <4 x double> [[STRIDED_VEC1]], splat (double 2.000000e+01)
; VF4-NEXT:    [[TMP3:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[INDEX]]
; VF4-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; VF4-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP4]], <8 x double> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; VF4-NEXT:    store <8 x double> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8
; VF4-NEXT:    [[TMP5:%.*]] = getelementptr { double, double }, ptr [[C]], i64 [[INDEX]]
; VF4-NEXT:    store <8 x double> [[INTERLEAVED_VEC]], ptr [[TMP5]], align 8
; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; VF4-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; VF4:       [[MIDDLE_BLOCK]]:
; VF4-NEXT:    br [[EXIT:label %.*]]
; VF4:       [[SCALAR_PH]]:
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]

  ; Load both fields of the %A element; the i8 GEP with offset 8 addresses the
  ; second double of the { double, double } element.
  %gep.A = getelementptr { double, double }, ptr %A, i64 %iv
  %l.A.0 = load double, ptr %gep.A, align 8
  %gep.A.1 = getelementptr inbounds nuw i8, ptr %gep.A, i64 8
  %l.A.1 = load double, ptr %gep.A.1, align 8

  ; One fadd per field; each result is stored twice below.
  %add.0 = fadd contract double %l.A.0, 20.0
  %add.1 = fadd contract double %l.A.1, 20.0

  ; First store group: both fields of the %B element.
  %gep.B = getelementptr { double, double }, ptr %B, i64 %iv
  store double %add.0, ptr %gep.B, align 8
  %gep.B.1 = getelementptr inbounds nuw i8, ptr %gep.B, i64 8
  store double %add.1, ptr %gep.B.1, align 8

  ; Second store group: the same two values into the %C element.
  %gep.C = getelementptr { double, double }, ptr %C, i64 %iv
  %gep.C.1 = getelementptr inbounds nuw i8, ptr %gep.C, i64 8
  store double %add.0, ptr %gep.C, align 8
  store double %add.1, ptr %gep.C.1, align 8

  %iv.next = add nuw nsw i64 %iv, 1
  %.not = icmp eq i64 %iv.next, 1000
  br i1 %.not, label %exit, label %loop

exit:
  ret void
}
0 commit comments