Skip to content

Commit 7b82873

Browse files
committed
[LV] Add tests with multiple store groups re-using widened ops.
Test coverage for #156190.
1 parent 0048337 commit 7b82873

File tree

2 files changed

+154
-0
lines changed

2 files changed

+154
-0
lines changed

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1168,3 +1168,84 @@ loop:
11681168
exit:
11691169
ret void
11701170
}
1171+
1172+
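; FIXME: In the VF2 checks below the load group from %A is incorrectly narrowed to a single scalar load whose broadcast feeds both stored fields, see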
1173+
; https://github.com/llvm/llvm-project/issues/156190.
1174+
; Input loop: each iteration loads both fields of %A[%iv], adds 20.0 to each,
; and stores the same two sums to both %B[%iv] and %C[%iv], so the two store
; groups share the same fadd results. %iv starts at 0 and the loop exits once
; %iv.next equals 1000.
define void @multiple_store_groups_storing_same_wide_bin_op(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
1175+
; VF2-LABEL: define void @multiple_store_groups_storing_same_wide_bin_op(
1176+
; VF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
1177+
; VF2-NEXT: [[ENTRY:.*:]]
1178+
; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1179+
; VF2: [[VECTOR_PH]]:
1180+
; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
1181+
; VF2: [[VECTOR_BODY]]:
1182+
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1183+
; VF2-NEXT: [[TMP0:%.*]] = getelementptr { double, double }, ptr [[A]], i64 [[INDEX]]
1184+
; VF2-NEXT: [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8
1185+
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i64 0
1186+
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
1187+
; VF2-NEXT: [[TMP2:%.*]] = fadd contract <2 x double> [[BROADCAST_SPLAT]], splat (double 2.000000e+01)
1188+
; VF2-NEXT: [[TMP3:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[INDEX]]
1189+
; VF2-NEXT: store <2 x double> [[TMP2]], ptr [[TMP3]], align 8
1190+
; VF2-NEXT: [[TMP4:%.*]] = getelementptr { double, double }, ptr [[C]], i64 [[INDEX]]
1191+
; VF2-NEXT: store <2 x double> [[TMP2]], ptr [[TMP4]], align 8
1192+
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
1193+
; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
1194+
; VF2-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
1195+
; VF2: [[MIDDLE_BLOCK]]:
1196+
; VF2-NEXT: br [[EXIT:label %.*]]
1197+
; VF2: [[SCALAR_PH]]:
1198+
;
1199+
; VF4-LABEL: define void @multiple_store_groups_storing_same_wide_bin_op(
1200+
; VF4-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
1201+
; VF4-NEXT: [[ENTRY:.*:]]
1202+
; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1203+
; VF4: [[VECTOR_PH]]:
1204+
; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
1205+
; VF4: [[VECTOR_BODY]]:
1206+
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1207+
; VF4-NEXT: [[TMP0:%.*]] = getelementptr { double, double }, ptr [[A]], i64 [[INDEX]]
1208+
; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x double>, ptr [[TMP0]], align 8
1209+
; VF4-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1210+
; VF4-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1211+
; VF4-NEXT: [[TMP1:%.*]] = fadd contract <4 x double> [[STRIDED_VEC]], splat (double 2.000000e+01)
1212+
; VF4-NEXT: [[TMP2:%.*]] = fadd contract <4 x double> [[STRIDED_VEC1]], splat (double 2.000000e+01)
1213+
; VF4-NEXT: [[TMP3:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[INDEX]]
1214+
; VF4-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1215+
; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP4]], <8 x double> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
1216+
; VF4-NEXT: store <8 x double> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8
1217+
; VF4-NEXT: [[TMP5:%.*]] = getelementptr { double, double }, ptr [[C]], i64 [[INDEX]]
1218+
; VF4-NEXT: store <8 x double> [[INTERLEAVED_VEC]], ptr [[TMP5]], align 8
1219+
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1220+
; VF4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
1221+
; VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
1222+
; VF4: [[MIDDLE_BLOCK]]:
1223+
; VF4-NEXT: br [[EXIT:label %.*]]
1224+
; VF4: [[SCALAR_PH]]:
1225+
;
1226+
entry:
1227+
br label %loop
1228+
1229+
loop:
1230+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
1231+
; Load both fields of the { double, double } element %A[%iv]; the second field
; is addressed 8 bytes past the first.
%gep.A = getelementptr { double, double }, ptr %A, i64 %iv
1232+
%l.A.0 = load double, ptr %gep.A, align 8
1233+
%gep.A.1 = getelementptr inbounds nuw i8, ptr %gep.A, i64 8
1234+
%l.A.1 = load double, ptr %gep.A.1, align 8
1235+
; Add 20.0 to each loaded field; these two sums feed both store groups below.
%add.0 = fadd contract double %l.A.0, 20.0
1236+
%add.1 = fadd contract double %l.A.1, 20.0
1237+
; First store group: write both sums to the two fields of %B[%iv].
%gep.B = getelementptr { double, double }, ptr %B, i64 %iv
1238+
store double %add.0, ptr %gep.B, align 8
1239+
%gep.B.1 = getelementptr inbounds nuw i8, ptr %gep.B, i64 8
1240+
store double %add.1, ptr %gep.B.1, align 8
1241+
; Second store group: write the same two sums to the two fields of %C[%iv].
%gep.C = getelementptr { double, double }, ptr %C, i64 %iv
1242+
%gep.C.1 = getelementptr inbounds nuw i8, ptr %gep.C, i64 8
1243+
store double %add.0, ptr %gep.C, align 8
1244+
store double %add.1, ptr %gep.C.1, align 8
1245+
%iv.next = add nuw nsw i64 %iv, 1
1246+
%.not = icmp eq i64 %iv.next, 1000
1247+
br i1 %.not, label %exit, label %loop
1248+
1249+
exit:
1250+
ret void
1251+
}

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory.ll

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,3 +585,76 @@ loop:
585585
exit:
586586
ret void
587587
}
588+
589+
; Input loop: each iteration loads both fields of %A[%iv] and stores the loaded
; values unchanged to both %B[%iv] and %C[%iv], so the two store groups share
; the same pair of loads. %iv starts at 0 and the loop exits once %iv.next
; equals 1000.
define void @multiple_store_groups_storing_same_load_group(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
590+
; VF2-LABEL: define void @multiple_store_groups_storing_same_load_group(
591+
; VF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
592+
; VF2-NEXT: [[ENTRY:.*:]]
593+
; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
594+
; VF2: [[VECTOR_PH]]:
595+
; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
596+
; VF2: [[VECTOR_BODY]]:
597+
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
598+
; VF2-NEXT: [[TMP0:%.*]] = getelementptr { double, double }, ptr [[A]], i64 [[INDEX]]
599+
; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
600+
; VF2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
601+
; VF2-NEXT: [[TMP1:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[INDEX]]
602+
; VF2-NEXT: store <2 x double> [[WIDE_LOAD]], ptr [[TMP1]], align 8
603+
; VF2-NEXT: [[TMP2:%.*]] = getelementptr { double, double }, ptr [[C]], i64 [[INDEX]]
604+
; VF2-NEXT: store <2 x double> [[WIDE_LOAD1]], ptr [[TMP2]], align 8
605+
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
606+
; VF2-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
607+
; VF2-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
608+
; VF2: [[MIDDLE_BLOCK]]:
609+
; VF2-NEXT: br [[EXIT:label %.*]]
610+
; VF2: [[SCALAR_PH]]:
611+
;
612+
; VF4-LABEL: define void @multiple_store_groups_storing_same_load_group(
613+
; VF4-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
614+
; VF4-NEXT: [[ENTRY:.*:]]
615+
; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
616+
; VF4: [[VECTOR_PH]]:
617+
; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
618+
; VF4: [[VECTOR_BODY]]:
619+
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
620+
; VF4-NEXT: [[TMP0:%.*]] = getelementptr { double, double }, ptr [[A]], i64 [[INDEX]]
621+
; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x double>, ptr [[TMP0]], align 8
622+
; VF4-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
623+
; VF4-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
624+
; VF4-NEXT: [[TMP1:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[INDEX]]
625+
; VF4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[STRIDED_VEC]], <4 x double> [[STRIDED_VEC1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
626+
; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
627+
; VF4-NEXT: store <8 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
628+
; VF4-NEXT: [[TMP3:%.*]] = getelementptr { double, double }, ptr [[C]], i64 [[INDEX]]
629+
; VF4-NEXT: store <8 x double> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8
630+
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
631+
; VF4-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
632+
; VF4-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
633+
; VF4: [[MIDDLE_BLOCK]]:
634+
; VF4-NEXT: br [[EXIT:label %.*]]
635+
; VF4: [[SCALAR_PH]]:
636+
;
637+
entry:
638+
br label %loop
639+
640+
loop:
641+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
642+
; Address and load both fields of the { double, double } element %A[%iv]; the
; second field is addressed 8 bytes past the first.
%gep.A = getelementptr { double, double }, ptr %A, i64 %iv
643+
%gep.A.1 = getelementptr inbounds nuw i8, ptr %gep.A, i64 8
644+
%l.A.0 = load double, ptr %gep.A, align 8
645+
%l.A.1 = load double, ptr %gep.A.1, align 8
646+
; First store group: copy both loaded fields to %B[%iv].
%gep.B = getelementptr { double, double }, ptr %B, i64 %iv
647+
%gep.B.1 = getelementptr inbounds nuw i8, ptr %gep.B, i64 8
648+
store double %l.A.0, ptr %gep.B, align 8
649+
store double %l.A.1, ptr %gep.B.1, align 8
650+
; Second store group: copy the same loaded fields to %C[%iv].
%gep.C = getelementptr { double, double }, ptr %C, i64 %iv
651+
%gep.C.1 = getelementptr inbounds nuw i8, ptr %gep.C, i64 8
652+
store double %l.A.0, ptr %gep.C, align 8
653+
store double %l.A.1, ptr %gep.C.1, align 8
654+
%iv.next = add nuw nsw i64 %iv, 1
655+
%.not = icmp eq i64 %iv.next, 1000
656+
br i1 %.not, label %exit, label %loop
657+
658+
exit:
659+
ret void
660+
}

0 commit comments

Comments
 (0)