@@ -1013,6 +1013,133 @@ for.inc9: ; preds = %for.end
10131013for.end11: ; preds = %for.cond
10141014 ret void
10151015}
1016+
1017+ ; This test contains an example of a SAXPY loop manually unrolled by five:
1018+ ;
1019+ ; void saxpy(long n, float a, float *x, float *y) {
1020+ ; for (long i = 0; i < n; i += 5) {
1021+ ; y[i] += a * x[i];
1022+ ; y[i + 1] += a * x[i + 1];
1023+ ; y[i + 2] += a * x[i + 2];
1024+ ; y[i + 3] += a * x[i + 3];
1025+ ; y[i + 4] += a * x[i + 4];
1026+ ; }
1027+ ; }
1028+ ;
1029+ define void @saxpy_5 (i64 %n , float %a , ptr readonly %x , ptr noalias %y ) {
1030+ ; CHECK-LABEL: define void @saxpy_5(
1031+ ; CHECK-SAME: i64 [[N:%.*]], float [[A:%.*]], ptr readonly captures(none) [[X:%.*]], ptr noalias captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
1032+ ; CHECK-NEXT: [[ENTRY:.*:]]
1033+ ; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 0
1034+ ; CHECK-NEXT: br i1 [[TMP0]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]]
1035+ ; CHECK: [[LOOP_PREHEADER]]:
1036+ ; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[N]], -1
1037+ ; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP1]], 5
1038+ ; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
1039+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 6
1040+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[LOOP_PREHEADER11:.*]], label %[[VECTOR_PH:.*]]
1041+ ; CHECK: [[VECTOR_PH]]:
1042+ ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 9223372036854775806
1043+ ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 5
1044+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[A]], i64 0
1045+ ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <10 x i32> zeroinitializer
1046+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1047+ ; CHECK: [[VECTOR_BODY]]:
1048+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1049+ ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 5
1050+ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[X]], i64 [[OFFSET_IDX]]
1051+ ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <10 x float>, ptr [[TMP6]], align 4
1052+ ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[Y]], i64 [[OFFSET_IDX]]
1053+ ; CHECK-NEXT: [[WIDE_VEC5:%.*]] = load <10 x float>, ptr [[TMP7]], align 4
1054+ ; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <10 x float> [[WIDE_VEC]], [[TMP5]]
1055+ ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <10 x float> [[WIDE_VEC5]], [[TMP8]]
1056+ ; CHECK-NEXT: store <10 x float> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4
1057+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1058+ ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1059+ ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
1060+ ; CHECK: [[MIDDLE_BLOCK]]:
1061+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
1062+ ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT]], label %[[LOOP_PREHEADER11]]
1063+ ; CHECK: [[LOOP_PREHEADER11]]:
1064+ ; CHECK-NEXT: [[I1_PH:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ]
1065+ ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> poison, float [[A]], i64 0
1066+ ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <4 x i32> zeroinitializer
1067+ ; CHECK-NEXT: br label %[[LOOP:.*]]
1068+ ; CHECK: [[LOOP]]:
1069+ ; CHECK-NEXT: [[I1:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP]] ], [ [[I1_PH]], %[[LOOP_PREHEADER11]] ]
1070+ ; CHECK-NEXT: [[XGEP1:%.*]] = getelementptr inbounds nuw float, ptr [[X]], i64 [[I1]]
1071+ ; CHECK-NEXT: [[YGEP1:%.*]] = getelementptr inbounds nuw float, ptr [[Y]], i64 [[I1]]
1072+ ; CHECK-NEXT: [[TMP12:%.*]] = load <4 x float>, ptr [[XGEP1]], align 4
1073+ ; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <4 x float> [[TMP12]], [[TMP11]]
1074+ ; CHECK-NEXT: [[TMP14:%.*]] = load <4 x float>, ptr [[YGEP1]], align 4
1075+ ; CHECK-NEXT: [[TMP15:%.*]] = fadd fast <4 x float> [[TMP14]], [[TMP13]]
1076+ ; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[YGEP1]], align 4
1077+ ; CHECK-NEXT: [[I5:%.*]] = add nuw nsw i64 [[I1]], 4
1078+ ; CHECK-NEXT: [[XGEP5:%.*]] = getelementptr inbounds nuw float, ptr [[X]], i64 [[I5]]
1079+ ; CHECK-NEXT: [[X5:%.*]] = load float, ptr [[XGEP5]], align 4
1080+ ; CHECK-NEXT: [[AX5:%.*]] = fmul fast float [[X5]], [[A]]
1081+ ; CHECK-NEXT: [[YGEP5:%.*]] = getelementptr inbounds nuw float, ptr [[Y]], i64 [[I5]]
1082+ ; CHECK-NEXT: [[Y5:%.*]] = load float, ptr [[YGEP5]], align 4
1083+ ; CHECK-NEXT: [[AXPY5:%.*]] = fadd fast float [[Y5]], [[AX5]]
1084+ ; CHECK-NEXT: store float [[AXPY5]], ptr [[YGEP5]], align 4
1085+ ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I1]], 5
1086+ ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[N]], [[I_NEXT]]
1087+ ; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP10:![0-9]+]]
1088+ ; CHECK: [[EXIT]]:
1089+ ; CHECK-NEXT: ret void
1090+ ;
; Summary of the expectations above: a vector loop that loads and stores
; <10 x float> and advances its index by 2 (two input iterations of five
; floats each per step), followed by a remainder loop that handles one input
; iteration as a single <4 x float> operation plus one scalar float at
; index + 4.
; NOTE(review): the RUN line that produces this output is outside this view.
;
; Input IR starts here. The loop is entered only when %n > 0 (signed).
1091+ entry:
1092+ %0 = icmp sgt i64 %n , 0
1093+ br i1 %0 , label %loop , label %exit
1094+
1095+ loop:
; %i1 is the base index: 0 on entry from %entry, %i.next on the back edge.
; Element 1 of 5: y[i1] = y[i1] + a * x[i1], with fast-math flags.
1096+ %i1 = phi i64 [ %i.next , %loop ], [ 0 , %entry ]
1097+ %xgep1 = getelementptr inbounds nuw float , ptr %x , i64 %i1
1098+ %x1 = load float , ptr %xgep1 , align 4
1099+ %ax1 = fmul fast float %x1 , %a
1100+ %ygep1 = getelementptr inbounds nuw float , ptr %y , i64 %i1
1101+ %y1 = load float , ptr %ygep1 , align 4
1102+ %axpy1 = fadd fast float %y1 , %ax1
1103+ store float %axpy1 , ptr %ygep1 , align 4
; Element 2 of 5: the same update at index %i1 + 1.
1104+ %i2 = add nuw nsw i64 %i1 , 1
1105+ %xgep2 = getelementptr inbounds nuw float , ptr %x , i64 %i2
1106+ %x2 = load float , ptr %xgep2 , align 4
1107+ %ax2 = fmul fast float %x2 , %a
1108+ %ygep2 = getelementptr inbounds nuw float , ptr %y , i64 %i2
1109+ %y2 = load float , ptr %ygep2 , align 4
1110+ %axpy2 = fadd fast float %y2 , %ax2
1111+ store float %axpy2 , ptr %ygep2 , align 4
; Element 3 of 5: the same update at index %i1 + 2.
1112+ %i3 = add nuw nsw i64 %i1 , 2
1113+ %xgep3 = getelementptr inbounds nuw float , ptr %x , i64 %i3
1114+ %x3 = load float , ptr %xgep3 , align 4
1115+ %ax3 = fmul fast float %x3 , %a
1116+ %ygep3 = getelementptr inbounds nuw float , ptr %y , i64 %i3
1117+ %y3 = load float , ptr %ygep3 , align 4
1118+ %axpy3 = fadd fast float %y3 , %ax3
1119+ store float %axpy3 , ptr %ygep3 , align 4
; Element 4 of 5: the same update at index %i1 + 3.
1120+ %i4 = add nuw nsw i64 %i1 , 3
1121+ %xgep4 = getelementptr inbounds nuw float , ptr %x , i64 %i4
1122+ %x4 = load float , ptr %xgep4 , align 4
1123+ %ax4 = fmul fast float %x4 , %a
1124+ %ygep4 = getelementptr inbounds nuw float , ptr %y , i64 %i4
1125+ %y4 = load float , ptr %ygep4 , align 4
1126+ %axpy4 = fadd fast float %y4 , %ax4
1127+ store float %axpy4 , ptr %ygep4 , align 4
; Element 5 of 5: the same update at index %i1 + 4.
1128+ %i5 = add nuw nsw i64 %i1 , 4
1129+ %xgep5 = getelementptr inbounds nuw float , ptr %x , i64 %i5
1130+ %x5 = load float , ptr %xgep5 , align 4
1131+ %ax5 = fmul fast float %x5 , %a
1132+ %ygep5 = getelementptr inbounds nuw float , ptr %y , i64 %i5
1133+ %y5 = load float , ptr %ygep5 , align 4
1134+ %axpy5 = fadd fast float %y5 , %ax5
1135+ store float %axpy5 , ptr %ygep5 , align 4
; Advance the base index by 5 and loop again while %n > %i.next (signed).
1136+ %i.next = add nuw nsw i64 %i1 , 5
1137+ %cmp = icmp sgt i64 %n , %i.next
1138+ br i1 %cmp , label %loop , label %exit
1139+
1140+ exit:
1141+ ret void
1142+ }
10161143;.
10171144; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
10181145; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -1023,4 +1150,6 @@ for.end11: ; preds = %for.cond
10231150; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
10241151; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
10251152; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
1153+ ; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
1154+ ; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META1]]}
10261155;.
0 commit comments