@@ -589,88 +589,100 @@ for.exit: ; preds = %for.body
589589 ret i32 %add
590590}
591591
592- define i32 @not_dotp_not_phi (ptr %a , ptr %b ) {
592+ define i32 @not_dotp_not_phi (ptr %a , ptr noalias %b , ptr noalias %c ) {
593593; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_not_phi(
594- ; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
594+ ; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C :%.*]]) #[[ATTR0]] {
595595; CHECK-INTERLEAVE1-NEXT: entry:
596596; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
597597; CHECK-INTERLEAVE1: vector.ph:
598598; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
599599; CHECK-INTERLEAVE1: vector.body:
600600; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
601- ; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP8 :%.*]], [[VECTOR_BODY]] ]
601+ ; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP7 :%.*]], [[VECTOR_BODY]] ]
602602; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
603- ; CHECK-INTERLEAVE1-NEXT: [[TMP2 :%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
604- ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2 ]], align 1
605- ; CHECK-INTERLEAVE1-NEXT: [[TMP3 :%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
603+ ; CHECK-INTERLEAVE1-NEXT: [[TMP3 :%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
604+ ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3 ]], align 1
605+ ; CHECK-INTERLEAVE1-NEXT: [[TMP2 :%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
606606; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
607- ; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
608- ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
609- ; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
610- ; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = mul <16 x i32> [[TMP6]], [[TMP3]]
611- ; CHECK-INTERLEAVE1-NEXT: [[TMP8]] = add <16 x i32> [[TMP7]], [[TMP6]]
607+ ; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
608+ ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1
609+ ; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
610+ ; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = mul <16 x i32> [[TMP5]], [[TMP2]]
611+ ; CHECK-INTERLEAVE1-NEXT: [[TMP7]] = add <16 x i32> [[TMP6]], [[TMP5]]
612+ ; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP7]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
613+ ; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDEX]]
614+ ; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
615+ ; CHECK-INTERLEAVE1-NEXT: store <16 x i32> [[TMP8]], ptr [[TMP10]], align 4
612616; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
613617; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
614618; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
615619; CHECK-INTERLEAVE1: middle.block:
616- ; CHECK-INTERLEAVE1-NEXT: [[TMP10 :%.*]] = extractelement <16 x i32> [[TMP8 ]], i32 15
617- ; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP8 ]], i32 15
620+ ; CHECK-INTERLEAVE1-NEXT: [[TMP12 :%.*]] = extractelement <16 x i32> [[TMP7 ]], i32 15
621+ ; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP7 ]], i32 15
618622; CHECK-INTERLEAVE1-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
619623; CHECK-INTERLEAVE1: scalar.ph:
620624;
621625; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_not_phi(
622- ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
626+ ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C :%.*]]) #[[ATTR0]] {
623627; CHECK-INTERLEAVED-NEXT: entry:
624628; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
625629; CHECK-INTERLEAVED: vector.ph:
626630; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
627631; CHECK-INTERLEAVED: vector.body:
628632; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
629- ; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP8 :%.*]], [[VECTOR_BODY]] ]
633+ ; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP7 :%.*]], [[VECTOR_BODY]] ]
630634; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
631- ; CHECK-INTERLEAVED-NEXT: [[TMP2 :%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
632- ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2 ]], align 1
633- ; CHECK-INTERLEAVED-NEXT: [[TMP3 :%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
635+ ; CHECK-INTERLEAVED-NEXT: [[TMP3 :%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
636+ ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3 ]], align 1
637+ ; CHECK-INTERLEAVED-NEXT: [[TMP2 :%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
634638; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
635- ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
636- ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
637- ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
638- ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = mul <16 x i32> [[TMP6]], [[TMP3]]
639- ; CHECK-INTERLEAVED-NEXT: [[TMP8]] = add <16 x i32> [[TMP7]], [[TMP6]]
639+ ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
640+ ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1
641+ ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
642+ ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = mul <16 x i32> [[TMP5]], [[TMP2]]
643+ ; CHECK-INTERLEAVED-NEXT: [[TMP7]] = add <16 x i32> [[TMP6]], [[TMP5]]
644+ ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP7]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
645+ ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDEX]]
646+ ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
647+ ; CHECK-INTERLEAVED-NEXT: store <16 x i32> [[TMP8]], ptr [[TMP10]], align 4
640648; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
641649; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
642650; CHECK-INTERLEAVED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
643651; CHECK-INTERLEAVED: middle.block:
644- ; CHECK-INTERLEAVED-NEXT: [[TMP10 :%.*]] = extractelement <16 x i32> [[TMP8 ]], i32 15
645- ; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP8 ]], i32 15
652+ ; CHECK-INTERLEAVED-NEXT: [[TMP12 :%.*]] = extractelement <16 x i32> [[TMP7 ]], i32 15
653+ ; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP7 ]], i32 15
646654; CHECK-INTERLEAVED-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
647655; CHECK-INTERLEAVED: scalar.ph:
648656;
649657; CHECK-MAXBW-LABEL: define i32 @not_dotp_not_phi(
650- ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
658+ ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C :%.*]]) #[[ATTR0]] {
651659; CHECK-MAXBW-NEXT: entry:
652660; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
653661; CHECK-MAXBW: vector.ph:
654662; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
655663; CHECK-MAXBW: vector.body:
656664; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
657- ; CHECK-MAXBW-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP8 :%.*]], [[VECTOR_BODY]] ]
665+ ; CHECK-MAXBW-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP7 :%.*]], [[VECTOR_BODY]] ]
658666; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
659- ; CHECK-MAXBW-NEXT: [[TMP2 :%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
660- ; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2 ]], align 1
661- ; CHECK-MAXBW-NEXT: [[TMP3 :%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
667+ ; CHECK-MAXBW-NEXT: [[TMP3 :%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
668+ ; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3 ]], align 1
669+ ; CHECK-MAXBW-NEXT: [[TMP2 :%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
662670; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
663- ; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
664- ; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
665- ; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
666- ; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = mul <16 x i32> [[TMP6]], [[TMP3]]
667- ; CHECK-MAXBW-NEXT: [[TMP8]] = add <16 x i32> [[TMP7]], [[TMP6]]
671+ ; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
672+ ; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1
673+ ; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
674+ ; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = mul <16 x i32> [[TMP5]], [[TMP2]]
675+ ; CHECK-MAXBW-NEXT: [[TMP7]] = add <16 x i32> [[TMP6]], [[TMP5]]
676+ ; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP7]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
677+ ; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDEX]]
678+ ; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
679+ ; CHECK-MAXBW-NEXT: store <16 x i32> [[TMP8]], ptr [[TMP10]], align 4
668680; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
669681; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
670682; CHECK-MAXBW-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
671683; CHECK-MAXBW: middle.block:
672- ; CHECK-MAXBW-NEXT: [[TMP10 :%.*]] = extractelement <16 x i32> [[TMP8 ]], i32 15
673- ; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP8 ]], i32 15
684+ ; CHECK-MAXBW-NEXT: [[TMP12 :%.*]] = extractelement <16 x i32> [[TMP7 ]], i32 15
685+ ; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP7 ]], i32 15
674686; CHECK-MAXBW-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
675687; CHECK-MAXBW: scalar.ph:
676688;
@@ -688,6 +700,8 @@ for.body: ; preds = %for.body, %entry
688700 %ext.b = zext i8 %load.b to i32
689701 %mul = mul i32 %ext.b , %ext.a
690702 %add = add i32 %mul , %ext.b
703+ %gep.c = getelementptr i32 , ptr %c , i64 %iv
704+ store i32 %accum , ptr %gep.c
691705 %iv.next = add i64 %iv , 1
692706 %exitcond.not = icmp eq i64 %iv.next , 1024
693707 br i1 %exitcond.not , label %for.exit , label %for.body
@@ -946,6 +960,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) {
946960; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[NUM_IN]], [[N_VEC]]
947961; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
948962; CHECK-MAXBW: scalar.ph:
963+ ;
949964entry:
950965 br label %for.body
951966
0 commit comments