@@ -1816,13 +1816,12 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
18161816; CHECK-INTERLEAVE1-NEXT: entry:
18171817; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
18181818; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
1819- ; CHECK-INTERLEAVE1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 16, [[TMP1]]
1820- ; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1819+ ; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
18211820; CHECK-INTERLEAVE1: vector.ph:
18221821; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
18231822; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
1824- ; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 16 , [[TMP3]]
1825- ; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 16 , [[N_MOD_VF]]
1823+ ; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 41 , [[TMP3]]
1824+ ; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 41 , [[N_MOD_VF]]
18261825; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
18271826; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
18281827; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1845,7 +1844,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
18451844; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
18461845; CHECK-INTERLEAVE1: middle.block:
18471846; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP15]])
1848- ; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 16 , [[N_VEC]]
1847+ ; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 41 , [[N_VEC]]
18491848; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
18501849; CHECK-INTERLEAVE1: scalar.ph:
18511850;
@@ -1854,13 +1853,13 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
18541853; CHECK-INTERLEAVED-NEXT: entry:
18551854; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
18561855; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
1857- ; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 16 , [[TMP1]]
1856+ ; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 41 , [[TMP1]]
18581857; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
18591858; CHECK-INTERLEAVED: vector.ph:
18601859; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
18611860; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
1862- ; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 16 , [[TMP3]]
1863- ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 16 , [[N_MOD_VF]]
1861+ ; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 41 , [[TMP3]]
1862+ ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 41 , [[N_MOD_VF]]
18641863; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
18651864; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
18661865; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1897,35 +1896,46 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
18971896; CHECK-INTERLEAVED: middle.block:
18981897; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <vscale x 2 x i64> [[TMP25]], [[TMP24]]
18991898; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[BIN_RDX]])
1900- ; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 16 , [[N_VEC]]
1899+ ; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 41 , [[N_VEC]]
19011900; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
19021901; CHECK-INTERLEAVED: scalar.ph:
19031902;
19041903; CHECK-MAXBW-LABEL: define i64 @dotp_cost_disagreement(
19051904; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
19061905; CHECK-MAXBW-NEXT: entry:
1907- ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1906+ ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
1907+ ; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
1908+ ; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 41, [[TMP1]]
1909+ ; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
19081910; CHECK-MAXBW: vector.ph:
1911+ ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
1912+ ; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
1913+ ; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 41, [[TMP3]]
1914+ ; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 41, [[N_MOD_VF]]
1915+ ; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
1916+ ; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
19091917; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
19101918; CHECK-MAXBW: vector.body:
19111919; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1912- ; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <16 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13 :%.*]], [[VECTOR_BODY]] ]
1920+ ; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14 :%.*]], [[VECTOR_BODY]] ]
19131921; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDEX]]
19141922; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i32 0
1915- ; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1
1916- ; CHECK-MAXBW-NEXT: [[TMP2 :%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i64>
1923+ ; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP8]], align 1
1924+ ; CHECK-MAXBW-NEXT: [[TMP9 :%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i64>
19171925; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[INDEX]], 1
19181926; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP10]]
19191927; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP11]], i32 0
1920- ; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 1
1921- ; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i64>
1922- ; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = mul nuw nsw <16 x i64> [[TMP6]], [[TMP2]]
1923- ; CHECK-MAXBW-NEXT: [[TMP13]] = add <16 x i64> [[VEC_PHI]], [[TMP14]]
1924- ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1925- ; CHECK-MAXBW-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
1928+ ; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP12]], align 1
1929+ ; CHECK-MAXBW-NEXT: [[TMP17:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i64>
1930+ ; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = mul nuw nsw <vscale x 8 x i64> [[TMP17]], [[TMP9]]
1931+ ; CHECK-MAXBW-NEXT: [[TMP14]] = add <vscale x 8 x i64> [[VEC_PHI]], [[TMP13]]
1932+ ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
1933+ ; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1934+ ; CHECK-MAXBW-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
19261935; CHECK-MAXBW: middle.block:
1927- ; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> [[TMP13]])
1928- ; CHECK-MAXBW-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
1936+ ; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> [[TMP14]])
1937+ ; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 41, [[N_VEC]]
1938+ ; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
19291939; CHECK-MAXBW: scalar.ph:
19301940;
19311941entry:
@@ -1943,7 +1953,7 @@ for.body: ; preds = %entry, %for.body
19431953 %conv3 = zext i8 %1 to i64
19441954 %mul = mul nuw nsw i64 %conv3 , %conv
19451955 %add = add i64 %sum , %mul
1946- %exitcond.not = icmp eq i64 %i.iv.next , 16
1956+ %exitcond.not = icmp eq i64 %i.iv.next , 41
19471957 br i1 %exitcond.not , label %exit , label %for.body
19481958
19491959exit: ; preds = %for.body
0 commit comments