diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 1789671276ffa..6102e54586b4c 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -742,6 +742,13 @@ llvm::computeMinimumValueSizes(ArrayRef Blocks, DemandedBits &DB, Abort = true; break; } + + // If all of the instructions in the chain are load or cast instructions, we + // cannot get any benefit from MinBW. + if (all_of(llvm::make_range(ECs.member_begin(I), ECs.member_end()), + [](Value *M) { return isa(M); })) + Abort = true; + if (Abort) continue; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/trunc-cast.ll b/llvm/test/Transforms/LoopVectorize/RISCV/trunc-cast.ll new file mode 100644 index 0000000000000..b76561d5584eb --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/trunc-cast.ll @@ -0,0 +1,162 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt --passes=loop-vectorize -S %s -mtriple riscv64 -mattr=+v | FileCheck %s +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" + +define void @h(ptr %i, ptr %k, i64 %idxprom.us) #0 { +; CHECK-LABEL: define void @h( +; CHECK-SAME: ptr [[I:%.*]], ptr [[K:%.*]], i64 [[IDXPROM_US:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 1073741824, [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 1073741824, [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 1073741824, [[TMP5]] +; CHECK-NEXT: 
[[IND_END:%.*]] = mul i32 [[N_VEC]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv2i32() +; CHECK-NEXT: [[TMP9:%.*]] = add [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP9]], splat (i32 4) +; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = mul i32 4, [[TMP7]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP11]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP13:%.*]] = mul i32 [[TMP12]], 2 +; CHECK-NEXT: [[TMP14:%.*]] = sub i32 [[TMP13]], 1 +; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i8 0, i32 [[TMP14]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[I]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP15:%.*]] = zext [[VEC_IND]] to +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [2 x i16], ptr [[K]], [[TMP15]], i64 [[IDXPROM_US]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i16.nxv2p0( [[TMP16]], i32 2, splat (i1 true), poison) +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq [[WIDE_MASKED_GATHER]], zeroinitializer +; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], i32 8, [[TMP17]], poison) +; CHECK-NEXT: [[TMP18:%.*]] = shl 
zeroinitializer, [[WIDE_MASKED_GATHER1]] +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = xor [[TMP19]], splat (i1 true) +; CHECK-NEXT: [[TMP21:%.*]] = select [[TMP17]], [[TMP20]], zeroinitializer +; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], i32 8, [[TMP21]], poison) +; CHECK-NEXT: [[TMP22:%.*]] = trunc [[WIDE_MASKED_GATHER2]] to +; CHECK-NEXT: [[TMP23:%.*]] = xor [[TMP17]], splat (i1 true) +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP21]], [[TMP22]], zeroinitializer +; CHECK-NEXT: [[PREDPHI3:%.*]] = select [[TMP23]], zeroinitializer, [[PREDPHI]] +; CHECK-NEXT: [[TMP24:%.*]] = trunc [[PREDPHI3]] to +; CHECK-NEXT: [[TMP25]] = mul zeroinitializer, [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr [2 x i64], ptr [[I]], i64 0, [[TMP15]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP26]], i32 8, splat (i1 true), poison) +; CHECK-NEXT: [[TMP27:%.*]] = trunc [[WIDE_MASKED_GATHER4]] to +; CHECK-NEXT: [[TMP28:%.*]] = xor [[TMP27]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP30:%.*]] = mul i32 [[TMP29]], 2 +; CHECK-NEXT: [[TMP31:%.*]] = sub i32 [[TMP30]], 1 +; CHECK-NEXT: [[TMP32:%.*]] = extractelement [[TMP28]], i32 [[TMP31]] +; CHECK-NEXT: store i16 [[TMP32]], ptr null, align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP7]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[TMP33:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP33]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP34:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP35:%.*]] = mul i32 [[TMP34]], 2 +; CHECK-NEXT: [[TMP36:%.*]] = sub i32 [[TMP35]], 1 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement [[TMP25]], i32 [[TMP36]] +; 
CHECK-NEXT: br label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY_US:.*]] +; CHECK: [[FOR_BODY_US]]: +; CHECK-NEXT: [[L_046_US:%.*]] = phi i32 [ [[ADD_US:%.*]], %[[COND_END23_US:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[CONV284345_US:%.*]] = phi i8 [ [[CONV28_US:%.*]], %[[COND_END23_US]] ], [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[IDXPROM_US1:%.*]] = zext i32 [[L_046_US]] to i64 +; CHECK-NEXT: [[ARRAYIDX3_US:%.*]] = getelementptr [2 x i16], ptr [[K]], i64 [[IDXPROM_US1]], i64 [[IDXPROM_US]] +; CHECK-NEXT: [[TMP37:%.*]] = load i16, ptr [[ARRAYIDX3_US]], align 2 +; CHECK-NEXT: [[TOBOOL4_NOT_US:%.*]] = icmp eq i16 [[TMP37]], 0 +; CHECK-NEXT: br i1 [[TOBOOL4_NOT_US]], label %[[COND_FALSE7_US:.*]], label %[[COND_END23_US]] +; CHECK: [[COND_FALSE7_US]]: +; CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[I]], align 8 +; CHECK-NEXT: [[SHL_US:%.*]] = shl i64 0, [[TMP38]] +; CHECK-NEXT: [[TOBOOL12_NOT_US:%.*]] = icmp eq i64 [[SHL_US]], 0 +; CHECK-NEXT: br i1 [[TOBOOL12_NOT_US]], label %[[COND_END23_US]], label %[[COND_TRUE14_US:.*]] +; CHECK: [[COND_TRUE14_US]]: +; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[I]], align 8 +; CHECK-NEXT: [[TMP40:%.*]] = trunc i64 [[TMP39]] to i32 +; CHECK-NEXT: br label %[[COND_END23_US]] +; CHECK: [[COND_END23_US]]: +; CHECK-NEXT: [[COND24_US:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE14_US]] ], [ 0, %[[FOR_BODY_US]] ], [ 0, %[[COND_FALSE7_US]] ] +; CHECK-NEXT: [[TMP41:%.*]] = trunc i32 [[COND24_US]] to i8 +; CHECK-NEXT: [[CONV28_US]] = mul i8 0, [[TMP41]] +; CHECK-NEXT: [[ARRAYIDX31_US:%.*]] = getelementptr [2 x i64], ptr [[I]], i64 0, i64 [[IDXPROM_US1]] +; CHECK-NEXT: [[TMP42:%.*]] = load i64, ptr [[ARRAYIDX31_US]], align 8 +; CHECK-NEXT: [[TMP43:%.*]] = trunc 
i64 [[TMP42]] to i16 +; CHECK-NEXT: [[CONV32_US:%.*]] = xor i16 [[TMP43]], 0 +; CHECK-NEXT: store i16 [[CONV32_US]], ptr null, align 2 +; CHECK-NEXT: [[ADD_US]] = add i32 [[L_046_US]], 4 +; CHECK-NEXT: [[TOBOOL_NOT_US:%.*]] = icmp eq i32 [[ADD_US]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT_US]], label %[[FOR_COND_FOR_COND_CLEANUP_CRIT_EDGE:.*]], label %[[FOR_BODY_US]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[FOR_COND_FOR_COND_CLEANUP_CRIT_EDGE]]: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i8 [ [[TMP41]], %[[COND_END23_US]] ] +; CHECK-NEXT: store i8 [[DOTLCSSA]], ptr null, align 1 +; CHECK-NEXT: ret void +; +entry: + br label %for.body.us + +for.body.us: ; preds = %cond.end23.us, %entry + %l.046.us = phi i32 [ %add.us, %cond.end23.us ], [ 0, %entry ] + %conv284345.us = phi i8 [ %conv28.us, %cond.end23.us ], [ 0, %entry ] + %idxprom.us1 = zext i32 %l.046.us to i64 + %arrayidx3.us = getelementptr [2 x i16], ptr %k, i64 %idxprom.us1, i64 %idxprom.us + %0 = load i16, ptr %arrayidx3.us, align 2 + %tobool4.not.us = icmp eq i16 %0, 0 + br i1 %tobool4.not.us, label %cond.false7.us, label %cond.end23.us + +cond.false7.us: ; preds = %for.body.us + %1 = load i64, ptr %i, align 8 + %shl.us = shl i64 0, %1 + %tobool12.not.us = icmp eq i64 %shl.us, 0 + br i1 %tobool12.not.us, label %cond.end23.us, label %cond.true14.us + +cond.true14.us: ; preds = %cond.false7.us + %2 = load i64, ptr %i, align 8 + %3 = trunc i64 %2 to i32 + br label %cond.end23.us + +cond.end23.us: ; preds = %cond.true14.us, %cond.false7.us, %for.body.us + %cond24.us = phi i32 [ %3, %cond.true14.us ], [ 0, %for.body.us ], [ 0, %cond.false7.us ] + %4 = trunc i32 %cond24.us to i8 + %conv28.us = mul i8 0, %4 + %arrayidx31.us = getelementptr [2 x i64], ptr %i, i64 0, i64 %idxprom.us1 + %5 = load i64, ptr %arrayidx31.us, align 8 + %6 = trunc i64 %5 to i16 + %conv32.us = xor i16 %6, 0 + store i16 %conv32.us, ptr null, align 2 + %add.us = add i32 %l.046.us, 4 + %tobool.not.us = icmp eq i32 %add.us, 0 + br i1 %tobool.not.us, 
label %for.cond.for.cond.cleanup_crit_edge, label %for.body.us + +for.cond.for.cond.cleanup_crit_edge: ; preds = %cond.end23.us + store i8 %4, ptr null, align 1 + ret void +} + +attributes #0 = { "target-features"="+64bit,+a,+c,+d,+f,+m,+relax,+v,+zicsr,+zifencei,+zmmul,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-b,-e,-experimental-smctr,-experimental-ssctr,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-h,-sha,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcsrind,-smdbltrp,-smepmp,-smmpm,-smnpm,-smrnmi,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssdbltrp,-ssnpm,-sspm,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-supm,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-svvptc,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecdiscarddlone,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-za64rs,-zaamo,-zabha,-zacas,-zalrsc,-zama16b,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfbfmin,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zic64b,-zicbom,-zicbop,-zicboz,-ziccamoa,-ziccif,-zicclsm,-ziccrse,-zicntr,-zicond,-zihintntl,-zihintpause,-zihpm,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-ztso,-zvbb,-zvbc,-zvfbfmin,-zvfbfwma,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" } + +;. 
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +;.