Rebase and revert the implementation of VPWidenCastEVLRecipe

LiqinWeng · LiqinWeng · commit 4d3be159fabe · 2024-11-28T10:13:53.000+08:00
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4516,7 +4516,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
       case VPDef::VPWidenCallSC:
       case VPDef::VPWidenCanonicalIVSC:
       case VPDef::VPWidenCastSC:
-      case VPDef::VPWidenCastEVLSC:
       case VPDef::VPWidenGEPSC:
       case VPDef::VPWidenIntrinsicSC:
       case VPDef::VPWidenSC:
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -866,7 +866,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
     case VPRecipeBase::VPWidenCallSC:
     case VPRecipeBase::VPWidenCanonicalIVSC:
     case VPRecipeBase::VPWidenCastSC:
-    case VPRecipeBase::VPWidenCastEVLSC:
     case VPRecipeBase::VPWidenGEPSC:
     case VPRecipeBase::VPWidenIntrinsicSC:
     case VPRecipeBase::VPWidenSC:
@@ -1064,7 +1063,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
            R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
            R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
            R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
-           R->getVPDefID() == VPRecipeBase::VPWidenCastEVLSC ||
            R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
            R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
            R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
@@ -1544,28 +1542,19 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
   /// Result type for the cast.
   Type *ResultTy;
 
-protected:
-  VPWidenCastRecipe(unsigned VPDefOpcode, Instruction::CastOps Opcode,
-                    VPValue *Op, Type *ResultTy, CastInst &UI)
+public:
+  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
+                    CastInst &UI)
       : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
         ResultTy(ResultTy) {
     assert(UI.getOpcode() == Opcode &&
            "opcode of underlying cast doesn't match");
   }
 
-  VPWidenCastRecipe(unsigned VPDefOpcode, Instruction::CastOps Opcode,
-                    VPValue *Op, Type *ResultTy)
+  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
       : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
         ResultTy(ResultTy) {}
 
-public:
-  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
-                    CastInst &UI)
-      : VPWidenCastRecipe(VPDef::VPWidenCastSC, Opcode, Op, ResultTy, UI) {}
-
-  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
-      : VPWidenCastRecipe(VPDef::VPWidenCastSC, Opcode, Op, ResultTy) {}
-
   ~VPWidenCastRecipe() override = default;
 
   VPWidenCastRecipe *clone() override {
@@ -1576,15 +1565,7 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
     return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
   }
 
-  static inline bool classof(const VPRecipeBase *R) {
-    return R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
-           R->getVPDefID() == VPRecipeBase::VPWidenCastEVLSC;
-  }
-
-  static inline bool classof(const VPUser *U) {
-    auto *R = dyn_cast<VPRecipeBase>(U);
-    return R && classof(R);
-  }
+  VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
 
   /// Produce widened copies of the cast.
   void execute(VPTransformState &State) override;
@@ -1605,54 +1586,6 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
   Type *getResultType() const { return ResultTy; }
 };
 
-// A recipe for widening cast operation with vector-predication intrinsics with
-/// explicit vector length (EVL).
-class VPWidenCastEVLRecipe : public VPWidenCastRecipe {
-  using VPRecipeWithIRFlags::transferFlags;
-
-public:
-  VPWidenCastEVLRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
-                       VPValue &EVL)
-      : VPWidenCastRecipe(VPDef::VPWidenCastEVLSC, Opcode, Op, ResultTy) {
-    addOperand(&EVL);
-  }
-  VPWidenCastEVLRecipe(VPWidenCastRecipe &W, VPValue &EVL)
-      : VPWidenCastEVLRecipe(W.getOpcode(), W.getOperand(0), W.getResultType(),
-                             EVL) {
-    transferFlags(W);
-  }
-
-  ~VPWidenCastEVLRecipe() override = default;
-
-  VPWidenCastEVLRecipe *clone() final {
-    llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
-    return nullptr;
-  }
-
-  VP_CLASSOF_IMPL(VPDef::VPWidenCastEVLSC)
-
-  VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
-  const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
-
-  /// Produce a vp-intrinsic copies of the cast.
-  void execute(VPTransformState &State) final;
-
-  /// Returns true if the recipe only uses the first lane of operand \p Op.
-  bool onlyFirstLaneUsed(const VPValue *Op) const override {
-    assert(is_contained(operands(), Op) &&
-           "Op must be an operand of the recipe");
-    // EVL in that recipe is always the last operand, thus any use before means
-    // the VPValue should be vectorized.
-    return getEVL() == Op;
-  }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-  /// Print the recipe.
-  void print(raw_ostream &O, const Twine &Indent,
-             VPSlotTracker &SlotTracker) const final;
-#endif
-};
-
 /// VPScalarCastRecipe is a recipe to create scalar cast instructions.
 class VPScalarCastRecipe : public VPSingleDefRecipe {
   Instruction::CastOps Opcode;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -92,7 +92,6 @@ bool VPRecipeBase::mayWriteToMemory() const {
   case VPVectorPointerSC:
   case VPWidenCanonicalIVSC:
   case VPWidenCastSC:
-  case VPWidenCastEVLSC:
   case VPWidenGEPSC:
   case VPWidenIntOrFpInductionSC:
   case VPWidenLoadEVLSC:
@@ -139,7 +138,6 @@ bool VPRecipeBase::mayReadFromMemory() const {
   case VPVectorPointerSC:
   case VPWidenCanonicalIVSC:
   case VPWidenCastSC:
-  case VPWidenCastEVLSC:
   case VPWidenGEPSC:
   case VPWidenIntOrFpInductionSC:
   case VPWidenPHISC:
@@ -180,7 +178,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
   case VPVectorPointerSC:
   case VPWidenCanonicalIVSC:
   case VPWidenCastSC:
-  case VPWidenCastEVLSC:
   case VPWidenGEPSC:
   case VPWidenIntOrFpInductionSC:
   case VPWidenPHISC:
@@ -1557,40 +1554,6 @@ InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
       dyn_cast_if_present<Instruction>(getUnderlyingValue()));
 }
 
-void VPWidenCastEVLRecipe::execute(VPTransformState &State) {
-  unsigned Opcode = getOpcode();
-  State.setDebugLocFrom(getDebugLoc());
-  assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
-                          "explicit vector length.");
-
-  assert(State.get(getOperand(0), 0)->getType()->isVectorTy() &&
-         "VPWidenCastEVLRecipe should not be used for scalars");
-
-  // TODO: add more cast instruction, eg: fptoint/inttofp/inttoptr/fptofp
-  if (Opcode == Instruction::SExt || Opcode == Instruction::ZExt ||
-      Opcode == Instruction::Trunc) {
-    Value *SrcVal = State.get(getOperand(0), 0);
-    VectorType *SrcTy = cast<VectorType>(SrcVal->getType());
-    VectorType *DsType =
-        VectorType::get(getResultType(), SrcTy->getElementCount());
-
-    IRBuilderBase &BuilderIR = State.Builder;
-    VectorBuilder Builder(BuilderIR);
-    Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
-
-    Builder.setMask(Mask).setEVL(State.get(getEVL(), 0, /*NeedsScalar=*/true));
-    Value *VPInst =
-        Builder.createVectorInstruction(Opcode, DsType, {SrcVal}, "vp.cast");
-    if (VPInst) {
-      if (auto *VecOp = dyn_cast<CastInst>(VPInst))
-        VecOp->copyIRFlags(getUnderlyingInstr());
-    }
-    State.set(this, VPInst, 0);
-    State.addMetadata(VPInst,
-                      dyn_cast_or_null<Instruction>(getUnderlyingValue()));
-  }
-}
-
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
@@ -1601,16 +1564,6 @@ void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
   printOperands(O, SlotTracker);
   O << " to " << *getResultType();
 }
-
-void VPWidenCastEVLRecipe::print(raw_ostream &O, const Twine &Indent,
-                                 VPSlotTracker &SlotTracker) const {
-  O << Indent << "WIDEN-VP ";
-  printAsOperand(O, SlotTracker);
-  O << " = vp." << Instruction::getOpcodeName(getOpcode()) << " ";
-  printFlags(O);
-  printOperands(O, SlotTracker);
-  O << " to " << *getResultType();
-}
 #endif
 
 InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1477,15 +1477,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
                   return nullptr;
                 return new VPWidenEVLRecipe(*W, EVL);
               })
-              .Case<VPWidenCastRecipe>(
-                  [&](VPWidenCastRecipe *W) -> VPRecipeBase * {
-                    unsigned Opcode = W->getOpcode();
-                    if (Opcode != Instruction::SExt &&
-                        Opcode != Instruction::ZExt &&
-                        Opcode != Instruction::Trunc)
-                      return nullptr;
-                    return new VPWidenCastEVLRecipe(*W, EVL);
-                  })
               .Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
                 VPValue *NewMask = GetNewMask(Red->getCondOp());
                 return new VPReductionEVLRecipe(*Red, EVL, NewMask);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -337,7 +337,6 @@ class VPDef {
     VPWidenCallSC,
     VPWidenCanonicalIVSC,
     VPWidenCastSC,
-    VPWidenCastEVLSC,
     VPWidenGEPSC,
     VPWidenIntrinsicSC,
     VPWidenLoadEVLSC,
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
@@ -200,7 +200,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
 ; IF-EVL-INLOOP-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP7]]
 ; IF-EVL-INLOOP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0
 ; IF-EVL-INLOOP-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr align 2 [[TMP9]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-INLOOP-NEXT:    [[VP_CAST:%.*]] = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i16(<vscale x 8 x i16> [[VP_OP_LOAD]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-INLOOP-NEXT:    [[VP_CAST:%.*]] = sext <vscale x 8 x i16> [[VP_OP_LOAD]] to <vscale x 8 x i32>
 ; IF-EVL-INLOOP-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[VP_CAST]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
 ; IF-EVL-INLOOP-NEXT:    [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
 ; IF-EVL-INLOOP-NEXT:    [[INDEX_EVL_NEXT]] = add i32 [[TMP6]], [[EVL_BASED_IV]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll