@@ -307,18 +307,7 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
307307 Value *Op2 = State.get (getOperand (2 ), Part);
308308 return Builder.CreateSelect (Cond, Op1, Op2, Name);
309309 }
310- case VPInstruction::ActiveLaneMask: {
311- // Get first lane of vector induction variable.
312- Value *VIVElem0 = State.get (getOperand (0 ), VPIteration (Part, 0 ));
313- // Get the original loop tripcount.
314- Value *ScalarTC = State.get (getOperand (1 ), VPIteration (Part, 0 ));
315310
316- auto *Int1Ty = Type::getInt1Ty (Builder.getContext ());
317- auto *PredTy = VectorType::get (Int1Ty, State.VF );
318- return Builder.CreateIntrinsic (Intrinsic::get_active_lane_mask,
319- {PredTy, ScalarTC->getType ()},
320- {VIVElem0, ScalarTC}, nullptr , Name);
321- }
322311 case VPInstruction::FirstOrderRecurrenceSplice: {
323312 // Generate code to combine the previous and current values in vector v3.
324313 //
@@ -526,7 +515,6 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
526515 case Instruction::ICmp:
527516 // TODO: Cover additional opcodes.
528517 return vputils::onlyFirstLaneUsed (this );
529- case VPInstruction::ActiveLaneMask:
530518 case VPInstruction::CalculateTripCountMinusVF:
531519 case VPInstruction::CanonicalIVIncrementForPart:
532520 case VPInstruction::BranchOnCount:
@@ -561,9 +549,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
561549 case VPInstruction::SLPStore:
562550 O << " combined store" ;
563551 break ;
564- case VPInstruction::ActiveLaneMask:
565- O << " active lane mask" ;
566- break ;
567552 case VPInstruction::FirstOrderRecurrenceSplice:
568553 O << " first-order splice" ;
569554 break ;
@@ -594,8 +579,78 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
594579 DL.print (O);
595580 }
596581}
582+
583+ void VPActiveLaneMaskRecipe::print (raw_ostream &O, const Twine &Indent,
584+ VPSlotTracker &SlotTracker) const {
585+ O << Indent << " EMIT " ;
586+
587+ printAsOperand (O, SlotTracker);
588+ O << " = active lane mask" ;
589+ printFlags (O);
590+ printOperands (O, SlotTracker);
591+
592+ if (auto DL = getDebugLoc ()) {
593+ O << " , !dbg " ;
594+ DL.print (O);
595+ }
596+ }
597+
597598#endif
598599
600+ void VPActiveLaneMaskRecipe::execute (VPTransformState &State) {
601+ assert (!State.Instance && " VPInstruction executing an Instance" );
602+
603+ IRBuilderBase &Builder = State.Builder ;
604+ Builder.SetCurrentDebugLocation (getDebugLoc ());
605+
606+ auto *Int1Ty = Type::getInt1Ty (Builder.getContext ());
607+ auto *PredTy = VectorType::get (Int1Ty, State.VF );
608+
609+ unsigned MaxPred = std::min (State.MaxPred .getKnownMinValue (),
610+ State.UF * State.VF .getKnownMinValue ());
611+ if (State.UF <= 1 || MaxPred <= State.VF .getKnownMinValue () ||
612+ MaxPred % State.VF .getKnownMinValue () != 0 ) {
613+ for (int Part = State.UF - 1 ; Part >= 0 ; --Part) {
614+ // Get first lane of vector induction variable.
615+ Value *VIVElem0 = State.get (getOperand (0 ), VPIteration (Part, 0 ));
616+ // Get the original loop tripcount.
617+ Value *ScalarTC = State.get (getOperand (1 ), VPIteration (0 , 0 ));
618+ Value *V = Builder.CreateIntrinsic (Intrinsic::get_active_lane_mask,
619+ {PredTy, ScalarTC->getType ()},
620+ {VIVElem0, ScalarTC}, nullptr , Name);
621+ State.set (this , V, Part);
622+ }
623+ return ;
624+ }
625+
626+ // Generate long active lane masks covering all the unrolled iterations.
627+ unsigned PartsPerMask = MaxPred / State.VF .getKnownMinValue ();
628+ auto *LongPredTy = VectorType::get (Int1Ty, MaxPred, State.VF .isScalable ());
629+ SmallVector<Value *> LongMask (State.UF / PartsPerMask, nullptr );
630+ for (int Part = State.UF - PartsPerMask; Part >= 0 ; Part -= PartsPerMask) {
631+ // Get first lane of vector induction variable.
632+ Value *VIVElem0 = State.get (getOperand (0 ), VPIteration (Part, 0 ));
633+ // Get the original loop tripcount.
634+ Value *ScalarTC = State.get (getOperand (1 ), VPIteration (0 , 0 ));
635+ Value *V = Builder.CreateIntrinsic (Intrinsic::get_active_lane_mask,
636+ {LongPredTy, ScalarTC->getType ()},
637+ {VIVElem0, ScalarTC}, nullptr , Name);
638+ LongMask[Part / PartsPerMask] = V;
639+ }
640+
641+ for (int Part = State.UF - 1 ; Part >= 0 ; --Part) {
642+ Value *ALM = LongMask[Part / PartsPerMask];
643+ const unsigned I = Part % PartsPerMask;
644+ Value *V = Builder.CreateIntrinsic (
645+ Intrinsic::vector_extract, {PredTy, ALM->getType ()},
646+ {ALM, ConstantInt::get (Type::getInt64Ty (Builder.getContext ()),
647+ I * State.VF .getKnownMinValue ())},
648+ nullptr , Name);
649+
650+ State.set (this , V, Part);
651+ }
652+ }
653+
599654void VPWidenCallRecipe::execute (VPTransformState &State) {
600655 assert (State.VF .isVector () && " not widening" );
601656 auto &CI = *cast<CallInst>(getUnderlyingInstr ());
0 commit comments