@@ -560,6 +560,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
560
560
case VPRecipeBase::VPPartialReductionSC:
561
561
return true ;
562
562
case VPRecipeBase::VPBranchOnMaskSC:
563
+ case VPRecipeBase::VPInterleaveEVLSC:
563
564
case VPRecipeBase::VPInterleaveSC:
564
565
case VPRecipeBase::VPIRInstructionSC:
565
566
case VPRecipeBase::VPWidenLoadEVLSC:
@@ -2445,12 +2446,13 @@ class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe {
2445
2446
}
2446
2447
};
2447
2448
2448
- // / VPInterleaveRecipe is a recipe for transforming an interleave group of load
2449
- // / or stores into one wide load/store and shuffles. The first operand of a
2450
- // / VPInterleave recipe is the address, followed by the stored values, followed
2451
- // / by an optional mask.
2452
- class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase,
2453
- public VPIRMetadata {
2449
+ // / A common base class for interleaved memory operations.
2450
+ // / An interleaved memory operation is a memory access method that combines
2451
+ // / multiple strided loads/stores into a single wide load/store with shuffles.
2452
+ // / The first operand is the start address. The optional operands are, in order,
2453
+ // / the stored values and the mask.
2454
+ class LLVM_ABI_FOR_TEST VPInterleaveBase : public VPRecipeBase,
2455
+ public VPIRMetadata {
2454
2456
const InterleaveGroup<Instruction> *IG;
2455
2457
2456
2458
// / Indicates if the interleave group is in a conditional block and requires a
@@ -2461,12 +2463,14 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase,
2461
2463
// / unused gaps can be loaded speculatively.
2462
2464
bool NeedsMaskForGaps = false ;
2463
2465
2464
- public:
2465
- VPInterleaveRecipe (const InterleaveGroup<Instruction> *IG, VPValue *Addr,
2466
- ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2467
- bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2468
- : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}, DL), VPIRMetadata(MD),
2469
- IG (IG), NeedsMaskForGaps(NeedsMaskForGaps) {
2466
+ protected:
2467
+ VPInterleaveBase (const unsigned char SC,
2468
+ const InterleaveGroup<Instruction> *IG,
2469
+ ArrayRef<VPValue *> Operands,
2470
+ ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2471
+ bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2472
+ : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2473
+ NeedsMaskForGaps (NeedsMaskForGaps) {
2470
2474
// TODO: extend the masked interleaved-group support to reversed access.
2471
2475
assert ((!Mask || !IG->isReverse ()) &&
2472
2476
" Reversed masked interleave-group not supported." );
@@ -2484,14 +2488,19 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase,
2484
2488
addOperand (Mask);
2485
2489
}
2486
2490
}
2487
- ~VPInterleaveRecipe () override = default ;
2488
2491
2489
- VPInterleaveRecipe *clone () override {
2490
- return new VPInterleaveRecipe (IG, getAddr (), getStoredValues (), getMask (),
2491
- NeedsMaskForGaps, *this , getDebugLoc ());
2492
+ public:
2493
+ VPInterleaveBase *clone () override = 0;
2494
+
2495
+ static inline bool classof (const VPRecipeBase *R) {
2496
+ return R->getVPDefID () == VPRecipeBase::VPInterleaveSC ||
2497
+ R->getVPDefID () == VPRecipeBase::VPInterleaveEVLSC;
2492
2498
}
2493
2499
2494
- VP_CLASSOF_IMPL (VPDef::VPInterleaveSC)
2500
+ static inline bool classof (const VPUser *U) {
2501
+ auto *R = dyn_cast<VPRecipeBase>(U);
2502
+ return R && classof (R);
2503
+ }
2495
2504
2496
2505
// / Return the address accessed by this recipe.
2497
2506
VPValue *getAddr () const {
@@ -2501,48 +2510,130 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase,
2501
2510
// / Return the mask used by this recipe. Note that a full mask is represented
2502
2511
// / by a nullptr.
2503
2512
VPValue *getMask () const {
2504
- // Mask is optional and therefore the last, currently 2nd operand.
2513
+ // Mask is optional and the last operand.
2505
2514
return HasMask ? getOperand (getNumOperands () - 1 ) : nullptr ;
2506
2515
}
2507
2516
2517
+ // / Return true if the access needs a mask because of the gaps.
2518
+ bool needsMaskForGaps () const { return NeedsMaskForGaps; }
2519
+
2520
+ const InterleaveGroup<Instruction> *getInterleaveGroup () const { return IG; }
2521
+
2522
+ Instruction *getInsertPos () const { return IG->getInsertPos (); }
2523
+
2524
+ void execute (VPTransformState &State) override {
2525
+ llvm_unreachable (" VPInterleaveBase should not be instantiated." );
2526
+ }
2527
+
2528
+ // / Return the cost of this recipe.
2529
+ InstructionCost computeCost (ElementCount VF,
2530
+ VPCostContext &Ctx) const override ;
2531
+
2532
+ // / Returns true if the recipe only uses the first lane of operand \p Op.
2533
+ virtual bool onlyFirstLaneUsed (const VPValue *Op) const override = 0;
2534
+
2535
+ // / Returns the number of stored operands of this interleave group. Returns 0
2536
+ // / for load interleave groups.
2537
+ virtual unsigned getNumStoreOperands () const = 0;
2538
+
2508
2539
// / Return the VPValues stored by this interleave group. If it is a load
2509
2540
// / interleave group, return an empty ArrayRef.
2510
2541
ArrayRef<VPValue *> getStoredValues () const {
2511
- // The first operand is the address, followed by the stored values, followed
2512
- // by an optional mask.
2513
- return ArrayRef<VPValue *>(op_begin (), getNumOperands ())
2514
- .slice (1 , getNumStoreOperands ());
2542
+ return ArrayRef<VPValue *>(op_end () -
2543
+ (getNumStoreOperands () + (HasMask ? 1 : 0 )),
2544
+ getNumStoreOperands ());
2515
2545
}
2546
+ };
2547
+
2548
+ // / VPInterleaveRecipe is a recipe for transforming an interleave group of load
2549
+ // / or stores into one wide load/store and shuffles. The first operand of a
2550
+ // / VPInterleave recipe is the address, followed by the stored values, followed
2551
+ // / by an optional mask.
2552
+ class LLVM_ABI_FOR_TEST VPInterleaveRecipe final : public VPInterleaveBase {
2553
+ public:
2554
+ VPInterleaveRecipe (const InterleaveGroup<Instruction> *IG, VPValue *Addr,
2555
+ ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2556
+ bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2557
+ : VPInterleaveBase(VPDef::VPInterleaveSC, IG, Addr, StoredValues, Mask,
2558
+ NeedsMaskForGaps, MD, DL) {}
2559
+
2560
+ ~VPInterleaveRecipe () override = default ;
2561
+
2562
+ VPInterleaveRecipe *clone () override {
2563
+ return new VPInterleaveRecipe (getInterleaveGroup (), getAddr (),
2564
+ getStoredValues (), getMask (),
2565
+ needsMaskForGaps (), *this , getDebugLoc ());
2566
+ }
2567
+
2568
+ VP_CLASSOF_IMPL (VPDef::VPInterleaveSC)
2516
2569
2517
2570
// / Generate the wide load or store, and shuffles.
2518
2571
void execute (VPTransformState &State) override ;
2519
2572
2520
- // / Return the cost of this VPInterleaveRecipe.
2521
- InstructionCost computeCost (ElementCount VF,
2522
- VPCostContext &Ctx) const override ;
2523
-
2524
2573
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2525
2574
// / Print the recipe.
2526
2575
void print (raw_ostream &O, const Twine &Indent,
2527
2576
VPSlotTracker &SlotTracker) const override ;
2528
2577
#endif
2529
2578
2530
- const InterleaveGroup<Instruction> *getInterleaveGroup () { return IG; }
2579
+ bool onlyFirstLaneUsed (const VPValue *Op) const override {
2580
+ assert (is_contained (operands (), Op) &&
2581
+ " Op must be an operand of the recipe" );
2582
+ return Op == getAddr () && !llvm::is_contained (getStoredValues (), Op);
2583
+ }
2531
2584
2532
- // / Returns the number of stored operands of this interleave group. Returns 0
2533
- // / for load interleave groups.
2534
- unsigned getNumStoreOperands () const {
2535
- return getNumOperands () - (HasMask ? 2 : 1 );
2585
+ unsigned getNumStoreOperands () const override {
2586
+ return getNumOperands () - (getMask () ? 2 : 1 );
2587
+ }
2588
+ };
2589
+
2590
+ // / A recipe for interleaved memory operations with vector-predication
2591
+ // / intrinsics. The first operand is the address, the second operand is the
2592
+ // / explicit vector length. Stored values and mask are optional operands.
2593
+ class LLVM_ABI_FOR_TEST VPInterleaveEVLRecipe final : public VPInterleaveBase {
2594
+ public:
2595
+ VPInterleaveEVLRecipe (VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
2596
+ : VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
2597
+ ArrayRef<VPValue *>({R.getAddr (), &EVL}),
2598
+ R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2599
+ R.getDebugLoc()) {
2600
+ assert (!getInterleaveGroup ()->isReverse () &&
2601
+ " Reversed interleave-group with tail folding is not supported." );
2602
+ assert (!needsMaskForGaps () && " Interleaved access with gap mask is not "
2603
+ " supported for scalable vector." );
2604
+ }
2605
+
2606
+ ~VPInterleaveEVLRecipe () override = default ;
2607
+
2608
+ VPInterleaveEVLRecipe *clone () override {
2609
+ llvm_unreachable (" cloning not implemented yet" );
2536
2610
}
2537
2611
2538
- // / The recipe only uses the first lane of the address.
2612
+ VP_CLASSOF_IMPL (VPDef::VPInterleaveEVLSC)
2613
+
2614
+ // / The VPValue of the explicit vector length.
2615
+ VPValue *getEVL () const { return getOperand (1 ); }
2616
+
2617
+ // / Generate the wide load or store, and shuffles.
2618
+ void execute (VPTransformState &State) override ;
2619
+
2620
+ #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2621
+ // / Print the recipe.
2622
+ void print (raw_ostream &O, const Twine &Indent,
2623
+ VPSlotTracker &SlotTracker) const override ;
2624
+ #endif
2625
+
2626
+ // / The recipe only uses the first lane of the address, and EVL operand.
2539
2627
bool onlyFirstLaneUsed (const VPValue *Op) const override {
2540
2628
assert (is_contained (operands (), Op) &&
2541
2629
" Op must be an operand of the recipe" );
2542
- return Op == getAddr () && !llvm::is_contained (getStoredValues (), Op);
2630
+ return (Op == getAddr () && !llvm::is_contained (getStoredValues (), Op)) ||
2631
+ Op == getEVL ();
2543
2632
}
2544
2633
2545
- Instruction *getInsertPos () const { return IG->getInsertPos (); }
2634
+ unsigned getNumStoreOperands () const override {
2635
+ return getNumOperands () - (getMask () ? 3 : 2 );
2636
+ }
2546
2637
};
2547
2638
2548
2639
// / A recipe to represent inloop reduction operations, performing a reduction on
0 commit comments