@@ -163,8 +163,6 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
163163
164164 CurDAG->setRoot (Dummy.getValue ());
165165
166- MadeChange |= doPeepholeMergeVVMFold ();
167-
168166 // After we're done with everything else, convert IMPLICIT_DEF
169167 // passthru operands to NoRegister. This is required to workaround
170168 // an optimization deficiency in MachineCSE. This really should
@@ -4069,218 +4067,6 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
40694067 return true ;
40704068}
40714069
4072- static bool IsVMerge (SDNode *N) {
4073- return RISCV::getRVVMCOpcode (N->getMachineOpcode ()) == RISCV::VMERGE_VVM;
4074- }
4075-
4076- // Try to fold away VMERGE_VVM instructions into their true operands:
4077- //
4078- // %true = PseudoVADD_VV ...
4079- // %x = PseudoVMERGE_VVM %false, %false, %true, %mask
4080- // ->
4081- // %x = PseudoVADD_VV_MASK %false, ..., %mask
4082- //
4083- // We can only fold if vmerge's passthru operand, vmerge's false operand and
4084- // %true's passthru operand (if it has one) are the same. This is because we
4085- // have to consolidate them into one passthru operand in the result.
4086- //
4087- // If %true is masked, then we can use its mask instead of vmerge's if vmerge's
4088- // mask is all ones.
4089- //
4090- // The resulting VL is the minimum of the two VLs.
4091- //
4092- // The resulting policy is the effective policy the vmerge would have had,
4093- // i.e. whether or not its passthru operand was implicit-def.
4094- bool RISCVDAGToDAGISel::performCombineVMergeAndVOps (SDNode *N) {
4095- SDValue Passthru, False, True, VL, Mask;
4096- assert (IsVMerge (N));
4097- Passthru = N->getOperand (0 );
4098- False = N->getOperand (1 );
4099- True = N->getOperand (2 );
4100- Mask = N->getOperand (3 );
4101- VL = N->getOperand (4 );
4102-
4103- // If the EEW of True is different from vmerge's SEW, then we can't fold.
4104- if (True.getSimpleValueType () != N->getSimpleValueType (0 ))
4105- return false ;
4106-
4107- // We require that either passthru and false are the same, or that passthru
4108- // is undefined.
4109- if (Passthru != False && !isImplicitDef (Passthru))
4110- return false ;
4111-
4112- assert (True.getResNo () == 0 &&
4113- " Expect True is the first output of an instruction." );
4114-
4115- // N must be the only user of True.
4116- if (!True.hasOneUse ())
4117- return false ;
4118-
4119- if (!True.isMachineOpcode ())
4120- return false ;
4121-
4122- unsigned TrueOpc = True.getMachineOpcode ();
4123- const MCInstrDesc &TrueMCID = TII->get (TrueOpc);
4124- uint64_t TrueTSFlags = TrueMCID.TSFlags ;
4125- bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse (TrueMCID);
4126-
4127- const RISCV::RISCVMaskedPseudoInfo *Info =
4128- RISCV::lookupMaskedIntrinsicByUnmasked (TrueOpc);
4129- if (!Info)
4130- return false ;
4131-
4132- // If True has a passthru operand then it needs to be the same as vmerge's
4133- // False, since False will be used for the result's passthru operand.
4134- if (HasTiedDest && !isImplicitDef (True->getOperand (0 ))) {
4135- SDValue PassthruOpTrue = True->getOperand (0 );
4136- if (False != PassthruOpTrue)
4137- return false ;
4138- }
4139-
4140- // Skip if True has side effect.
4141- if (TII->get (TrueOpc).hasUnmodeledSideEffects ())
4142- return false ;
4143-
4144- unsigned TrueChainOpIdx = True.getNumOperands () - 1 ;
4145- bool HasChainOp =
4146- True.getOperand (TrueChainOpIdx).getValueType () == MVT::Other;
4147-
4148- if (HasChainOp) {
4149- // Avoid creating cycles in the DAG. We must ensure that none of the other
4150- // operands depend on True through its chain.
4151- SmallVector<const SDNode *, 4 > LoopWorklist;
4152- SmallPtrSet<const SDNode *, 16 > Visited;
4153- LoopWorklist.push_back (False.getNode ());
4154- LoopWorklist.push_back (Mask.getNode ());
4155- LoopWorklist.push_back (VL.getNode ());
4156- if (SDNode::hasPredecessorHelper (True.getNode (), Visited, LoopWorklist))
4157- return false ;
4158- }
4159-
4160- // The vector policy operand may be present for masked intrinsics
4161- bool HasVecPolicyOp = RISCVII::hasVecPolicyOp (TrueTSFlags);
4162- unsigned TrueVLIndex =
4163- True.getNumOperands () - HasVecPolicyOp - HasChainOp - 2 ;
4164- SDValue TrueVL = True.getOperand (TrueVLIndex);
4165- SDValue SEW = True.getOperand (TrueVLIndex + 1 );
4166-
4167- auto GetMinVL = [](SDValue LHS, SDValue RHS) {
4168- if (LHS == RHS)
4169- return LHS;
4170- if (isAllOnesConstant (LHS))
4171- return RHS;
4172- if (isAllOnesConstant (RHS))
4173- return LHS;
4174- auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
4175- auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
4176- if (!CLHS || !CRHS)
4177- return SDValue ();
4178- return CLHS->getZExtValue () <= CRHS->getZExtValue () ? LHS : RHS;
4179- };
4180-
4181- // Because N and True must have the same passthru operand (or True's operand
4182- // is implicit_def), the "effective" body is the minimum of their VLs.
4183- SDValue OrigVL = VL;
4184- VL = GetMinVL (TrueVL, VL);
4185- if (!VL)
4186- return false ;
4187-
4188- // Some operations produce different elementwise results depending on the
4189- // active elements, like viota.m or vredsum. This transformation is illegal
4190- // for these if we change the active elements (i.e. mask or VL).
4191- const MCInstrDesc &TrueBaseMCID = TII->get (RISCV::getRVVMCOpcode (TrueOpc));
4192- if (RISCVII::elementsDependOnVL (TrueBaseMCID.TSFlags ) && (TrueVL != VL))
4193- return false ;
4194- if (RISCVII::elementsDependOnMask (TrueBaseMCID.TSFlags ) &&
4195- (Mask && !usesAllOnesMask (Mask)))
4196- return false ;
4197-
4198- // Make sure it doesn't raise any observable fp exceptions, since changing the
4199- // active elements will affect how fflags is set.
4200- if (mayRaiseFPException (True.getNode ()) && !True->getFlags ().hasNoFPExcept ())
4201- return false ;
4202-
4203- SDLoc DL (N);
4204-
4205- unsigned MaskedOpc = Info->MaskedPseudo ;
4206- #ifndef NDEBUG
4207- const MCInstrDesc &MaskedMCID = TII->get (MaskedOpc);
4208- assert (RISCVII::hasVecPolicyOp (MaskedMCID.TSFlags ) &&
4209- " Expected instructions with mask have policy operand." );
4210- assert (MaskedMCID.getOperandConstraint (MaskedMCID.getNumDefs (),
4211- MCOI::TIED_TO) == 0 &&
4212- " Expected instructions with mask have a tied dest." );
4213- #endif
4214-
4215- // Use a tumu policy, relaxing it to tail agnostic provided that the passthru
4216- // operand is undefined.
4217- //
4218- // However, if the VL became smaller than what the vmerge had originally, then
4219- // elements past VL that were previously in the vmerge's body will have moved
4220- // to the tail. In that case we always need to use tail undisturbed to
4221- // preserve them.
4222- bool MergeVLShrunk = VL != OrigVL;
4223- uint64_t Policy = (isImplicitDef (Passthru) && !MergeVLShrunk)
4224- ? RISCVVType::TAIL_AGNOSTIC
4225- : /* TUMU*/ 0 ;
4226- SDValue PolicyOp =
4227- CurDAG->getTargetConstant (Policy, DL, Subtarget->getXLenVT ());
4228-
4229-
4230- SmallVector<SDValue, 8 > Ops;
4231- Ops.push_back (False);
4232-
4233- const bool HasRoundingMode = RISCVII::hasRoundModeOp (TrueTSFlags);
4234- const unsigned NormalOpsEnd = TrueVLIndex - HasRoundingMode;
4235- Ops.append (True->op_begin () + HasTiedDest, True->op_begin () + NormalOpsEnd);
4236-
4237- Ops.push_back (Mask);
4238-
4239- // An unmasked "VOp" with a rounding mode operand has an interface like
4240- // (..., rm, vl) or (..., rm, vl, policy).
4241- // Its masked version is (..., vm, rm, vl, policy).
4242- // See the rounding mode pseudo nodes in RISCVInstrInfoVPseudos.td.
4243- if (HasRoundingMode)
4244- Ops.push_back (True->getOperand (TrueVLIndex - 1 ));
4245-
4246- Ops.append ({VL, SEW, PolicyOp});
4247-
4248- // Result node should have chain operand of True.
4249- if (HasChainOp)
4250- Ops.push_back (True.getOperand (TrueChainOpIdx));
4251-
4252- MachineSDNode *Result =
4253- CurDAG->getMachineNode (MaskedOpc, DL, True->getVTList (), Ops);
4254- Result->setFlags (True->getFlags ());
4255-
4256- if (!cast<MachineSDNode>(True)->memoperands_empty ())
4257- CurDAG->setNodeMemRefs (Result, cast<MachineSDNode>(True)->memoperands ());
4258-
4259- // Replace vmerge.vvm node by Result.
4260- ReplaceUses (SDValue (N, 0 ), SDValue (Result, 0 ));
4261-
4262- // Replace the other values of True, e.g. chain and VL.
4263- for (unsigned Idx = 1 ; Idx < True->getNumValues (); ++Idx)
4264- ReplaceUses (True.getValue (Idx), SDValue (Result, Idx));
4265-
4266- return true ;
4267- }
4268-
4269- bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold () {
4270- bool MadeChange = false ;
4271- SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end ();
4272-
4273- while (Position != CurDAG->allnodes_begin ()) {
4274- SDNode *N = &*--Position;
4275- if (N->use_empty () || !N->isMachineOpcode ())
4276- continue ;
4277-
4278- if (IsVMerge (N))
4279- MadeChange |= performCombineVMergeAndVOps (N);
4280- }
4281- return MadeChange;
4282- }
4283-
42844070// / If our passthru is an implicit_def, use noreg instead. This side
42854071// / steps issues with MachineCSE not being able to CSE expressions with
42864072// / IMPLICIT_DEF operands while preserving the semantic intent. See
0 commit comments