@@ -737,6 +737,44 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
   return nullptr;
 }

+/// Convert a table lookup to shufflevector if the mask is constant.
+/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
+/// which case we could lower the shufflevector with rev64 instructions
+/// as it's actually a byte reverse.
+static Value *simplifyNeonTbl1(const IntrinsicInst &II,
+                               InstCombiner::BuilderTy &Builder) {
+  // Bail out if the mask is not a constant.
+  auto *C = dyn_cast<Constant>(II.getArgOperand(1));
+  if (!C)
+    return nullptr;
+
+  auto *VecTy = cast<FixedVectorType>(II.getType());
+  unsigned NumElts = VecTy->getNumElements();
+
+  // Only perform this transformation for <8 x i8> vector types.
+  if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
+    return nullptr;
+
+  int Indexes[8];
+
+  for (unsigned I = 0; I < NumElts; ++I) {
+    Constant *COp = C->getAggregateElement(I);
+
+    if (!COp || !isa<ConstantInt>(COp))
+      return nullptr;
+
+    Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();
+
+    // Make sure the mask indices are in range.
+    if ((unsigned)Indexes[I] >= NumElts)
+      return nullptr;
+  }
+
+  auto *V1 = II.getArgOperand(0);
+  auto *V2 = Constant::getNullValue(V1->getType());
+  return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes));
+}
+
 // Returns true iff the 2 intrinsics have the same operands, limiting the
 // comparison to the first NumOperands.
 static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
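
For context, and not part of the patch itself: a minimal standalone C++ sketch of the scalar semantics the tbl1 combine relies on. With a constant mask whose indices are all in range (exactly the case `simplifyNeonTbl1` accepts), a tbl1 table lookup picks source bytes the same way a `shufflevector` with an all-zero second operand does, so the two forms are interchangeable. The helper names `emulateTbl1` and `emulateShuffle` below are invented for illustration.

```cpp
#include <array>
#include <cassert>
#include <cstdint>

using Vec8 = std::array<uint8_t, 8>;

// tbl1 semantics: each result byte is Table[Mask[i]], or 0 when the index
// is out of range for the 8-byte table.
static Vec8 emulateTbl1(const Vec8 &Table, const Vec8 &Mask) {
  Vec8 R{};
  for (int I = 0; I < 8; ++I)
    R[I] = Mask[I] < 8 ? Table[Mask[I]] : 0;
  return R;
}

// shufflevector semantics with an all-zero second operand: indices 0..7
// select from V1, indices 8..15 would select zeros.
static Vec8 emulateShuffle(const Vec8 &V1, const Vec8 &Mask) {
  Vec8 R{};
  for (int I = 0; I < 8; ++I)
    R[I] = Mask[I] < 8 ? V1[Mask[I]] : 0;
  return R;
}

int main() {
  Vec8 Data = {10, 20, 30, 40, 50, 60, 70, 80};
  Vec8 Rev = {7, 6, 5, 4, 3, 2, 1, 0}; // the byte-reverse mask from the comment
  assert(emulateTbl1(Data, Rev) == emulateShuffle(Data, Rev));
  return 0;
}
```

The combine bails out on out-of-range mask elements: tbl1 yields zero for those lanes, which the shuffle could only express by indexing into the zero second operand, so the patch restricts itself to the in-range case where the rewrite is trivially correct.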
@@ -3128,6 +3166,72 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
         Intrinsic::getOrInsertDeclaration(II->getModule(), NewIntrin);
     return CallInst::Create(NewFn, CallArgs);
   }
+  case Intrinsic::arm_neon_vtbl1:
+  case Intrinsic::aarch64_neon_tbl1:
+    if (Value *V = simplifyNeonTbl1(*II, Builder))
+      return replaceInstUsesWith(*II, V);
+    break;
+
+  case Intrinsic::arm_neon_vmulls:
+  case Intrinsic::arm_neon_vmullu:
+  case Intrinsic::aarch64_neon_smull:
+  case Intrinsic::aarch64_neon_umull: {
+    Value *Arg0 = II->getArgOperand(0);
+    Value *Arg1 = II->getArgOperand(1);
+
+    // Handle mul by zero first:
+    if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
+      return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
+    }
+
+    // Check for constant LHS & RHS - in this case we just simplify.
+    bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
+                 IID == Intrinsic::aarch64_neon_umull);
+    VectorType *NewVT = cast<VectorType>(II->getType());
+    if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
+      if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
+        Value *V0 = Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
+        Value *V1 = Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
+        return replaceInstUsesWith(CI, Builder.CreateMul(V0, V1));
+      }
+
+      // Couldn't simplify - canonicalize constant to the RHS.
+      std::swap(Arg0, Arg1);
+    }
+
+    // Handle mul by one:
+    if (Constant *CV1 = dyn_cast<Constant>(Arg1))
+      if (ConstantInt *Splat =
+              dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
+        if (Splat->isOne())
+          return CastInst::CreateIntegerCast(Arg0, II->getType(),
+                                             /*isSigned=*/!Zext);
+
+    break;
+  }
+  case Intrinsic::arm_neon_aesd:
+  case Intrinsic::arm_neon_aese:
+  case Intrinsic::aarch64_crypto_aesd:
+  case Intrinsic::aarch64_crypto_aese:
+  case Intrinsic::aarch64_sve_aesd:
+  case Intrinsic::aarch64_sve_aese: {
+    Value *DataArg = II->getArgOperand(0);
+    Value *KeyArg = II->getArgOperand(1);
+
+    // Accept zero on either operand.
+    if (!match(KeyArg, m_ZeroInt()))
+      std::swap(KeyArg, DataArg);
+
+    // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
+    Value *Data, *Key;
+    if (match(KeyArg, m_ZeroInt()) &&
+        match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
+      replaceOperand(*II, 0, Data);
+      replaceOperand(*II, 1, Key);
+      return II;
+    }
+    break;
+  }
   case Intrinsic::hexagon_V6_vandvrt:
   case Intrinsic::hexagon_V6_vandvrt_128B: {
     // Simplify Q -> V -> Q conversion.
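
A similar illustration for the widening-multiply case above (illustrative only, assuming `<8 x i8>` inputs widened to `<8 x i16>`; the real intrinsics also come in wider element sizes): smull/umull extend each lane and then multiply in the wide type, which is why two constant operands can be folded with a pair of `CreateIntCast`s plus a `CreateMul`, a splat-of-one operand reduces to a plain widening cast, and a zero operand gives an all-zero result.

```cpp
#include <cassert>
#include <cstdint>

// One lane of a signed widening multiply (smull): sign-extend, then multiply.
static int16_t smullLane(int8_t A, int8_t B) {
  return static_cast<int16_t>(static_cast<int16_t>(A) * static_cast<int16_t>(B));
}

// One lane of an unsigned widening multiply (umull): zero-extend, then multiply.
static uint16_t umullLane(uint8_t A, uint8_t B) {
  return static_cast<uint16_t>(static_cast<uint16_t>(A) * static_cast<uint16_t>(B));
}

int main() {
  // Multiplying by 1 is just the widening cast of the other operand,
  // matching the CastInst::CreateIntegerCast fold for a splat-of-one RHS.
  assert(smullLane(-5, 1) == static_cast<int16_t>(-5));
  assert(umullLane(200, 1) == static_cast<uint16_t>(200));
  // Multiplying by 0 gives zero, matching the ConstantAggregateZero fold.
  assert(smullLane(-5, 0) == 0 && umullLane(200, 0) == 0);
  return 0;
}
```

Swapping a lone constant to the RHS costs nothing and lets the splat-of-one check that follows look in only one place.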
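
The AES rewrite rests on one identity: AESE and AESD XOR their two operands as the first step of the round, so when one operand is already zero, an explicit `xor` feeding the other operand can be folded into the intrinsic itself. A toy single-byte model of that identity follows; the round body is a stand-in, since only the leading XOR matters here.

```cpp
#include <cassert>
#include <cstdint>

// Stand-in for the rest of the AES round; any function of one byte works
// for demonstrating the identity, because it only depends on the leading XOR.
static uint8_t roundBody(uint8_t X) { return static_cast<uint8_t>(X * 31 + 7); }

// Simplified model of aese(data, key): XOR the operands, then apply the round.
static uint8_t aeseModel(uint8_t Data, uint8_t Key) {
  return roundBody(static_cast<uint8_t>(Data ^ Key));
}

int main() {
  for (int D = 0; D < 256; ++D)
    for (int K = 0; K < 256; ++K)
      // aese(data ^ key, 0) == aese(data, key), which is the rewrite the
      // replaceOperand calls above perform.
      assert(aeseModel(static_cast<uint8_t>(D ^ K), 0) ==
             aeseModel(static_cast<uint8_t>(D), static_cast<uint8_t>(K)));
  return 0;
}
```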