//===- ARMCommonInstCombineIntrinsic.cpp - Shared ARM/AArch64 opts -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains optimizations for ARM and AArch64 intrinsics that
/// are shared between both architectures. These functions can be called from:
/// - ARM TTI's instCombineIntrinsic (for arm_neon_* intrinsics)
/// - AArch64 TTI's instCombineIntrinsic (for aarch64_neon_* and aarch64_sve_*
///   intrinsics)
///
//===----------------------------------------------------------------------===//
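//
// Illustrative sketch only (not part of this file): a target's
// instCombineIntrinsic hook could dispatch to these helpers roughly as
// follows. The class name, hook signature, and exact intrinsic IDs below are
// assumptions for illustration and should be taken from the target's own TTI
// implementation.
//
//   std::optional<Instruction *>
//   MyTargetTTIImpl::instCombineIntrinsic(InstCombiner &IC,
//                                         IntrinsicInst &II) const {
//     switch (II.getIntrinsicID()) {
//     case Intrinsic::aarch64_neon_tbl1:
//       if (Instruction *I = ARMCommon::simplifyNeonTbl1(II, IC))
//         return I;
//       break;
//     case Intrinsic::aarch64_neon_smull:
//     case Intrinsic::aarch64_neon_umull:
//       if (Instruction *I = ARMCommon::simplifyNeonMultiply(
//               II, IC, II.getIntrinsicID() == Intrinsic::aarch64_neon_smull))
//         return I;
//       break;
//     case Intrinsic::aarch64_crypto_aese:
//     case Intrinsic::aarch64_crypto_aesd:
//       if (Instruction *I = ARMCommon::simplifyAES(II, IC))
//         return I;
//       break;
//     default:
//       break;
//     }
//     return std::nullopt;
//   }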

#include "llvm/Transforms/Utils/ARMCommonInstCombineIntrinsic.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"

using namespace llvm;
using namespace llvm::PatternMatch;

namespace llvm {
namespace ARMCommon {

/// Convert a table lookup to a shufflevector if the mask is constant.
/// This benefits tbl1 in particular when the mask is { 7,6,5,4,3,2,1,0 }:
/// the resulting shufflevector is a byte reverse and can be lowered to
/// rev64 instructions.
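///
/// For illustration (hand-written IR, not taken from a test):
///   %r = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %v,
///            <8 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
/// becomes
///   %r = shufflevector <8 x i8> %v, <8 x i8> zeroinitializer,
///            <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>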
Instruction *simplifyNeonTbl1(IntrinsicInst &II, InstCombiner &IC) {
  // Bail out if the mask is not a constant.
  auto *C = dyn_cast<Constant>(II.getArgOperand(1));
  if (!C)
    return nullptr;

  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();

  // Only perform this transformation for <8 x i8> vector types.
  if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
    return nullptr;

  int Indexes[8];

  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = C->getAggregateElement(I);

    if (!COp || !isa<ConstantInt>(COp))
      return nullptr;

    Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();

    // Make sure the mask indices are in range.
    if ((unsigned)Indexes[I] >= NumElts)
      return nullptr;
  }

  auto *V1 = II.getArgOperand(0);
  auto *V2 = Constant::getNullValue(V1->getType());
  Value *Shuf = IC.Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes));
  return IC.replaceInstUsesWith(II, Shuf);
}

/// Simplify NEON multiply-long intrinsics (smull, umull).
/// These intrinsics perform widening multiplies: they multiply two vectors of
/// narrow integers and produce a vector of wider integers. This function
/// performs algebraic simplifications:
/// 1. Multiply by zero => zero vector
/// 2. Multiply by one => zero/sign-extend the non-one operand
/// 3. Both operands constant => regular multiply that can be constant-folded
///    later
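///
/// For illustration (hand-written IR), multiplying by a splat of one:
///   %r = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a,
///            <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
/// simplifies to
///   %r = sext <4 x i16> %a to <4 x i32>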
Instruction *simplifyNeonMultiply(IntrinsicInst &II, InstCombiner &IC,
                                  bool IsSigned) {
  Value *Arg0 = II.getArgOperand(0);
  Value *Arg1 = II.getArgOperand(1);

  // Handle mul by zero first:
  if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
    return IC.replaceInstUsesWith(II, ConstantAggregateZero::get(II.getType()));
  }

  // Check for constant LHS & RHS - in this case we just simplify.
  VectorType *NewVT = cast<VectorType>(II.getType());
  if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
    if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
      Value *V0 = IC.Builder.CreateIntCast(CV0, NewVT, IsSigned);
      Value *V1 = IC.Builder.CreateIntCast(CV1, NewVT, IsSigned);
      return IC.replaceInstUsesWith(II, IC.Builder.CreateMul(V0, V1));
    }

    // Couldn't simplify - canonicalize constant to the RHS.
    std::swap(Arg0, Arg1);
  }

  // Handle mul by one:
  if (Constant *CV1 = dyn_cast<Constant>(Arg1))
    if (ConstantInt *Splat =
            dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
      if (Splat->isOne())
        return CastInst::CreateIntegerCast(Arg0, II.getType(), IsSigned);

  return nullptr;
}

/// Simplify AES encryption/decryption intrinsics (AESE, AESD).
///
/// ARM's AES instructions (AESE/AESD) XOR the data and the key, provided as
/// separate arguments, before performing the encryption/decryption operation.
/// We can fold that "internal" XOR with a previous one.
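///
/// For illustration (hand-written IR, using the AArch64 intrinsic name):
///   %t = xor <16 x i8> %data, %key
///   %r = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %t,
///            <16 x i8> zeroinitializer)
/// becomes
///   %r = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data,
///            <16 x i8> %key)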
Instruction *simplifyAES(IntrinsicInst &II, InstCombiner &IC) {
  Value *DataArg = II.getArgOperand(0);
  Value *KeyArg = II.getArgOperand(1);

  // Accept zero on either operand.
  if (!match(KeyArg, m_ZeroInt()))
    std::swap(KeyArg, DataArg);

  // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR.
  Value *Data, *Key;
  if (match(KeyArg, m_ZeroInt()) &&
      match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
    IC.replaceOperand(II, 0, Data);
    IC.replaceOperand(II, 1, Key);
    return &II;
  }

  return nullptr;
}

} // namespace ARMCommon
} // namespace llvm