Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include "AArch64TargetTransformInfo.h"
#include "../ARMCommon/ARMCommonInstCombineIntrinsic.h"
#include "AArch64ExpandImm.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64SMEAttributes.h"
Expand Down Expand Up @@ -2856,6 +2857,18 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
case Intrinsic::aarch64_neon_fmaxnm:
case Intrinsic::aarch64_neon_fminnm:
return instCombineMaxMinNM(IC, II);
case Intrinsic::aarch64_neon_tbl1:
return ARMCommon::simplifyNeonTbl1(II, IC);
case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull: {
bool IsSigned = IID == Intrinsic::aarch64_neon_smull;
return ARMCommon::simplifyNeonMultiply(II, IC, IsSigned);
}
case Intrinsic::aarch64_crypto_aesd:
case Intrinsic::aarch64_crypto_aese:
case Intrinsic::aarch64_sve_aesd:
case Intrinsic::aarch64_sve_aese:
return ARMCommon::simplifyAES(II, IC);
case Intrinsic::aarch64_sve_convert_from_svbool:
return instCombineConvertFromSVBool(IC, II);
case Intrinsic::aarch64_sve_dup:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ add_llvm_target(AArch64CodeGen
AArch64Desc
AArch64Info
AArch64Utils
ARMCommon
Analysis
AsmPrinter
CFGuard
Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include "ARMTargetTransformInfo.h"
#include "../ARMCommon/ARMCommonInstCombineIntrinsic.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/APInt.h"
Expand Down Expand Up @@ -182,6 +183,19 @@ ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
break;
}

case Intrinsic::arm_neon_vtbl1:
return ARMCommon::simplifyNeonTbl1(II, IC);

case Intrinsic::arm_neon_vmulls:
case Intrinsic::arm_neon_vmullu: {
bool IsSigned = IID == Intrinsic::arm_neon_vmulls;
return ARMCommon::simplifyNeonMultiply(II, IC, IsSigned);
}

case Intrinsic::arm_neon_aesd:
case Intrinsic::arm_neon_aese:
return ARMCommon::simplifyAES(II, IC);

case Intrinsic::arm_mve_pred_i2v: {
Value *Arg = II.getArgOperand(0);
Value *ArgArg;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/ARM/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ add_llvm_target(ARMCodeGen
Thumb2SizeReduction.cpp

LINK_COMPONENTS
ARMCommon
ARMDesc
ARMInfo
ARMUtils
Expand Down
136 changes: 136 additions & 0 deletions llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
//===- ARMCommonInstCombineIntrinsic.cpp - Shared ARM/AArch64 combines ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains optimizations for ARM and AArch64 intrinsics that
/// are shared between both architectures. These functions can be called from:
/// - ARM TTI's instCombineIntrinsic (for arm_neon_* intrinsics)
/// - AArch64 TTI's instCombineIntrinsic (for aarch64_neon_* and aarch64_sve_*
/// intrinsics)
///
//===----------------------------------------------------------------------===//

#include "ARMCommonInstCombineIntrinsic.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"

using namespace llvm;
using namespace llvm::PatternMatch;

namespace llvm {
namespace ARMCommon {

/// Convert a table lookup to shufflevector if the mask is constant.
/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
/// which case we could lower the shufflevector with rev64 instructions
/// as it's actually a byte reverse.
Instruction *simplifyNeonTbl1(IntrinsicInst &II, InstCombiner &IC) {
  // The fold only applies when the lookup mask is a compile-time constant.
  auto *MaskC = dyn_cast<Constant>(II.getArgOperand(1));
  if (!MaskC)
    return nullptr;

  // Restrict the transformation to the <8 x i8> form of tbl1.
  auto *RetTy = cast<FixedVectorType>(II.getType());
  unsigned NumLanes = RetTy->getNumElements();
  if (NumLanes != 8 || !RetTy->getElementType()->isIntegerTy(8))
    return nullptr;

  // Gather the mask lanes; every lane must be a constant integer that
  // indexes into the single source vector.
  int ShuffleMask[8];
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    auto *LaneC =
        dyn_cast_or_null<ConstantInt>(MaskC->getAggregateElement(Lane));
    if (!LaneC)
      return nullptr;

    uint64_t Idx = LaneC->getLimitedValue();
    // Out-of-range indices cannot be expressed as a shufflevector of the
    // first operand alone.
    if (Idx >= NumLanes)
      return nullptr;
    ShuffleMask[Lane] = static_cast<int>(Idx);
  }

  Value *Src = II.getArgOperand(0);
  Value *ZeroVec = Constant::getNullValue(Src->getType());
  Value *Shuf =
      IC.Builder.CreateShuffleVector(Src, ZeroVec, ArrayRef(ShuffleMask));
  return IC.replaceInstUsesWith(II, Shuf);
}

/// Simplify NEON multiply-long intrinsics (smull, umull).
/// These intrinsics perform widening multiplies: they multiply two vectors of
/// narrow integers and produce a vector of wider integers. This function
/// performs algebraic simplifications:
/// 1. Multiply by zero => zero vector
/// 2. Multiply by one => zero/sign-extend the non-one operand
/// 3. Both operands constant => regular multiply that can be constant-folded
/// later
Instruction *simplifyNeonMultiply(IntrinsicInst &II, InstCombiner &IC,
                                  bool IsSigned) {
  Value *LHS = II.getArgOperand(0);
  Value *RHS = II.getArgOperand(1);
  VectorType *WideVT = cast<VectorType>(II.getType());

  // 0 * X and X * 0 fold directly to a zero vector of the wide type.
  if (isa<ConstantAggregateZero>(LHS) || isa<ConstantAggregateZero>(RHS))
    return IC.replaceInstUsesWith(II, ConstantAggregateZero::get(WideVT));

  if (auto *LC = dyn_cast<Constant>(LHS)) {
    // Both sides constant: widen each operand and emit a plain mul, which
    // later constant folding will collapse.
    if (auto *RC = dyn_cast<Constant>(RHS)) {
      Value *WideL = IC.Builder.CreateIntCast(LC, WideVT, IsSigned);
      Value *WideR = IC.Builder.CreateIntCast(RC, WideVT, IsSigned);
      return IC.replaceInstUsesWith(II, IC.Builder.CreateMul(WideL, WideR));
    }

    // Only the left side is constant: move it to the right (locally) so the
    // splat-of-one check below covers both operand orders.
    std::swap(LHS, RHS);
  }

  // X * splat(1) is just a widening cast of X.
  if (auto *RC = dyn_cast<Constant>(RHS))
    if (auto *SplatVal = dyn_cast_or_null<ConstantInt>(RC->getSplatValue()))
      if (SplatVal->isOne())
        return CastInst::CreateIntegerCast(LHS, II.getType(), IsSigned);

  return nullptr;
}

/// Simplify AES encryption/decryption intrinsics (AESE, AESD).
///
/// ARM's AES instructions (AESE/AESD) XOR the data and the key, provided as
/// separate arguments, before performing the encryption/decryption operation.
/// We can fold that "internal" XOR with a previous one.
Instruction *simplifyAES(IntrinsicInst &II, InstCombiner &IC) {
  Value *Data = II.getArgOperand(0);
  Value *Key = II.getArgOperand(1);

  // Canonicalize locally so that a zero operand, if present, sits in Key.
  if (!match(Key, m_ZeroInt()))
    std::swap(Key, Data);

  // With one operand zero, the intrinsic's implicit XOR can absorb an
  // explicit XOR feeding the other operand: aes(x ^ y, 0) -> aes(x, y).
  Value *X, *Y;
  if (match(Key, m_ZeroInt()) && match(Data, m_Xor(m_Value(X), m_Value(Y)))) {
    IC.replaceOperand(II, 0, X);
    IC.replaceOperand(II, 1, Y);
    return &II;
  }

  return nullptr;
}

} // namespace ARMCommon
} // namespace llvm
57 changes: 57 additions & 0 deletions llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
//===- ARMCommonInstCombineIntrinsic.h - Shared ARM/AArch64 combines -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains optimizations for ARM and AArch64 intrinsics that
/// are shared between both architectures. These functions can be called from:
/// - ARM TTI's instCombineIntrinsic (for arm_neon_* intrinsics)
/// - AArch64 TTI's instCombineIntrinsic (for aarch64_neon_* and aarch64_sve_*
/// intrinsics)
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARMCOMMON_ARMCOMMONINSTCOMBINEINTRINSIC_H
#define LLVM_LIB_TARGET_ARMCOMMON_ARMCOMMONINSTCOMBINEINTRINSIC_H

#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"

namespace llvm {

namespace ARMCommon {

/// Convert a table lookup to shufflevector if the mask is constant.
/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
/// which case we could lower the shufflevector with rev64 instructions
/// as it's actually a byte reverse.
///
/// Returns the replacement instruction, or nullptr if no simplification
/// applies.
Instruction *simplifyNeonTbl1(IntrinsicInst &II, InstCombiner &IC);

/// Simplify NEON multiply-long intrinsics (smull, umull).
/// These intrinsics perform widening multiplies: they multiply two vectors of
/// narrow integers and produce a vector of wider integers. This function
/// performs algebraic simplifications:
/// 1. Multiply by zero => zero vector
/// 2. Multiply by one => zero/sign-extend the non-one operand
/// 3. Both operands constant => regular multiply that can be constant-folded
/// later
///
/// Returns the replacement instruction, or nullptr if no simplification
/// applies.
Instruction *simplifyNeonMultiply(IntrinsicInst &II, InstCombiner &IC,
                                  bool IsSigned);

/// Simplify AES encryption/decryption intrinsics (AESE, AESD).
///
/// ARM's AES instructions (AESE/AESD) XOR the data and the key, provided as
/// separate arguments, before performing the encryption/decryption operation.
/// We can fold that "internal" XOR with a previous one.
///
/// Returns the (modified) intrinsic, or nullptr if no simplification applies.
Instruction *simplifyAES(IntrinsicInst &II, InstCombiner &IC);

} // namespace ARMCommon
} // namespace llvm

#endif // LLVM_LIB_TARGET_ARMCOMMON_ARMCOMMONINSTCOMBINEINTRINSIC_H
8 changes: 8 additions & 0 deletions llvm/lib/Target/ARMCommon/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Component library for intrinsic-simplification code shared between the
# ARM and AArch64 backends; both targets link against it.
add_llvm_component_library(LLVMARMCommon
  ARMCommonInstCombineIntrinsic.cpp

  LINK_COMPONENTS
  Core
  Support
  TransformUtils
  )
5 changes: 5 additions & 0 deletions llvm/lib/Target/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ if (NOT BUILD_SHARED_LIBS AND NOT APPLE AND
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
endif()

# Build the shared ARM/AArch64 utility library (ARMCommon) whenever either
# of those targets is enabled.
if("ARM" IN_LIST LLVM_TARGETS_TO_BUILD OR "AArch64" IN_LIST LLVM_TARGETS_TO_BUILD)
add_subdirectory(ARMCommon)
endif()

foreach(t ${LLVM_TARGETS_TO_BUILD})
message(STATUS "Targeting ${t}")
add_subdirectory(${t})
Expand Down
Loading