diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt index d758260a8ab5d..1a5f09642ea66 100644 --- a/llvm/lib/Target/Hexagon/CMakeLists.txt +++ b/llvm/lib/Target/Hexagon/CMakeLists.txt @@ -54,6 +54,7 @@ add_llvm_target(HexagonCodeGen HexagonOptAddrMode.cpp HexagonOptimizeSZextends.cpp HexagonPeephole.cpp + HexagonQFPOptimizer.cpp HexagonRDFOpt.cpp HexagonRegisterInfo.cpp HexagonSelectionDAGInfo.cpp diff --git a/llvm/lib/Target/Hexagon/Hexagon.h b/llvm/lib/Target/Hexagon/Hexagon.h index 109aba53b6e3e..422ab20891b94 100644 --- a/llvm/lib/Target/Hexagon/Hexagon.h +++ b/llvm/lib/Target/Hexagon/Hexagon.h @@ -67,6 +67,8 @@ void initializeHexagonPeepholePass(PassRegistry &); void initializeHexagonSplitConst32AndConst64Pass(PassRegistry &); void initializeHexagonVectorPrintPass(PassRegistry &); +void initializeHexagonQFPOptimizerPass(PassRegistry &); + Pass *createHexagonLoopIdiomPass(); Pass *createHexagonVectorLoopCarriedReuseLegacyPass(); @@ -112,6 +114,7 @@ FunctionPass *createHexagonVectorCombineLegacyPass(); FunctionPass *createHexagonVectorPrint(); FunctionPass *createHexagonVExtract(); FunctionPass *createHexagonExpandCondsets(); +FunctionPass *createHexagonQFPOptimizer(); } // end namespace llvm; diff --git a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp new file mode 100644 index 0000000000000..479ac90b7d526 --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp @@ -0,0 +1,334 @@ +//===----- HexagonQFPOptimizer.cpp - Qualcomm-FP to IEEE-FP conversions +// optimizer ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Basic infrastructure for optimizing intermediate conversion instructions +// generated while performing vector floating point operations. +// Currently run at the starting of the code generation for Hexagon, cleans +// up redundant conversion instructions and replaces the uses of conversion +// with appropriate machine operand. Liveness is preserved after this pass. +// +// @note: The redundant conversion instructions are not eliminated in this pass. +// In this pass, we are only trying to replace the uses of conversion +// instructions with its appropriate QFP instruction. We are leaving the job to +// Dead instruction Elimination pass to remove redundant conversion +// instructions. +// +// Brief overview of working of this QFP optimizer. +// This version of Hexagon QFP optimizer basically iterates over each +// instruction, checks whether if it belongs to hexagon floating point HVX +// arithmetic instruction category(Add, Sub, Mul). And then it finds the unique +// definition for the machine operands corresponding to the instruction. +// +// Example: +// MachineInstruction *MI be the HVX vadd instruction +// MI -> $v0 = V6_vadd_sf $v1, $v2 +// MachineOperand *DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg()); +// MachineOperand *DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg()); +// +// In the above example, DefMI1 and DefMI2 gives the unique definitions +// corresponding to the operands($v1 and &v2 respectively) of instruction MI. +// +// If both of the definitions are not conversion instructions(V6_vconv_sf_qf32, +// V6_vconv_hf_qf16), then it will skip optimizing the current instruction and +// iterates over next instruction. +// +// If one the definitions is conversion instruction then our pass will replace +// the arithmetic instruction with its corresponding mix variant. +// In the above example, if $v1 is conversion instruction +// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3 +// After Transformation: +// MI -> $v0 = V6_vadd_qf32_mix $v3, $v2 ($v1 is replaced with $v3) +// +// If both the definitions are conversion instructions then the instruction will +// be replaced with its qf variant +// In the above example, if $v1 and $v2 are conversion instructions +// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3 +// DefMI2 -> $v2 = V6_vconv_sf_qf32 $v4 +// After Transformation: +// MI -> $v0 = V6_vadd_qf32 $v3, $v4 ($v1 is replaced with $v3, $v2 is replaced +// with $v4) +// +// Currently, in this pass, we are not handling the case when the definitions +// are PHI inst. +// +//===----------------------------------------------------------------------===// +#include +#define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass" + +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonSubtarget.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +#define DEBUG_TYPE "hexagon-qfp-optimizer" + +using namespace llvm; + +cl::opt + DisableQFOptimizer("disable-qfp-opt", cl::init(false), + cl::desc("Disable optimization of Qfloat operations.")); + +namespace { +const std::map QFPInstMap{ + {Hexagon::V6_vadd_hf, Hexagon::V6_vadd_qf16_mix}, + {Hexagon::V6_vadd_qf16_mix, Hexagon::V6_vadd_qf16}, + {Hexagon::V6_vadd_sf, Hexagon::V6_vadd_qf32_mix}, + {Hexagon::V6_vadd_qf32_mix, Hexagon::V6_vadd_qf32}, + {Hexagon::V6_vsub_hf, Hexagon::V6_vsub_qf16_mix}, + {Hexagon::V6_vsub_qf16_mix, Hexagon::V6_vsub_qf16}, + {Hexagon::V6_vsub_sf, Hexagon::V6_vsub_qf32_mix}, + {Hexagon::V6_vsub_qf32_mix, Hexagon::V6_vsub_qf32}, + {Hexagon::V6_vmpy_qf16_hf, Hexagon::V6_vmpy_qf16_mix_hf}, + {Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16}, + {Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf}, + {Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16}, + {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}}; +} // namespace + +namespace llvm { + +FunctionPass *createHexagonQFPOptimizer(); +void initializeHexagonQFPOptimizerPass(PassRegistry &); + +} // namespace llvm + +namespace { + +struct HexagonQFPOptimizer : public MachineFunctionPass { +public: + static char ID; + + HexagonQFPOptimizer() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + + bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB); + + StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + const HexagonSubtarget *HST = nullptr; + const HexagonInstrInfo *HII = nullptr; + const MachineRegisterInfo *MRI = nullptr; +}; + +char HexagonQFPOptimizer::ID = 0; +} // namespace + +INITIALIZE_PASS(HexagonQFPOptimizer, "hexagon-qfp-optimizer", + HEXAGON_QFP_OPTIMIZER, false, false) + +FunctionPass *llvm::createHexagonQFPOptimizer() { + return new HexagonQFPOptimizer(); +} + +bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, + MachineBasicBlock *MBB) { + + // Early exit: + // - if instruction is invalid or has too few operands (QFP ops need 2 sources + // + 1 dest), + // - or does not have a transformation mapping. + if (MI->getNumOperands() < 3) + return false; + auto It = QFPInstMap.find(MI->getOpcode()); + if (It == QFPInstMap.end()) + return false; + unsigned short InstTy = It->second; + + unsigned Op0F = 0; + unsigned Op1F = 0; + // Get the reaching defs of MI, DefMI1 and DefMI2 + MachineInstr *DefMI1 = nullptr; + MachineInstr *DefMI2 = nullptr; + + if (MI->getOperand(1).isReg()) + DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg()); + if (MI->getOperand(2).isReg()) + DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg()); + if (!DefMI1 || !DefMI2) + return false; + + MachineOperand &Res = MI->getOperand(0); + MachineInstr *Inst1 = nullptr; + MachineInstr *Inst2 = nullptr; + LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump(); + DefMI2->dump()); + + // Get the reaching defs of DefMI + if (DefMI1->getNumOperands() > 1 && DefMI1->getOperand(1).isReg() && + DefMI1->getOperand(1).getReg().isVirtual()) + Inst1 = MRI->getVRegDef(DefMI1->getOperand(1).getReg()); + + if (DefMI2->getNumOperands() > 1 && DefMI2->getOperand(1).isReg() && + DefMI2->getOperand(1).getReg().isVirtual()) + Inst2 = MRI->getVRegDef(DefMI2->getOperand(1).getReg()); + + unsigned Def1OP = DefMI1->getOpcode(); + unsigned Def2OP = DefMI2->getOpcode(); + + MachineInstrBuilder MIB; + // Case 1: Both reaching defs of MI are qf to sf/hf conversions + if ((Def1OP == Hexagon::V6_vconv_sf_qf32 && + Def2OP == Hexagon::V6_vconv_sf_qf32) || + (Def1OP == Hexagon::V6_vconv_hf_qf16 && + Def2OP == Hexagon::V6_vconv_hf_qf16)) { + + // If the reaching defs of DefMI are W register type, we return + if ((Inst1 && Inst1->getNumOperands() > 0 && Inst1->getOperand(0).isReg() && + MRI->getRegClass(Inst1->getOperand(0).getReg()) == + &Hexagon::HvxWRRegClass) || + (Inst2 && Inst2->getNumOperands() > 0 && Inst2->getOperand(0).isReg() && + MRI->getRegClass(Inst2->getOperand(0).getReg()) == + &Hexagon::HvxWRRegClass)) + return false; + + // Analyze the use operands of the conversion to get their KILL status + MachineOperand &Src1 = DefMI1->getOperand(1); + MachineOperand &Src2 = DefMI2->getOperand(1); + + Op0F = getKillRegState(Src1.isKill()); + Src1.setIsKill(false); + + Op1F = getKillRegState(Src2.isKill()); + Src2.setIsKill(false); + + if (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf) { + auto OuterIt = QFPInstMap.find(MI->getOpcode()); + if (OuterIt == QFPInstMap.end()) + return false; + auto InnerIt = QFPInstMap.find(OuterIt->second); + if (InnerIt == QFPInstMap.end()) + return false; + InstTy = InnerIt->second; + } + + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) + .addReg(Src1.getReg(), Op0F, Src1.getSubReg()) + .addReg(Src2.getReg(), Op1F, Src2.getSubReg()); + LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); + return true; + + // Case 2: Left operand is conversion to sf/hf + } else if (((Def1OP == Hexagon::V6_vconv_sf_qf32 && + Def2OP != Hexagon::V6_vconv_sf_qf32) || + (Def1OP == Hexagon::V6_vconv_hf_qf16 && + Def2OP != Hexagon::V6_vconv_hf_qf16)) && + !DefMI2->isPHI() && + (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) { + + if (Inst1 && MRI->getRegClass(Inst1->getOperand(0).getReg()) == + &Hexagon::HvxWRRegClass) + return false; + + MachineOperand &Src1 = DefMI1->getOperand(1); + MachineOperand &Src2 = MI->getOperand(2); + + Op0F = getKillRegState(Src1.isKill()); + Src1.setIsKill(false); + Op1F = getKillRegState(Src2.isKill()); + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) + .addReg(Src1.getReg(), Op0F, Src1.getSubReg()) + .addReg(Src2.getReg(), Op1F, Src2.getSubReg()); + LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); + return true; + + // Case 2: Left operand is conversion to sf/hf + } else if (((Def1OP != Hexagon::V6_vconv_sf_qf32 && + Def2OP == Hexagon::V6_vconv_sf_qf32) || + (Def1OP != Hexagon::V6_vconv_hf_qf16 && + Def2OP == Hexagon::V6_vconv_hf_qf16)) && + !DefMI1->isPHI() && + (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) { + // The second operand of original instruction is converted. + // In "mix" instructions, "qf" operand is always the first operand. + + // Caveat: vsub is not commutative w.r.t operands. + if (InstTy == Hexagon::V6_vsub_qf16_mix || + InstTy == Hexagon::V6_vsub_qf32_mix) + return false; + + if (Inst2 && MRI->getRegClass(Inst2->getOperand(0).getReg()) == + &Hexagon::HvxWRRegClass) + return false; + + MachineOperand &Src1 = MI->getOperand(1); + MachineOperand &Src2 = DefMI2->getOperand(1); + + Op1F = getKillRegState(Src2.isKill()); + Src2.setIsKill(false); + Op0F = getKillRegState(Src1.isKill()); + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) + .addReg(Src2.getReg(), Op1F, + Src2.getSubReg()) // Notice the operands are flipped. + .addReg(Src1.getReg(), Op0F, Src1.getSubReg()); + LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); + return true; + } + + return false; +} + +bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) { + + bool Changed = false; + + if (DisableQFOptimizer) + return Changed; + + HST = &MF.getSubtarget(); + if (!HST->useHVXV68Ops() || !HST->usePackets() || + skipFunction(MF.getFunction())) + return false; + HII = HST->getInstrInfo(); + MRI = &MF.getRegInfo(); + + MachineFunction::iterator MBBI = MF.begin(); + LLVM_DEBUG(dbgs() << "\n=== Running QFPOptimzer Pass for : " << MF.getName() + << " Optimize intermediate conversions ===\n"); + while (MBBI != MF.end()) { + MachineBasicBlock *MBB = &*MBBI; + MachineBasicBlock::iterator MII = MBBI->instr_begin(); + while (MII != MBBI->instr_end()) { + MachineInstr *MI = &*MII; + ++MII; // As MI might be removed. + + if (QFPInstMap.count(MI->getOpcode()) && + MI->getOpcode() != Hexagon::V6_vconv_sf_qf32 && + MI->getOpcode() != Hexagon::V6_vconv_hf_qf16) { + LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump()); + if (optimizeQfp(MI, MBB)) { + MI->eraseFromParent(); + LLVM_DEBUG(dbgs() << "\t....Removing...."); + Changed = true; + } + } + } + ++MBBI; + } + return Changed; +} diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index f5d8b696733ba..d9824a3154093 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -220,6 +220,7 @@ LLVMInitializeHexagonTarget() { initializeHexagonPeepholePass(PR); initializeHexagonSplitConst32AndConst64Pass(PR); initializeHexagonVectorPrintPass(PR); + initializeHexagonQFPOptimizerPass(PR); } HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, @@ -386,6 +387,7 @@ bool HexagonPassConfig::addInstSelector() { addPass(createHexagonGenInsert()); if (EnableEarlyIf) addPass(createHexagonEarlyIfConversion()); + addPass(createHexagonQFPOptimizer()); } return false; diff --git a/llvm/test/CodeGen/Hexagon/qfp-conv.ll b/llvm/test/CodeGen/Hexagon/qfp-conv.ll new file mode 100644 index 0000000000000..d2d393e1a859d --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/qfp-conv.ll @@ -0,0 +1,35 @@ +; RUN: llc -mtriple=hexagon -mattr=+hvxv68,+hvx,+hvx-length128b < %s | FileCheck %s + +; Test that the Qfloat optimization pass doesn't crash due to an invalid +; instructions. + +; CHECK: v{{[0-9]+}}.hf = v{{[0-9]:[0-9]}}.qf32 + +define void @test( + <32 x i32>* %optr, + <64 x i32> %in64, + <32 x i32> %va, + <32 x i32> %vb +) local_unnamed_addr #0 { +entry: + br label %for.body + +for.body: + %optr.068 = phi <32 x i32>* [ %optr, %entry ], [ %incdec.ptr6, %for.body ] + %0 = tail call <32 x i32> @llvm.hexagon.V6.vconv.hf.qf32.128B(<64 x i32> %in64) #2 + %1 = tail call <32 x i32> @llvm.hexagon.V6.vdealh.128B(<32 x i32> %0) #2 + %2 = tail call <128 x i1> @llvm.hexagon.V6.vgth.128B(<32 x i32> %va, <32 x i32> %1) #2 + %3 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %2, <32 x i32> %va, <32 x i32> %vb) #2 + %4 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %3, <32 x i32> %vb) #2 + %5 = tail call <32 x i32> @llvm.hexagon.V6.vpackhub.sat.128B(<32 x i32> %va, <32 x i32> %4) #2 + store <32 x i32> %5, <32 x i32>* %optr.068, align 1 + %incdec.ptr6 = getelementptr inbounds <32 x i32>, <32 x i32>* %optr.068, i32 1 + br label %for.body +} + +declare <32 x i32> @llvm.hexagon.V6.vdealh.128B(<32 x i32>) #1 +declare <32 x i32> @llvm.hexagon.V6.vconv.hf.qf32.128B(<64 x i32>) #1 +declare <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32>, <32 x i32>) #1 +declare <32 x i32> @llvm.hexagon.V6.vpackhub.sat.128B(<32 x i32>, <32 x i32>) #1 +declare <128 x i1> @llvm.hexagon.V6.vgth.128B(<32 x i32>, <32 x i32>) #1 +declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1 diff --git a/llvm/test/CodeGen/Hexagon/qfp-enabled.ll b/llvm/test/CodeGen/Hexagon/qfp-enabled.ll new file mode 100644 index 0000000000000..a5cc5fa43167e --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/qfp-enabled.ll @@ -0,0 +1,19 @@ +; Tests if the flag to disable qfp optimizer pass works or not. + +; RUN: llc -march=hexagon -mcpu=hexagonv69 -mattr=+hvxv69,+hvx-length128b \ +; RUN: < %s -o -| FileCheck %s --check-prefix=ENABLED +; RUN: llc -march=hexagon -mcpu=hexagonv69 -mattr=+hvxv69,+hvx-length128b \ +; RUN: -disable-qfp-opt < %s -o -| FileCheck %s --check-prefix=DISABLED + +define dso_local <32 x i32> @conv1_qf32(<32 x i32> noundef %input1, <32 x i32> noundef %input2) local_unnamed_addr { +entry: +; DISABLED: [[V2:v[0-9]+]].qf32 = vadd(v0.sf,v1.sf) +; DISABLED: [[V3:v[0-9]+]].sf = [[V2]].qf32 +; DISABLED: qf32 = vadd(v0.sf,[[V3]].sf) +; ENABLED: [[V4:v[0-9]+]].qf32 = vadd(v0.sf,v1.sf) +; ENABLED: qf32 = vadd([[V4]].qf32,v0.sf) + %0 = tail call <32 x i32> @llvm.hexagon.V6.vadd.sf.128B(<32 x i32> %input1, <32 x i32> %input2) + %1 = tail call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %0) + %2 = tail call <32 x i32> @llvm.hexagon.V6.vadd.sf.128B(<32 x i32> %input1, <32 x i32> %1) + ret <32 x i32> %2 +} diff --git a/llvm/test/CodeGen/Hexagon/qfp-remove-kill.mir b/llvm/test/CodeGen/Hexagon/qfp-remove-kill.mir new file mode 100644 index 0000000000000..d8dde7d70885b --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/qfp-remove-kill.mir @@ -0,0 +1,95 @@ +# RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b \ +# RUN: -run-pass hexagon-qfp-optimizer -run-pass machineverifier %s -o - | FileCheck %s + +# Test that the killed RegState from DefMI operands are removed +# killed RegState should be set for MI operands +# CHECK-LABEL: name: qfpAdd +# CHECK: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG1:([0-9]+)]] +# CHECK-NEXT: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG2:([0-9]+)]] +# CHECK-NEXT: V6_vadd_qf32 killed %[[REG1]], killed %[[REG2]] +# CHECK-NEXT: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG3:([0-9]+)]] +# CHECK-NEXT: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG4:([0-9]+)]] +# CHECK-NEXT: V6_vadd_qf32 killed %[[REG3]], killed %[[REG4]] + +--- +name: qfpAdd +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r0, $r1, $r2, $r3 + %0:intregs = COPY $r0 + %1:intregs = COPY $r1 + %2:intregs = COPY $r2 + %3:intregs = COPY $r3 + %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0 + %5:hvxvr = V6_vL32Ub_ai %1:intregs, 0 + %6:hvxvr = V6_vL32Ub_ai %2:intregs, 0 + %7:hvxvr = V6_vL32Ub_ai %3:intregs, 0 + %8:hvxvr = V6_vconv_sf_qf32 killed %4:hvxvr + %9:hvxvr = V6_vconv_sf_qf32 killed %5:hvxvr + %10:hvxvr = V6_vadd_sf %8:hvxvr, %9:hvxvr + %11:hvxvr = V6_vconv_sf_qf32 killed %6:hvxvr + %12:hvxvr = V6_vconv_sf_qf32 killed %7:hvxvr + %13:hvxvr = V6_vadd_sf killed %11:hvxvr, killed %12:hvxvr +... + + +# Test that the killed RegState from DefMI operands are removed +# CHECK-LABEL: name: qfpAddMix +# CHECK: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG1:([0-9]+)]] +# CHECK-NEXT: V6_vadd_qf32_mix killed %[[REG1]], %{{[0-9]+}} +# CHECK: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG2:([0-9]+)]] +# CHECK-NEXT: V6_vadd_qf32_mix killed %[[REG2]], %{{[0-9]+}} + +--- +name: qfpAddMix +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r0, $r1, $r2 + %0:intregs = COPY $r0 + %1:intregs = COPY $r1 + %2:intregs = COPY $r2 + %3:hvxvr = V6_vL32Ub_ai %0:intregs, 0 + %4:hvxvr = V6_vL32Ub_ai %1:intregs, 0 + %5:hvxvr = V6_vL32Ub_ai %2:intregs, 0 + %6:hvxvr = V6_vmpy_qf32_sf %4, %5 + %7:hvxvr = V6_vconv_sf_qf32 killed %6:hvxvr + %8:hvxvr = V6_vadd_sf %3:hvxvr, %7:hvxvr + %9:hvxvr = V6_vmpy_qf32_sf %4, %5 + %10:hvxvr = V6_vconv_sf_qf32 killed %9:hvxvr + %11:hvxvr = V6_vadd_sf %3:hvxvr, killed %10:hvxvr +... + + +# Test that we do generate V6_vsub_qf32_mix for the below test. +# V6_vsub_qf32_mix only allowes qf32 as first operand. In the test qf32 +# is passed as first operand. So, V6_vsub_qf32_mix must be generated. +# CHECK-LABEL: name: qfpAddSwapMix +# CHECK: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG1:([0-9]+)]] +# CHECK-NEXT: V6_vadd_qf32_mix killed %[[REG1]], %{{[0-9]+}} +# CHECK: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG2:([0-9]+)]] +# CHECK-NEXT: V6_vadd_qf32_mix killed %[[REG2]], %{{[0-9]+}} + +--- +name: qfpAddSwapMix +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r0, $r1, $r2 + %0:intregs = COPY $r0 + %1:intregs = COPY $r1 + %2:intregs = COPY $r2 + %3:hvxvr = V6_vL32Ub_ai %0:intregs, 0 + %4:hvxvr = V6_vL32Ub_ai %1:intregs, 0 + %5:hvxvr = V6_vL32Ub_ai %2:intregs, 0 + %6:hvxvr = V6_vmpy_qf32_sf %4, %5 + %7:hvxvr = V6_vconv_sf_qf32 killed %6:hvxvr + %8:hvxvr = V6_vadd_sf %7:hvxvr, %3:hvxvr + %9:hvxvr = V6_vmpy_qf32_sf %4, %5 + %10:hvxvr = V6_vconv_sf_qf32 killed %9:hvxvr + %11:hvxvr = V6_vadd_sf killed %10:hvxvr, %3:hvxvr +... diff --git a/llvm/test/CodeGen/Hexagon/qfp-subreg-bug.mir b/llvm/test/CodeGen/Hexagon/qfp-subreg-bug.mir new file mode 100644 index 0000000000000..1d78203cf5d5a --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/qfp-subreg-bug.mir @@ -0,0 +1,33 @@ +# RUN: llc -march=hexagon -mcpu=hexagonv69 -mattr=+hvxv69,+hvx-length128b -run-pass hexagon-qfp-optimizer %s -o - | FileCheck %s + +# CHECK: V6_vshuffvdd +# CHECK: V6_vadd_sf +# CHECK: V6_vadd_qf32_mix{{.*}}vsub_lo +# CHECK: V6_vadd_qf32_mix{{.*}}vsub_hi + +--- +name: qfp_subreg_fix +alignment: 16 +tracksRegLiveness: true + +body: | + bb.0: + %10:intregs = IMPLICIT_DEF + %9:hvxvr = V6_vL32Ub_ai %10, 0 :: (load (s1024) from `ptr undef`, align 4) + %11:intregs = A2_tfrsi 15360 + %12:hvxvr = V6_lvsplath %11 + %13:hvxwr = V6_vmpy_qf32_hf %9, %12 + %15:hvxvr = V6_vconv_sf_qf32 %13.vsub_lo + %17:hvxvr = V6_vconv_sf_qf32 %13.vsub_hi + %18:intregslow8 = A2_tfrsi -4 + %19:hvxwr = V6_vshuffvdd %17, %15, %18 + %21:hvxvr = V6_vadd_sf %19.vsub_hi, %19.vsub_hi + %22:hvxvr = V6_vconv_sf_qf32 %21 + %24:hvxvr = V6_vadd_sf %19.vsub_lo, %19.vsub_lo + %25:hvxvr = V6_vconv_sf_qf32 %24 + %26:hvxvr = V6_vadd_sf %25, %19.vsub_lo + %27:hvxvr = V6_vconv_sf_qf32 %26 + %28:hvxvr = V6_vadd_sf %22, %19.vsub_hi + %29:hvxvr = V6_vconv_sf_qf32 %28 + +... diff --git a/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll b/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll new file mode 100644 index 0000000000000..c16370c3b907d --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll @@ -0,0 +1,21 @@ +; Tests if generated vadd instruction takes in qf32 +; type as first parameter instead of a sf type without +; any conversion instruction of type sf = qf32 + +; RUN: llc -mtriple=hexagon < %s -o - | FileCheck %s + +; CHECK: [[V2:v[0-9]+]] = vxor([[V2]],[[V2]]) +; CHECK: [[V0:v[0-9]+]].qf32 = vmpy([[V0]].sf,[[V2]].sf) +; CHECK: [[V1:v[0-9]+]].qf32 = vmpy([[V1]].sf,[[V2]].sf) +; CHECK: [[V4:v[0-9]+]].qf32 = vadd([[V0]].qf32,[[V2]].sf) +; CHECK: [[V5:v[0-9]+]].qf32 = vadd([[V1]].qf32,[[V2]].sf) + +define void @_Z19compute_ripple_geluIDF16_EviPT_PKS0_(ptr %out_ptr, <64 x float> %conv14.ripple.vectorized) #0 { +entry: + %mul16.ripple.vectorized = fmul <64 x float> %conv14.ripple.vectorized, zeroinitializer + %conv17.ripple.vectorized = fptrunc <64 x float> %mul16.ripple.vectorized to <64 x half> + store <64 x half> %conv17.ripple.vectorized, ptr %out_ptr, align 2 + ret void +} + +attributes #0 = { "target-features"="+hvx-length128b,+hvxv75,+v75,-long-calls,-small-data" } diff --git a/llvm/test/CodeGen/Hexagon/vect/qfp-mix.mir b/llvm/test/CodeGen/Hexagon/vect/qfp-mix.mir new file mode 100644 index 0000000000000..9a9e938f35d85 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vect/qfp-mix.mir @@ -0,0 +1,79 @@ +# RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b \ +# RUN: -run-pass hexagon-qfp-optimizer %s -o - | FileCheck %s + + +# Test that the operands are swapped for Add if the second operand +# is a qf32 to sf conversion. V6_vadd_qf32_mix supports first operand +# as qf32. +# CHECK-LABEL: name: qfpAddMix +# CHECK: %[[REG:([0-9]+)]]:hvxvr = V6_vmpy_qf32_sf +# CHECK: V6_vadd_qf32_mix %[[REG]] + +--- +name: qfpAddMix +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r0, $r1, $r2, $r3 + %0:intregs = COPY $r0 + %1:intregs = COPY $r1 + %2:intregs = COPY $r2 + %3:hvxvr = V6_vL32Ub_ai %0:intregs, 0 + %4:hvxvr = V6_vL32Ub_ai %1:intregs, 0 + %5:hvxvr = V6_vL32Ub_ai %2:intregs, 0 + %6:hvxvr = V6_vmpy_qf32_sf %4, %5 + %7:hvxvr = V6_vconv_sf_qf32 %6:hvxvr + %8:hvxvr = V6_vadd_sf %3:hvxvr, %7:hvxvr +... + + +# Test that we do not generate V6_vsub_qf32_mix for the below test. +# V6_vsub_qf32_mix only allowes qf32 as first operand. In the test qf32 +# is passed as second operand. As sub is not commutative, we should not +# generate the mix instruction. +# CHECK-LABEL: name: qfpSubNoMix +# CHECK-NOT: V6_vsub_qf32_mix + +--- +name: qfpSubNoMix +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r0, $r1, $r2, $r3 + %0:intregs = COPY $r0 + %1:intregs = COPY $r1 + %2:intregs = COPY $r2 + %3:hvxvr = V6_vL32Ub_ai %0:intregs, 0 + %4:hvxvr = V6_vL32Ub_ai %1:intregs, 0 + %5:hvxvr = V6_vL32Ub_ai %2:intregs, 0 + %6:hvxvr = V6_vmpy_qf32_sf %4, %5 + %7:hvxvr = V6_vconv_sf_qf32 %6:hvxvr + %8:hvxvr = V6_vsub_sf %3:hvxvr, %7:hvxvr +... + + +# Test that we do generate V6_vsub_qf32_mix for the below test. +# V6_vsub_qf32_mix only allowes qf32 as first operand. In the test qf32 +# is passed as first operand. So, V6_vsub_qf32_mix must be generated. +# CHECK-LABEL: name: qfpSubMix +# CHECK: V6_vsub_qf32_mix + +--- +name: qfpSubMix +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r0, $r1, $r2, $r3 + %0:intregs = COPY $r0 + %1:intregs = COPY $r1 + %2:intregs = COPY $r2 + %3:hvxvr = V6_vL32Ub_ai %0:intregs, 0 + %4:hvxvr = V6_vL32Ub_ai %1:intregs, 0 + %5:hvxvr = V6_vL32Ub_ai %2:intregs, 0 + %6:hvxvr = V6_vmpy_qf32_sf %4, %5 + %7:hvxvr = V6_vconv_sf_qf32 %6:hvxvr + %8:hvxvr = V6_vsub_sf %7:hvxvr, %3:hvxvr +... diff --git a/llvm/test/CodeGen/Hexagon/vect/qfp-zeroinit.mir b/llvm/test/CodeGen/Hexagon/vect/qfp-zeroinit.mir new file mode 100644 index 0000000000000..f0b1d3c96bbb3 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vect/qfp-zeroinit.mir @@ -0,0 +1,23 @@ +# RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b -run-pass hexagon-qfp-optimizer %s -o - | FileCheck %s + +# CHECK-LABEL: name: qfpAdd32 +# CHECK: V6_vd0 +# CHECK-NEXT: V6_vL32Ub_ai +# CHECK-NEXT: V6_vadd_sf +# CHECK-NEXT: V6_vconv_sf_qf32 +# CHECK-NEXT: V6_vS32Ub_ai +--- +name: qfpAdd32 +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r0, $r1 + %0:intregs = COPY $r0 + %1:intregs = COPY $r1 + %3:hvxvr = V6_vd0 + %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0 + %5:hvxvr = V6_vadd_sf %3:hvxvr, %4:hvxvr + %6:hvxvr = V6_vconv_sf_qf32 %5:hvxvr + V6_vS32Ub_ai %1:intregs, 0, %6:hvxvr +... diff --git a/llvm/test/CodeGen/Hexagon/vect/unique-vreg-def.ll b/llvm/test/CodeGen/Hexagon/vect/unique-vreg-def.ll new file mode 100644 index 0000000000000..2d46da7a039bc --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vect/unique-vreg-def.ll @@ -0,0 +1,32 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; REQUIRES: hexagon + +; This test was asserting because getVRegDef() was called on a register with +; multiple defs. +; Checks that the test does not assert and vsub is generated. +; CHECK: vsub + +target triple = "hexagon" + +@v = common dso_local local_unnamed_addr global <32 x i32> zeroinitializer, align 128 + +; Function Attrs: nounwind +define dso_local void @hvx_twoSum(<32 x i32>* nocapture noundef writeonly %s2lo) local_unnamed_addr #0 { +entry: + %0 = load <32 x i32>, <32 x i32>* @v, align 128 + %call = tail call inreg <32 x i32> @MY_Vsf_equals_Vqf32(<32 x i32> noundef %0) #3 + %1 = tail call <32 x i32> @llvm.hexagon.V6.vsub.sf.128B(<32 x i32> %call, <32 x i32> %call) + store <32 x i32> %1, <32 x i32>* @v, align 128 + store <32 x i32> %1, <32 x i32>* %s2lo, align 128 + ret void +} + +declare dso_local inreg <32 x i32> @MY_Vsf_equals_Vqf32(<32 x i32> noundef) local_unnamed_addr #1 + +; Function Attrs: nofree nosync nounwind readnone +declare <32 x i32> @llvm.hexagon.V6.vsub.sf.128B(<32 x i32>, <32 x i32>) #2 + +attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv73" "target-features"="+hvx-length128b,+hvxv73,+v73,-long-calls" } +attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv73" "target-features"="+hvx-length128b,+hvxv73,+v73,-long-calls" } +attributes #2 = { nofree nosync nounwind readnone } +attributes #3 = { nounwind }