diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 2540921b75e5d..620eac428c084 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -447,6 +447,35 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { return; } + bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN(); + if (IsGCN && Subtarget->has64BitLiterals() && VT.getSizeInBits() == 64 && + CurDAG->isConstantValueOfAnyType(SDValue(N, 0))) { + uint64_t C = 0; + bool AllConst = true; + unsigned EltSize = EltVT.getSizeInBits(); + for (unsigned I = 0; I < NumVectorElts; ++I) { + SDValue Op = N->getOperand(I); + if (Op.isUndef()) { + AllConst = false; + break; + } + uint64_t Val; + if (ConstantFPSDNode *CF = dyn_cast(Op)) { + Val = CF->getValueAPF().bitcastToAPInt().getZExtValue(); + } else + Val = cast(Op)->getZExtValue(); + C |= Val << (EltSize * I); + } + if (AllConst) { + SDValue CV = CurDAG->getTargetConstant(C, DL, MVT::i64); + MachineSDNode *Copy = + CurDAG->getMachineNode(AMDGPU::S_MOV_B64_IMM_PSEUDO, DL, VT, CV); + CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, VT, SDValue(Copy, 0), + RegClass); + return; + } + } + assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not " "supported yet"); // 32 = Max Num Vector Elements @@ -454,7 +483,6 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { // 1 = Vector Register Class SmallVector RegSeqArgs(NumVectorElts * 2 + 1); - bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN(); RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); bool IsRegSeq = true; unsigned NOps = N->getNumOperands(); @@ -676,7 +704,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { case ISD::Constant: case ISD::ConstantFP: { - if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) + if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N) || + Subtarget->has64BitLiterals()) break; uint64_t Imm; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index e90316cee12fe..21bd017540b09 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -12155,6 +12155,11 @@ SDValue SITargetLowering::splitBinaryBitConstantOp( if ((bitOpWithConstantIsReducible(Opc, ValLo) || bitOpWithConstantIsReducible(Opc, ValHi)) || (CRHS->hasOneUse() && !TII->isInlineConstant(CRHS->getAPIntValue()))) { + // We have 64-bit scalar and/or/xor, but do not have vector forms. + if (Subtarget->has64BitLiterals() && CRHS->hasOneUse() && + !CRHS->user_begin()->isDivergent()) + return SDValue(); + // If we need to materialize a 64-bit immediate, it will be split up later // anyway. Avoid creating the harder to understand 64-bit immediate // materialization. diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 4c5f938831243..9b9291e8ef199 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2273,6 +2273,12 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case AMDGPU::S_MOV_B64_IMM_PSEUDO: { const MachineOperand &SrcOp = MI.getOperand(1); assert(!SrcOp.isFPImm()); + + if (ST.has64BitLiterals()) { + MI.setDesc(get(AMDGPU::S_MOV_B64)); + break; + } + APInt Imm(64, SrcOp.getImm()); if (Imm.isIntN(32) || isInlineConstant(Imm)) { MI.setDesc(get(AMDGPU::S_MOV_B64)); @@ -6099,14 +6105,18 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32; if (Is64BitOp && !AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm())) { - if (!AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp)) + if (!AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp) && + (!ST.has64BitLiterals() || InstDesc.getSize() != 4)) return false; // FIXME: We can use sign extended 64-bit literals, but only for signed // operands. At the moment we do not know if an operand is signed. // Such operand will be encoded as its low 32 bits and then either // correctly sign extended or incorrectly zero extended by HW. - if (!Is64BitFPOp && (int32_t)Imm < 0) + // If 64-bit literals are supported and the literal will be encoded + // as full 64 bit we still can use it. + if (!Is64BitFPOp && (int32_t)Imm < 0 && + (!ST.has64BitLiterals() || AMDGPU::isValid32BitLiteral(Imm, false))) return false; } } @@ -9178,15 +9188,30 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { if (isDPP(MI)) return DescSize; bool HasLiteral = false; + unsigned LiteralSize = 4; for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) { const MachineOperand &Op = MI.getOperand(I); const MCOperandInfo &OpInfo = Desc.operands()[I]; if (!Op.isReg() && !isInlineConstant(Op, OpInfo)) { HasLiteral = true; + if (ST.has64BitLiterals()) { + switch (OpInfo.OperandType) { + default: + break; + case AMDGPU::OPERAND_REG_IMM_FP64: + if (!AMDGPU::isValid32BitLiteral(Op.getImm(), true)) + LiteralSize = 8; + break; + case AMDGPU::OPERAND_REG_IMM_INT64: + if (!Op.isImm() || !AMDGPU::isValid32BitLiteral(Op.getImm(), false)) + LiteralSize = 8; + break; + } + } break; } } - return HasLiteral ? DescSize + 4 : DescSize; + return HasLiteral ? DescSize + LiteralSize : DescSize; } // Check whether we have extra NSA words. diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index fd39b8a1350c6..4a4b865dc5d1d 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -1058,7 +1058,11 @@ bool SIShrinkInstructions::run(MachineFunction &MF) { // fold an immediate into the shrunk instruction as a literal operand. In // GFX10 VOP3 instructions can take a literal operand anyway, so there is // no advantage to doing this. - if (ST->hasVOP3Literal() && !IsPostRA) + // However, if 64-bit literals are allowed we still need to shrink it + // for such literal to be able to fold. + if (ST->hasVOP3Literal() && + (!ST->has64BitLiterals() || AMDGPU::isTrue16Inst(MI.getOpcode())) && + !IsPostRA) continue; if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) && diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index a32078cc403e7..9df2bdededa13 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -639,6 +639,7 @@ bool isMAC(unsigned Opc) { Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || Opc == AMDGPU::V_MAC_F16_e64_vi || Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || + Opc == AMDGPU::V_FMAC_F64_e64_gfx12 || Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || Opc == AMDGPU::V_FMAC_F32_e64_gfx12 || diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 25c6cbc3e1ab5..030a6e1e978c1 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -175,10 +175,14 @@ multiclass VOP2Inst_e64, Commutable_REV; - let SubtargetPredicate = isGFX11Plus in { - if P.HasExtVOP3DPP then - def _e64_dpp : VOP3_DPP_Pseudo ; - } // End SubtargetPredicate = isGFX11Plus + if P.HasExtVOP3DPP then + def _e64_dpp : VOP3_DPP_Pseudo { + let SubtargetPredicate = isGFX11Plus; + } + else if P.HasExt64BitDPP then + def _e64_dpp : VOP3_DPP_Pseudo { + let OtherPredicates = [HasDPALU_DPP]; + } } multiclass VOP2Inst_e64_VOPD op, VOP2_DPP_Pseudo ps, VOP2_DPP { let AssemblerPredicate = HasDPP16; let SubtargetPredicate = ps.SubtargetPredicate; - let OtherPredicates = ps.OtherPredicates; + let OtherPredicates = !listconcat(ps.OtherPredicates, + !if(p.HasExt64BitDPP, [HasDPALU_DPP], []), + !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], [])); } class VOP2_DPP16 op, VOP2_DPP_Pseudo ps, int subtarget, @@ -1832,6 +1838,9 @@ let SubtargetPredicate = isGFX12Plus in { V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx12, "v_subrev_co_ci_u32">; } // End SubtargetPredicate = isGFX12Plus +let SubtargetPredicate = HasFmacF64Inst in +defm V_FMAC_F64 : VOP2_Real_FULL; + defm V_FMAMK_F64 : VOP2Only_Real_MADK64; defm V_FMAAK_F64 : VOP2Only_Real_MADK64; diff --git a/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll b/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll index ac03d2dae8fa8..dea9142cf2bee 100644 --- a/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll +++ b/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll @@ -1,8 +1,10 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX9,NOT-GFX12 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10,NOT-GFX12 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX11,GFX1100,NOT-GFX12 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX11,GFX1150,NOT-GFX12 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX1200 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX1250 %s declare float @llvm.fabs.f32(float) declare float @llvm.fma.f32(float, float, float) @@ -35,11 +37,19 @@ define float @v_mul_f32_vop2(float %x, float %y) { ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] ; GFX1200-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10] ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] +; +; GFX1250-LABEL: v_mul_f32_vop2: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1250-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] %mul = fmul float %x, %y ret float %mul } ; NOT-GFX12: codeLenInByte = 12 ; GFX1200: codeLenInByte = 28 +; GFX1250: codeLenInByte = 16 define float @v_mul_f32_vop2_inline_imm(float %x) { ; GFX9-LABEL: v_mul_f32_vop2_inline_imm: @@ -69,11 +79,19 @@ define float @v_mul_f32_vop2_inline_imm(float %x) { ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] ; GFX1200-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10] ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] +; +; GFX1250-LABEL: v_mul_f32_vop2_inline_imm: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1250-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] %mul = fmul float %x, 4.0 ret float %mul } ; NOT-GFX12: codeLenInByte = 12 ; GFX1200: codeLenInByte = 28 +; GFX1250: codeLenInByte = 16 define float @v_mul_f32_vop2_literal(float %x) { ; GFX9-LABEL: v_mul_f32_vop2_literal: @@ -103,11 +121,19 @@ define float @v_mul_f32_vop2_literal(float %x) { ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] ; GFX1200-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42] ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] +; +; GFX1250-LABEL: v_mul_f32_vop2_literal: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1250-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] %mul = fmul float %x, 123.0 ret float %mul } ; NOT-GFX12: codeLenInByte = 16 ; GFX1200: codeLenInByte = 32 +; GFX1250: codeLenInByte = 20 define float @v_mul_f32_vop3_src_mods(float %x, float %y) { ; GFX9-LABEL: v_mul_f32_vop3_src_mods: @@ -137,12 +163,20 @@ define float @v_mul_f32_vop3_src_mods(float %x, float %y) { ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] ; GFX1200-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00] ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] +; +; GFX1250-LABEL: v_mul_f32_vop3_src_mods: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1250-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] %fabs.x = call float @llvm.fabs.f32(float %x) %mul = fmul float %fabs.x, %y ret float %mul } ; NOT-GFX12: codeLenInByte = 16 ; GFX1200: codeLenInByte = 32 +; GFX1250: codeLenInByte = 20 define float @v_mul_f32_vop3_src_mods_inline_imm(float %x, float %y) { ; GFX9-LABEL: v_mul_f32_vop3_src_mods_inline_imm: @@ -172,6 +206,13 @@ define float @v_mul_f32_vop3_src_mods_inline_imm(float %x, float %y) { ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] ; GFX1200-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00] ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] +; +; GFX1250-LABEL: v_mul_f32_vop3_src_mods_inline_imm: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1250-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] %fabs.x = call float @llvm.fabs.f32(float %x) %mul = fmul float %fabs.x, 4.0 ret float %mul @@ -179,6 +220,7 @@ define float @v_mul_f32_vop3_src_mods_inline_imm(float %x, float %y) { ; NOT-GFX12: codeLenInByte = 16 ; GFX1200: codeLenInByte = 32 +; GFX1250: codeLenInByte = 20 define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) { ; GFX9-LABEL: v_mul_f32_vop3_src_mods_literal: @@ -209,6 +251,13 @@ define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) { ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] ; GFX1200-NEXT: v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42] ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] +; +; GFX1250-LABEL: v_mul_f32_vop3_src_mods_literal: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1250-NEXT: v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] %fabs.x = call float @llvm.fabs.f32(float %x) %mul = fmul float %fabs.x, 123.0 ret float %mul @@ -218,6 +267,7 @@ define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) { ; GFX10: codeLenInByte = 20 ; GFX11: codeLenInByte = 20 ; GFX1200: codeLenInByte = 36 +; GFX1250: codeLenInByte = 24 define float @v_mul_f32_vop2_frame_index(float %x) { ; GFX9-LABEL: v_mul_f32_vop2_frame_index: @@ -249,6 +299,13 @@ define float @v_mul_f32_vop2_frame_index(float %x) { ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] ; GFX1200-NEXT: v_mul_f32_e32 v0, s32, v0 ; encoding: [0x20,0x00,0x00,0x10] ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] +; +; GFX1250-LABEL: v_mul_f32_vop2_frame_index: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1250-NEXT: v_mul_f32_e32 v0, s32, v0 ; encoding: [0x20,0x00,0x00,0x10] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] %alloca = alloca i32, addrspace(5) %ptrtoint = ptrtoint ptr addrspace(5) %alloca to i32 %cast = bitcast i32 %ptrtoint to float @@ -260,6 +317,7 @@ define float @v_mul_f32_vop2_frame_index(float %x) { ; GFX10: codeLenInByte = 20 ; GFX11: codeLenInByte = 12 ; GFX1200: codeLenInByte = 28 +; GFX1250: codeLenInByte = 16 define float @v_fma_f32(float %x, float %y, float %z) { ; GFX9-LABEL: v_fma_f32: @@ -289,12 +347,20 @@ define float @v_fma_f32(float %x, float %y, float %z) { ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] ; GFX1200-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x13,0xd6,0x00,0x03,0x0a,0x04] ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] +; +; GFX1250-LABEL: v_fma_f32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1250-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x13,0xd6,0x00,0x03,0x0a,0x04] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] %fma = call float @llvm.fma.f32(float %x, float %y, float %z) ret float %fma } ; NOT-GFX12: codeLenInByte = 16 ; GFX1200: codeLenInByte = 32 +; GFX1250: codeLenInByte = 20 define float @v_fma_f32_src_mods(float %x, float %y, float %z) { ; GFX9-LABEL: v_fma_f32_src_mods: @@ -324,6 +390,13 @@ define float @v_fma_f32_src_mods(float %x, float %y, float %z) { ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] ; GFX1200-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0x0a,0x04] ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] +; +; GFX1250-LABEL: v_fma_f32_src_mods: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1250-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0x0a,0x04] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] %fabs.x = call float @llvm.fabs.f32(float %x) %fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z) ret float %fma @@ -331,6 +404,7 @@ define float @v_fma_f32_src_mods(float %x, float %y, float %z) { ; NOT-GFX12: codeLenInByte = 16 ; GFX1200: codeLenInByte = 32 +; GFX1250: codeLenInByte = 20 define float @v_fmac_f32(float %x, float %y) { ; GFX9-LABEL: v_fmac_f32: @@ -360,6 +434,13 @@ define float @v_fmac_f32(float %x, float %y) { ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] ; GFX1200-NEXT: v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56] ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] +; +; GFX1250-LABEL: v_fmac_f32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1250-NEXT: v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] %fma = call float @llvm.fma.f32(float %x, float %y, float %x) ret float %fma } @@ -368,6 +449,7 @@ define float @v_fmac_f32(float %x, float %y) { ; GFX10: codeLenInByte = 12 ; GFX11: codeLenInByte = 12 ; GFX1200: codeLenInByte = 28 +; GFX1250: codeLenInByte = 16 define float @v_fmaak_f32(float %x, float %y) { ; GFX9-LABEL: v_fmaak_f32: @@ -398,6 +480,13 @@ define float @v_fmaak_f32(float %x, float %y) { ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] ; GFX1200-NEXT: v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43] ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] +; +; GFX1250-LABEL: v_fmaak_f32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1250-NEXT: v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] %fma = call float @llvm.fma.f32(float %x, float %y, float 256.0) ret float %fma } @@ -406,6 +495,7 @@ define float @v_fmaak_f32(float %x, float %y) { ; GFX10: codeLenInByte = 16 ; GFX11: codeLenInByte = 16 ; GFX1200: codeLenInByte = 32 +; GFX1250: codeLenInByte = 20 define float @v_fma_k_f32_src_mods(float %x, float %y) { ; GFX9-LABEL: v_fma_k_f32_src_mods: @@ -436,6 +526,13 @@ define float @v_fma_k_f32_src_mods(float %x, float %y) { ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] ; GFX1200-NEXT: v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43] ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] +; +; GFX1250-LABEL: v_fma_k_f32_src_mods: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1250-NEXT: v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] %fabs.x = call float @llvm.fabs.f32(float %x) %fma = call float @llvm.fma.f32(float %fabs.x, float %y, float 256.0) ret float %fma @@ -445,6 +542,7 @@ define float @v_fma_k_f32_src_mods(float %x, float %y) { ; GFX10: codeLenInByte = 20 ; GFX11: codeLenInByte = 20 ; GFX1200: codeLenInByte = 36 +; GFX1250: codeLenInByte = 24 define amdgpu_ps float @s_fmaak_f32(float inreg %x, float inreg %y) { ; GFX9-LABEL: s_fmaak_f32: @@ -480,6 +578,13 @@ define amdgpu_ps float @s_fmaak_f32(float inreg %x, float inreg %y) { ; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; encoding: [0x0b,0x00,0x87,0xbf] ; GFX1200-NEXT: v_mov_b32_e32 v0, s0 ; encoding: [0x00,0x02,0x00,0x7e] ; GFX1200-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: s_fmaak_f32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_fmaak_f32 s0, s0, s1, 0x43800000 ; encoding: [0x00,0x01,0x80,0xa2,0x00,0x00,0x80,0x43] +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; encoding: [0x0b,0x00,0x87,0xbf] +; GFX1250-NEXT: v_mov_b32_e32 v0, s0 ; encoding: [0x00,0x02,0x00,0x7e] +; GFX1250-NEXT: ; return to shader part epilog %fma = call float @llvm.fma.f32(float %x, float %y, float 256.0) ret float %fma } @@ -489,3 +594,212 @@ define amdgpu_ps float @s_fmaak_f32(float inreg %x, float inreg %y) { ; GFX1100: codeLenInByte = 16 ; GFX1150: codeLenInByte = 16 ; GFX1200: codeLenInByte = 16 +; GFX1250: codeLenInByte = 16 + +define double @v_mul_f64_vop2_literal_32(double %x) { +; GFX9-LABEL: v_mul_f64_vop2_literal_32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX9-NEXT: s_mov_b32 s4, 0 ; encoding: [0x80,0x00,0x84,0xbe] +; GFX9-NEXT: s_mov_b32 s5, 0x405ec000 ; encoding: [0xff,0x00,0x85,0xbe,0x00,0xc0,0x5e,0x40] +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] ; encoding: [0x00,0x00,0x81,0xd2,0x00,0x09,0x00,0x00] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] +; +; GFX10-LABEL: v_mul_f64_vop2_literal_32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX10-NEXT: v_mul_f64 v[0:1], 0x405ec000, v[0:1] ; encoding: [0x00,0x00,0x65,0xd5,0xff,0x00,0x02,0x00,0x00,0xc0,0x5e,0x40] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] +; +; GFX11-LABEL: v_mul_f64_vop2_literal_32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] +; GFX11-NEXT: v_mul_f64 v[0:1], 0x405ec000, v[0:1] ; encoding: [0x00,0x00,0x28,0xd7,0xff,0x00,0x02,0x00,0x00,0xc0,0x5e,0x40] +; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] +; +; GFX1200-LABEL: v_mul_f64_vop2_literal_32: +; GFX1200: ; %bb.0: +; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf] +; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf] +; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf] +; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1200-NEXT: v_mul_f64_e32 v[0:1], 0x405ec000, v[0:1] ; encoding: [0xff,0x00,0x00,0x0c,0x00,0xc0,0x5e,0x40] +; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] +; +; GFX1250-LABEL: v_mul_f64_vop2_literal_32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 0x405ec000, v[0:1] ; encoding: [0xff,0x00,0x00,0x0c,0x00,0xc0,0x5e,0x40] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] + %mul = fmul double %x, 123.0 + ret double %mul +} + +; GFX9: codeLenInByte = 28 +; GFX10: codeLenInByte = 20 +; GFX1100: codeLenInByte = 20 +; GFX1150: codeLenInByte = 20 +; GFX1250: codeLenInByte = 20 + +define double @v_mul_f64_vop2_literal_64(double %x) { +; GFX9-LABEL: v_mul_f64_vop2_literal_64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX9-NEXT: s_mov_b32 s4, 0x66666666 ; encoding: [0xff,0x00,0x84,0xbe,0x66,0x66,0x66,0x66] +; GFX9-NEXT: s_mov_b32 s5, 0x405ec666 ; encoding: [0xff,0x00,0x85,0xbe,0x66,0xc6,0x5e,0x40] +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] ; encoding: [0x00,0x00,0x81,0xd2,0x00,0x09,0x00,0x00] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] +; +; GFX10-LABEL: v_mul_f64_vop2_literal_64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX10-NEXT: s_mov_b32 s4, 0x66666666 ; encoding: [0xff,0x03,0x84,0xbe,0x66,0x66,0x66,0x66] +; GFX10-NEXT: s_mov_b32 s5, 0x405ec666 ; encoding: [0xff,0x03,0x85,0xbe,0x66,0xc6,0x5e,0x40] +; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] ; encoding: [0x00,0x00,0x65,0xd5,0x00,0x09,0x00,0x00] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] +; +; GFX11-LABEL: v_mul_f64_vop2_literal_64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] +; GFX11-NEXT: s_mov_b32 s0, 0x66666666 ; encoding: [0xff,0x00,0x80,0xbe,0x66,0x66,0x66,0x66] +; GFX11-NEXT: s_mov_b32 s1, 0x405ec666 ; encoding: [0xff,0x00,0x81,0xbe,0x66,0xc6,0x5e,0x40] +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; encoding: [0x09,0x00,0x87,0xbf] +; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] ; encoding: [0x00,0x00,0x28,0xd7,0x00,0x01,0x00,0x00] +; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] +; +; GFX1200-LABEL: v_mul_f64_vop2_literal_64: +; GFX1200: ; %bb.0: +; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf] +; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf] +; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf] +; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1200-NEXT: s_mov_b32 s0, 0x66666666 ; encoding: [0xff,0x00,0x80,0xbe,0x66,0x66,0x66,0x66] +; GFX1200-NEXT: s_mov_b32 s1, 0x405ec666 ; encoding: [0xff,0x00,0x81,0xbe,0x66,0xc6,0x5e,0x40] +; GFX1200-NEXT: s_wait_alu 0xfffe ; encoding: [0xfe,0xff,0x88,0xbf] +; GFX1200-NEXT: v_mul_f64_e32 v[0:1], s[0:1], v[0:1] ; encoding: [0x00,0x00,0x00,0x0c] +; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] +; +; GFX1250-LABEL: v_mul_f64_vop2_literal_64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1250-NEXT: v_mul_f64_e32 v[0:1], lit64(0x405ec66666666666), v[0:1] ; encoding: [0xfe,0x00,0x00,0x0c,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] + %mul = fmul double %x, 123.1 + ret double %mul +} + +; GFX9: codeLenInByte = 32 +; GFX10: codeLenInByte = 32 +; GFX1100: codeLenInByte = 36 +; GFX1150: codeLenInByte = 36 +; GFX1250: codeLenInByte = 24 + +define i64 @v_add_u64_vop2_literal_32(i64 %x) { +; GFX9-LABEL: v_add_u64_vop2_literal_32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x7b, v0 ; encoding: [0xff,0x00,0x00,0x32,0x7b,0x00,0x00,0x00] +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x38] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] +; +; GFX10-LABEL: v_add_u64_vop2_literal_32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x7b, v0 ; encoding: [0x00,0x6a,0x0f,0xd7,0xff,0x00,0x02,0x00,0x7b,0x00,0x00,0x00] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo ; encoding: [0x01,0x7d,0x28,0xd5,0x80,0x02,0xaa,0x01] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] +; +; GFX11-LABEL: v_add_u64_vop2_literal_32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x7b, v0 ; encoding: [0x00,0x6a,0x00,0xd7,0xff,0x00,0x02,0x00,0x7b,0x00,0x00,0x00] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; encoding: [0x01,0x00,0x87,0xbf] +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo ; encoding: [0x01,0x7c,0x20,0xd5,0x80,0x02,0xaa,0x01] +; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] +; +; GFX1200-LABEL: v_add_u64_vop2_literal_32: +; GFX1200: ; %bb.0: +; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf] +; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf] +; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf] +; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1200-NEXT: v_add_co_u32 v0, vcc_lo, 0x7b, v0 ; encoding: [0x00,0x6a,0x00,0xd7,0xff,0x00,0x02,0x00,0x7b,0x00,0x00,0x00] +; GFX1200-NEXT: s_wait_alu 0xfffd ; encoding: [0xfd,0xff,0x88,0xbf] +; GFX1200-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo ; encoding: [0x01,0x7c,0x20,0xd5,0x80,0x02,0xaa,0x01] +; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] +; +; GFX1250-LABEL: v_add_u64_vop2_literal_32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1250-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, 0x7b ; encoding: [0x00,0x00,0x52,0xd6,0x00,0x01,0xfd,0x03,0x7b,0x00,0x00,0x00] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] + %add = add i64 %x, 123 + ret i64 %add +} + +; GFX9: codeLenInByte = 20 +; GFX10: codeLenInByte = 28 +; GFX1100: codeLenInByte = 32 +; GFX1150: codeLenInByte = 32 +; GFX1250: codeLenInByte = 24 + +define i64 @v_add_u64_vop2_literal_64(i64 %x) { +; GFX9-LABEL: v_add_u64_vop2_literal_64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x12345678, v0 ; encoding: [0xff,0x00,0x00,0x32,0x78,0x56,0x34,0x12] +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc ; encoding: [0x81,0x02,0x02,0x38] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] +; +; GFX10-LABEL: v_add_u64_vop2_literal_64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x12345678, v0 ; encoding: [0x00,0x6a,0x0f,0xd7,0xff,0x00,0x02,0x00,0x78,0x56,0x34,0x12] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo ; encoding: [0x01,0x7d,0x28,0xd5,0x81,0x02,0xaa,0x01] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] +; +; GFX11-LABEL: v_add_u64_vop2_literal_64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x12345678, v0 ; encoding: [0x00,0x6a,0x00,0xd7,0xff,0x00,0x02,0x00,0x78,0x56,0x34,0x12] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; encoding: [0x01,0x00,0x87,0xbf] +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo ; encoding: [0x01,0x7c,0x20,0xd5,0x81,0x02,0xaa,0x01] +; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] +; +; GFX1200-LABEL: v_add_u64_vop2_literal_64: +; GFX1200: ; %bb.0: +; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf] +; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf] +; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf] +; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1200-NEXT: v_add_co_u32 v0, vcc_lo, 0x12345678, v0 ; encoding: [0x00,0x6a,0x00,0xd7,0xff,0x00,0x02,0x00,0x78,0x56,0x34,0x12] +; GFX1200-NEXT: s_wait_alu 0xfffd ; encoding: [0xfd,0xff,0x88,0xbf] +; GFX1200-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo ; encoding: [0x01,0x7c,0x20,0xd5,0x81,0x02,0xaa,0x01] +; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] +; +; GFX1250-LABEL: v_add_u64_vop2_literal_64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1250-NEXT: s_mov_b64 s[0:1], lit64(0x112345678) ; encoding: [0xfe,0x01,0x80,0xbe,0x78,0x56,0x34,0x12,0x01,0x00,0x00,0x00] +; GFX1250-NEXT: s_wait_alu 0xfffe ; encoding: [0xfe,0xff,0x88,0xbf] +; GFX1250-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] ; encoding: [0x00,0x00,0x52,0xd6,0x00,0x01,0x01,0x00] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] + %add = add i64 %x, 4600387192 + ret i64 %add +} + +; GFX9: codeLenInByte = 20 +; GFX10: codeLenInByte = 28 +; GFX1100: codeLenInByte = 32 +; GFX1150: codeLenInByte = 32 +; GFX1250: codeLenInByte = 36 +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; NOT-GFX12: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/fma.f64.ll b/llvm/test/CodeGen/AMDGPU/fma.f64.ll index 99f7d4da685d6..e4488258dcf88 100644 --- a/llvm/test/CodeGen/AMDGPU/fma.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/fma.f64.ll @@ -1,7 +1,8 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SIGFX11 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SIGFX11 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx90a -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX90A %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SIGFX11 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,FMA_F64 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,FMA_F64 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx90a -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,FMAC_F64 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,FMA_F64 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=FUNC,GCN,FMAC_F64 %s declare double @llvm.fma.f64(double, double, double) nounwind readnone declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone @@ -9,8 +10,8 @@ declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) n declare double @llvm.fabs.f64(double) nounwind readnone ; FUNC-LABEL: {{^}}fma_f64: -; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} -; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} +; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} +; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @fma_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %in3) { %r0 = load double, ptr addrspace(1) %in1 @@ -22,10 +23,10 @@ define amdgpu_kernel void @fma_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, } ; FUNC-LABEL: {{^}}fma_v2f64: -; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} -; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} -; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} -; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} +; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} +; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} +; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} +; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @fma_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %in3) { %r0 = load <2 x double>, ptr addrspace(1) %in1 @@ -37,14 +38,14 @@ define amdgpu_kernel void @fma_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in } ; FUNC-LABEL: {{^}}fma_v4f64: -; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} -; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} -; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} -; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} -; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} -; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} -; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} -; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} +; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} +; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} +; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} +; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} +; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} +; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} +; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} +; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @fma_v4f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %in3) { %r0 = load <4 x double>, ptr addrspace(1) %in1 @@ -176,8 +177,8 @@ define amdgpu_kernel void @fma_f64_abs_neg_src2(ptr addrspace(1) %out, ptr addrs } ; FUNC-LABEL: {{^}}fma_f64_lit_src0: -; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}} -; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}} +; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}} +; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @fma_f64_lit_src0(ptr addrspace(1) %out, ptr addrspace(1) %in2, ptr addrspace(1) %in3) { %r1 = load double, ptr addrspace(1) %in2 @@ -188,8 +189,8 @@ define amdgpu_kernel void @fma_f64_lit_src0(ptr addrspace(1) %out, } ; FUNC-LABEL: {{^}}fma_f64_lit_src1: -; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}} -; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}} +; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}} +; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @fma_f64_lit_src1(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in3) { %r0 = load double, ptr addrspace(1) %in1 diff --git a/llvm/test/CodeGen/AMDGPU/literal64.ll b/llvm/test/CodeGen/AMDGPU/literal64.ll new file mode 100644 index 0000000000000..bb281bd6b6c12 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/literal64.ll @@ -0,0 +1,324 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,GCN-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,GCN-GISEL %s + +define amdgpu_ps i64 @s_add_u64(i64 inreg %a) { +; GCN-LABEL: s_add_u64: +; GCN: ; %bb.0: +; GCN-NEXT: s_add_nc_u64 s[0:1], s[0:1], lit64(0xf12345678) +; GCN-NEXT: ; return to shader part epilog + %result = add i64 %a, 64729929336 + ret i64 %result +} + +define amdgpu_ps void @v_add_u64(i64 %a, ptr addrspace(1) %out) { +; GCN-SDAG-LABEL: v_add_u64: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_mov_b64 s[0:1], lit64(0xf12345678) +; GCN-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GCN-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GCN-SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off +; GCN-SDAG-NEXT: s_endpgm +; +; GCN-GISEL-LABEL: v_add_u64: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: v_mov_b64_e32 v[4:5], lit64(0xf12345678) +; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GCN-GISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[4:5] +; GCN-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off +; GCN-GISEL-NEXT: s_endpgm + %result = add i64 %a, 64729929336 + store i64 %result, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps i64 @s_add_neg_u64(i64 inreg %a) { +; GCN-LABEL: s_add_neg_u64: +; GCN: ; %bb.0: +; GCN-NEXT: s_add_nc_u64 s[0:1], s[0:1], lit64(0xfffffff0edcba988) +; GCN-NEXT: ; return to shader part epilog + %result = sub i64 %a, 64729929336 + ret i64 %result +} + +define amdgpu_ps void @v_add_neg_u64(i64 %a, ptr addrspace(1) %out) { +; GCN-SDAG-LABEL: v_add_neg_u64: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_mov_b64 s[0:1], lit64(0xfffffff0edcba988) +; GCN-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GCN-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GCN-SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off +; GCN-SDAG-NEXT: s_endpgm +; +; GCN-GISEL-LABEL: v_add_neg_u64: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: v_mov_b64_e32 v[4:5], lit64(0xfffffff0edcba988) +; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GCN-GISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[4:5] +; GCN-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off +; GCN-GISEL-NEXT: s_endpgm + %result = sub i64 %a, 64729929336 + store i64 %result, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps i64 @s_sub_u64(i64 inreg %a) { +; GCN-LABEL: s_sub_u64: +; GCN: ; %bb.0: +; GCN-NEXT: s_sub_nc_u64 s[0:1], lit64(0xf12345678), s[0:1] +; GCN-NEXT: ; return to shader part epilog + %result = sub i64 64729929336, %a + ret i64 %result +} + +define amdgpu_ps void @v_sub_u64(i64 %a, ptr addrspace(1) %out) { +; GCN-LABEL: v_sub_u64: +; GCN: ; %bb.0: +; GCN-NEXT: v_sub_co_u32 v0, vcc_lo, 0x12345678, v0 +; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GCN-NEXT: v_sub_co_ci_u32_e64 v1, null, 15, v1, vcc_lo +; GCN-NEXT: global_store_b64 v[2:3], v[0:1], off +; GCN-NEXT: s_endpgm + %result = sub i64 64729929336, %a + store i64 %result, ptr addrspace(1) %out, align 8 + ret void +} + +define void @v_mov_b64_double(ptr addrspace(1) %ptr) { +; GCN-LABEL: v_mov_b64_double: +; GCN: ; %bb.0: +; GCN-NEXT: s_wait_loadcnt_dscnt 0x0 +; GCN-NEXT: s_wait_kmcnt 0x0 +; GCN-NEXT: global_load_b64 v[4:5], v[0:1], off +; GCN-NEXT: s_mov_b32 s0, 0 +; GCN-NEXT: .LBB6_1: ; %atomicrmw.start +; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: s_wait_loadcnt 0x0 +; GCN-NEXT: v_add_f64_e32 v[2:3], lit64(0x4063233333333333), v[4:5] +; GCN-NEXT: global_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GCN-NEXT: s_wait_loadcnt 0x0 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[2:3], v[4:5] +; GCN-NEXT: s_wait_xcnt 0x0 +; GCN-NEXT: v_mov_b64_e32 v[4:5], v[2:3] +; GCN-NEXT: s_wait_alu 0xfffe +; GCN-NEXT: s_or_b32 s0, vcc_lo, s0 +; GCN-NEXT: s_wait_alu 0xfffe +; GCN-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GCN-NEXT: s_cbranch_execnz .LBB6_1 +; GCN-NEXT: ; %bb.2: ; %atomicrmw.end +; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GCN-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw fadd ptr addrspace(1) %ptr, double 153.1 monotonic + ret void +} + +define void @v_mov_b64_int(ptr addrspace(1) %ptr) { +; GCN-LABEL: v_mov_b64_int: +; GCN: ; %bb.0: +; GCN-NEXT: s_wait_loadcnt_dscnt 0x0 +; GCN-NEXT: s_wait_kmcnt 0x0 +; GCN-NEXT: v_mov_b64_e32 v[2:3], lit64(0xf12345678) +; GCN-NEXT: global_atomic_add_u64 v[0:1], v[2:3], off scope:SCOPE_SYS +; GCN-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw add ptr addrspace(1) %ptr, i64 64729929336 monotonic + ret void +} + +define void @store_double(ptr addrspace(1) %ptr) { +; GCN-LABEL: store_double: +; GCN: ; %bb.0: +; GCN-NEXT: s_wait_loadcnt_dscnt 0x0 +; GCN-NEXT: s_wait_kmcnt 0x0 +; GCN-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4063233333333333) +; GCN-NEXT: global_store_b64 v[0:1], v[2:3], off +; GCN-NEXT: s_set_pc_i64 s[30:31] + store double 153.1, ptr addrspace(1) %ptr + ret void +} + +define i1 @class_f64() noinline optnone { +; GCN-SDAG-LABEL: class_f64: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GCN-SDAG-NEXT: s_wait_kmcnt 0x0 +; GCN-SDAG-NEXT: s_mov_b32 s2, 1 +; GCN-SDAG-NEXT: s_mov_b64 s[0:1], lit64(0x4063233333333333) +; GCN-SDAG-NEXT: s_wait_alu 0xfffe +; GCN-SDAG-NEXT: v_cmp_class_f64_e64 s0, s[0:1], s2 +; GCN-SDAG-NEXT: s_wait_alu 0xf1ff +; GCN-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GCN-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GCN-GISEL-LABEL: class_f64: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GCN-GISEL-NEXT: s_wait_kmcnt 0x0 +; GCN-GISEL-NEXT: s_mov_b32 s2, 1 +; GCN-GISEL-NEXT: s_mov_b64 s[0:1], lit64(0x4063233333333333) +; GCN-GISEL-NEXT: s_wait_alu 0xfffe +; GCN-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GCN-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GCN-GISEL-NEXT: v_cmp_class_f64_e64 s0, v[0:1], v2 +; GCN-GISEL-NEXT: v_mov_b32_e32 v0, 1 +; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GCN-GISEL-NEXT: s_wait_alu 0xf1ff +; GCN-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, v0, s0 +; GCN-GISEL-NEXT: s_set_pc_i64 s[30:31] + %result = call i1 @llvm.amdgcn.class.f64(double 153.1, i32 1) nounwind readnone + ret i1 %result +} + +define double @rsq_f64() { +; GCN-LABEL: rsq_f64: +; GCN: ; %bb.0: +; GCN-NEXT: s_wait_loadcnt_dscnt 0x0 +; GCN-NEXT: s_wait_kmcnt 0x0 +; GCN-NEXT: v_rsq_f64_e32 v[0:1], lit64(0x4063233333333333) +; GCN-NEXT: s_set_pc_i64 s[30:31] + %result = call double @llvm.amdgcn.rsq.f64(double 153.1) nounwind readnone + ret double %result +} + +define amdgpu_ps i64 @s_and_b64(i64 inreg %a) { +; GCN-LABEL: s_and_b64: +; GCN: ; %bb.0: +; GCN-NEXT: s_and_b64 s[0:1], s[0:1], lit64(0xf12345678) +; GCN-NEXT: ; return to shader part epilog + %result = and i64 %a, 64729929336 + ret i64 %result +} + +; No V_AND_B64 instruction, it has to be split + +define amdgpu_ps void @v_and_b64(i64 %a, ptr addrspace(1) %out) { +; GCN-SDAG-LABEL: v_and_b64: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: v_and_b32_e32 v1, 15, v1 +; GCN-SDAG-NEXT: v_and_b32_e32 v0, 0x12345678, v0 +; GCN-SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off +; GCN-SDAG-NEXT: s_endpgm +; +; GCN-GISEL-LABEL: v_and_b64: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: v_and_b32_e32 v0, 0x12345678, v0 +; GCN-GISEL-NEXT: v_and_b32_e32 v1, 15, v1 +; GCN-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off +; GCN-GISEL-NEXT: s_endpgm + %result = and i64 %a, 64729929336 + store i64 %result, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps <2 x float> @v_add_f64_200.1(double %a) { +; GCN-LABEL: v_add_f64_200.1: +; GCN: ; %bb.0: +; GCN-NEXT: v_add_f64_e32 v[0:1], lit64(0x4069033333333333), v[0:1] +; GCN-NEXT: ; return to shader part epilog + %add = fadd double %a, 200.1 + %ret = bitcast double %add to <2 x float> + ret <2 x float> %ret +} + +; 200.0 can be encoded as 32-bit literal + +define amdgpu_ps <2 x float> @v_add_f64_200.0(double %a) { +; GCN-LABEL: v_add_f64_200.0: +; GCN: ; %bb.0: +; GCN-NEXT: v_add_f64_e32 v[0:1], 0x40690000, v[0:1] +; GCN-NEXT: ; return to shader part epilog + %add = fadd double %a, 200.0 + %ret = bitcast double %add to <2 x float> + ret <2 x float> %ret +} + +; No folding into VOP3 + +define amdgpu_ps <2 x float> @v_lshl_add_u64(i64 %a) { +; GCN-SDAG-LABEL: v_lshl_add_u64: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_mov_b64 s[0:1], lit64(0xf12345678) +; GCN-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GCN-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 1, s[0:1] +; GCN-SDAG-NEXT: ; return to shader part epilog +; +; GCN-GISEL-LABEL: v_lshl_add_u64: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: v_mov_b64_e32 v[2:3], lit64(0xf12345678) +; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GCN-GISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 1, v[2:3] +; GCN-GISEL-NEXT: ; return to shader part epilog + %shl = shl i64 %a, 1 + %add = add i64 %shl, 64729929336 + %ret = bitcast i64 %add to <2 x float> + ret <2 x float> %ret +} + +; No folding into VOP2 promoted to VOP3 + +define amdgpu_ps <2 x float> @v_fma_f64(double %a, double %b) { +; GCN-LABEL: v_fma_f64: +; GCN: ; %bb.0: +; GCN-NEXT: v_mov_b64_e32 v[4:5], lit64(0x4063233333333333) +; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GCN-NEXT: v_fmac_f64_e32 v[4:5], v[0:1], v[2:3] +; GCN-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4069033333333333) +; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[2:3] +; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GCN-NEXT: v_fmac_f64_e32 v[2:3], v[0:1], v[4:5] +; GCN-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3 +; GCN-NEXT: ; return to shader part epilog + %r1 = call double @llvm.fma.f64(double %a, double %b, double 153.1) nounwind readnone + %r2 = call double @llvm.fma.f64(double %a, double %r1, double 200.1) nounwind readnone + %r3 = call double @llvm.fma.f64(double %r2, double %r1, double 200.1) nounwind readnone + %ret = bitcast double %r3 to <2 x float> + ret <2 x float> %ret +} + +define amdgpu_ps <2 x float> @v_add_neg_f64(double %a) { +; GCN-SDAG-LABEL: v_add_neg_f64: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_mov_b64 s[0:1], lit64(0x4069033333333333) +; GCN-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GCN-SDAG-NEXT: v_add_f64_e64 v[0:1], -v[0:1], s[0:1] +; GCN-SDAG-NEXT: ; return to shader part epilog +; +; GCN-GISEL-LABEL: v_add_neg_f64: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] +; GCN-GISEL-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4069033333333333) +; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GCN-GISEL-NEXT: v_add_f64_e64 v[0:1], -v[0:1], v[2:3] +; GCN-GISEL-NEXT: ; return to shader part epilog + %fneg = fsub double -0.0, %a + %add = fadd double %fneg, 200.1 + %ret = bitcast double %add to <2 x float> + ret <2 x float> %ret +} + +define amdgpu_ps <2 x float> @v_cndmask(double %a) { +; GCN-SDAG-LABEL: v_cndmask: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: v_cmp_eq_f64_e32 vcc_lo, 0, v[0:1] +; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x40632000 +; GCN-SDAG-NEXT: v_cndmask_b32_e64 v0, 0x33333333, 0, vcc_lo +; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x40690333, v1, vcc_lo +; GCN-SDAG-NEXT: ; return to shader part epilog +; +; GCN-GISEL-LABEL: v_cndmask: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: v_cmp_eq_f64_e32 vcc_lo, 0, v[0:1] +; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x40690333 +; GCN-GISEL-NEXT: v_cndmask_b32_e64 v0, 0x33333333, 0, vcc_lo +; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GCN-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0x40632000, vcc_lo +; GCN-GISEL-NEXT: ; return to shader part epilog + %cmp = fcmp oeq double %a, 0.0 + %sel = select i1 %cmp, double 153.0, double 200.1 + %ret = bitcast double %sel to <2 x float> + ret <2 x float> %ret +} + +declare i1 @llvm.amdgcn.class.f64(double, i32) nounwind readnone +declare double @llvm.amdgcn.rsq.f64(double) nounwind readnone +declare double @llvm.fma.f64(double, double, double) nounwind readnone diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s index cc14e4caf851e..20bc578605b8c 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s @@ -2,6 +2,158 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX1200-ERR --implicit-check-not=error: %s +v_fmac_f64 v[4:5], v[2:3], v[4:5] +// GFX1250: v_fmac_f64_e32 v[4:5], v[2:3], v[4:5] ; encoding: [0x02,0x09,0x08,0x2e] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[254:255], v[2:3], v[4:5] +// GFX1250: v_fmac_f64_e32 v[254:255], v[2:3], v[4:5] ; encoding: [0x02,0x09,0xfc,0x2f] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], v[254:255], v[4:5] +// GFX1250: v_fmac_f64_e32 v[4:5], v[254:255], v[4:5] ; encoding: [0xfe,0x09,0x08,0x2e] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], vcc, v[4:5] +// GFX1250: v_fmac_f64_e32 v[4:5], vcc, v[4:5] ; encoding: [0x6a,0x08,0x08,0x2e] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], exec, v[4:5] +// GFX1250: v_fmac_f64_e32 v[4:5], exec, v[4:5] ; encoding: [0x7e,0x08,0x08,0x2e] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], 0, v[4:5] +// GFX1250: v_fmac_f64_e32 v[4:5], 0, v[4:5] ; encoding: [0x80,0x08,0x08,0x2e] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], -1, v[4:5] +// GFX1250: v_fmac_f64_e32 v[4:5], -1, v[4:5] ; encoding: [0xc1,0x08,0x08,0x2e] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], 0.5, v[4:5] +// GFX1250: v_fmac_f64_e32 v[4:5], 0.5, v[4:5] ; encoding: [0xf0,0x08,0x08,0x2e] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], -4.0, v[4:5] +// GFX1250: v_fmac_f64_e32 v[4:5], -4.0, v[4:5] ; encoding: [0xf7,0x08,0x08,0x2e] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], 0xaf123456, v[4:5] +// GFX1250: v_fmac_f64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xff,0x08,0x08,0x2e,0x56,0x34,0x12,0xaf] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], 0x3f717273, v[4:5] +// GFX1250: v_fmac_f64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x2e,0x73,0x72,0x71,0x3f] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], v[2:3], v[254:255] +// GFX1250: v_fmac_f64_e32 v[4:5], v[2:3], v[254:255] ; encoding: [0x02,0xfd,0x09,0x2e] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], v[2:3], v[8:9] +// GFX1250: v_fmac_f64_e32 v[4:5], v[2:3], v[8:9] ; encoding: [0x02,0x11,0x08,0x2e] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[254:255], v[2:3], v[8:9] +// GFX1250: v_fmac_f64_e32 v[254:255], v[2:3], v[8:9] ; encoding: [0x02,0x11,0xfc,0x2f] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], v[254:255], v[8:9] +// GFX1250: v_fmac_f64_e32 v[4:5], v[254:255], v[8:9] ; encoding: [0xfe,0x11,0x08,0x2e] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], vcc, v[8:9] +// GFX1250: v_fmac_f64_e32 v[4:5], vcc, v[8:9] ; encoding: [0x6a,0x10,0x08,0x2e] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], exec, v[8:9] +// GFX1250: v_fmac_f64_e32 v[4:5], exec, v[8:9] ; encoding: [0x7e,0x10,0x08,0x2e] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], 0, v[8:9] +// GFX1250: v_fmac_f64_e32 v[4:5], 0, v[8:9] ; encoding: [0x80,0x10,0x08,0x2e] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], -1, v[8:9] +// GFX1250: v_fmac_f64_e32 v[4:5], -1, v[8:9] ; encoding: [0xc1,0x10,0x08,0x2e] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], 0.5, v[8:9] +// GFX1250: v_fmac_f64_e32 v[4:5], 0.5, v[8:9] ; encoding: [0xf0,0x10,0x08,0x2e] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], -4.0, v[8:9] +// GFX1250: v_fmac_f64_e32 v[4:5], -4.0, v[8:9] ; encoding: [0xf7,0x10,0x08,0x2e] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], v[2:3], v[254:255] +// GFX1250: v_fmac_f64_e32 v[4:5], v[2:3], v[254:255] ; encoding: [0x02,0xfd,0x09,0x2e] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], v[2:3], vcc +// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], vcc ; encoding: [0x04,0x00,0x17,0xd5,0x02,0xd5,0x00,0x00] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], v[2:3], exec +// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], exec ; encoding: [0x04,0x00,0x17,0xd5,0x02,0xfd,0x00,0x00] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], v[2:3], 0 +// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], 0 ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x01,0x01,0x00] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], v[2:3], -1 +// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], -1 ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x83,0x01,0x00] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], v[2:3], 0.5 +// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], 0.5 ; encoding: [0x04,0x00,0x17,0xd5,0x02,0xe1,0x01,0x00] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], v[2:3], -4.0 +// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], -4.0 ; encoding: [0x04,0x00,0x17,0xd5,0x02,0xef,0x01,0x00] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], -v[2:3], v[8:9] +// GFX1250: v_fmac_f64_e64 v[4:5], -v[2:3], v[8:9] ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x20] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], v[2:3], -v[8:9] +// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], -v[8:9] ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], -v[2:3], -v[8:9] +// GFX1250: v_fmac_f64_e64 v[4:5], -v[2:3], -v[8:9] ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x60] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], |v[2:3]|, v[8:9] +// GFX1250: v_fmac_f64_e64 v[4:5], |v[2:3]|, v[8:9] ; encoding: [0x04,0x01,0x17,0xd5,0x02,0x11,0x02,0x00] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], v[2:3], |v[8:9]| +// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], |v[8:9]| ; encoding: [0x04,0x02,0x17,0xd5,0x02,0x11,0x02,0x00] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], |v[2:3]|, |v[8:9]| +// GFX1250: v_fmac_f64_e64 v[4:5], |v[2:3]|, |v[8:9]| ; encoding: [0x04,0x03,0x17,0xd5,0x02,0x11,0x02,0x00] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], v[2:3], v[8:9] clamp +// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] clamp ; encoding: [0x04,0x80,0x17,0xd5,0x02,0x11,0x02,0x00] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], v[2:3], v[8:9] mul:2 +// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] mul:2 ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x08] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], v[2:3], v[8:9] mul:4 +// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] mul:4 ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x10] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmac_f64 v[4:5], v[2:3], v[8:9] div:2 +// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] div:2 ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x18] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_fmamk_f64 v[6:7], v[4:5], 0x405ec000, v[2:3] // GFX1250: v_fmamk_f64 v[6:7], v[4:5], 0x405ec000, v[2:3] ; encoding: [0x04,0x05,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt index f0fcddb06599f..c1213f2d9ec0d 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt @@ -1,6 +1,117 @@ # NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s +0x02,0x09,0xfc,0x2f +# GFX1250: v_fmac_f64_e32 v[254:255], v[2:3], v[4:5] ; encoding: [0x02,0x09,0xfc,0x2f] + +0x02,0x11,0xfc,0x2f +# GFX1250: v_fmac_f64_e32 v[254:255], v[2:3], v[8:9] ; encoding: [0x02,0x11,0xfc,0x2f] + +0xc1,0x08,0x08,0x2e +# GFX1250: v_fmac_f64_e32 v[4:5], -1, v[4:5] ; encoding: [0xc1,0x08,0x08,0x2e] + +0xc1,0x10,0x08,0x2e +# GFX1250: v_fmac_f64_e32 v[4:5], -1, v[8:9] ; encoding: [0xc1,0x10,0x08,0x2e] + +0xf7,0x08,0x08,0x2e +# GFX1250: v_fmac_f64_e32 v[4:5], -4.0, v[4:5] ; encoding: [0xf7,0x08,0x08,0x2e] + +0xf7,0x10,0x08,0x2e +# GFX1250: v_fmac_f64_e32 v[4:5], -4.0, v[8:9] ; encoding: [0xf7,0x10,0x08,0x2e] + +0x80,0x08,0x08,0x2e +# GFX1250: v_fmac_f64_e32 v[4:5], 0, v[4:5] ; encoding: [0x80,0x08,0x08,0x2e] + +0x80,0x10,0x08,0x2e +# GFX1250: v_fmac_f64_e32 v[4:5], 0, v[8:9] ; encoding: [0x80,0x10,0x08,0x2e] + +0xf0,0x08,0x08,0x2e +# GFX1250: v_fmac_f64_e32 v[4:5], 0.5, v[4:5] ; encoding: [0xf0,0x08,0x08,0x2e] + +0xf0,0x10,0x08,0x2e +# GFX1250: v_fmac_f64_e32 v[4:5], 0.5, v[8:9] ; encoding: [0xf0,0x10,0x08,0x2e] + +0xff,0x08,0x08,0x2e,0x73,0x72,0x71,0x3f +# GFX1250: v_fmac_f64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x2e,0x73,0x72,0x71,0x3f] + +0xff,0x08,0x08,0x2e,0x56,0x34,0x12,0xaf +# GFX1250: v_fmac_f64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xff,0x08,0x08,0x2e,0x56,0x34,0x12,0xaf] + +0x7e,0x08,0x08,0x2e +# GFX1250: v_fmac_f64_e32 v[4:5], exec, v[4:5] ; encoding: [0x7e,0x08,0x08,0x2e] + +0x7e,0x10,0x08,0x2e +# GFX1250: v_fmac_f64_e32 v[4:5], exec, v[8:9] ; encoding: [0x7e,0x10,0x08,0x2e] + +0xfe,0x09,0x08,0x2e +# GFX1250: v_fmac_f64_e32 v[4:5], v[254:255], v[4:5] ; encoding: [0xfe,0x09,0x08,0x2e] + +0xfe,0x11,0x08,0x2e +# GFX1250: v_fmac_f64_e32 v[4:5], v[254:255], v[8:9] ; encoding: [0xfe,0x11,0x08,0x2e] + +0x02,0xfd,0x09,0x2e +# GFX1250: v_fmac_f64_e32 v[4:5], v[2:3], v[254:255] ; encoding: [0x02,0xfd,0x09,0x2e] + +0x02,0x09,0x08,0x2e +# GFX1250: v_fmac_f64_e32 v[4:5], v[2:3], v[4:5] ; encoding: [0x02,0x09,0x08,0x2e] + +0x02,0x11,0x08,0x2e +# GFX1250: v_fmac_f64_e32 v[4:5], v[2:3], v[8:9] ; encoding: [0x02,0x11,0x08,0x2e] + +0x6a,0x08,0x08,0x2e +# GFX1250: v_fmac_f64_e32 v[4:5], vcc, v[4:5] ; encoding: [0x6a,0x08,0x08,0x2e] + +0x6a,0x10,0x08,0x2e +# GFX1250: v_fmac_f64_e32 v[4:5], vcc, v[8:9] ; encoding: [0x6a,0x10,0x08,0x2e] + +0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x60 +# GFX1250: v_fmac_f64_e64 v[4:5], -v[2:3], -v[8:9] ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x60] + +0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x20 +# GFX1250: v_fmac_f64_e64 v[4:5], -v[2:3], v[8:9] ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x20] + +0x04,0x00,0x17,0xd5,0x02,0x83,0x01,0x00 +# GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], -1 ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x83,0x01,0x00] + +0x04,0x00,0x17,0xd5,0x02,0xef,0x01,0x00 +# GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], -4.0 ; encoding: [0x04,0x00,0x17,0xd5,0x02,0xef,0x01,0x00] + +0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x40 +# GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], -v[8:9] ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x40] + +0x04,0x00,0x17,0xd5,0x02,0x01,0x01,0x00 +# GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], 0 ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x01,0x01,0x00] + +0x04,0x00,0x17,0xd5,0x02,0xe1,0x01,0x00 +# GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], 0.5 ; encoding: [0x04,0x00,0x17,0xd5,0x02,0xe1,0x01,0x00] + +0x04,0x00,0x17,0xd5,0x02,0xfd,0x00,0x00 +# GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], exec ; encoding: [0x04,0x00,0x17,0xd5,0x02,0xfd,0x00,0x00] + +0x04,0x80,0x17,0xd5,0x02,0x11,0x02,0x00 +# GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] clamp ; encoding: [0x04,0x80,0x17,0xd5,0x02,0x11,0x02,0x00] + +0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x18 +# GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] div:2 ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x18] + +0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x08 +# GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] mul:2 ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x08] + +0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x10 +# GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] mul:4 ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x10] + +0x04,0x00,0x17,0xd5,0x02,0xd5,0x00,0x00 +# GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], vcc ; encoding: [0x04,0x00,0x17,0xd5,0x02,0xd5,0x00,0x00] + +0x04,0x02,0x17,0xd5,0x02,0x11,0x02,0x00 +# GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], |v[8:9]| ; encoding: [0x04,0x02,0x17,0xd5,0x02,0x11,0x02,0x00] + +0x04,0x01,0x17,0xd5,0x02,0x11,0x02,0x00 +# GFX1250: v_fmac_f64_e64 v[4:5], |v[2:3]|, v[8:9] ; encoding: [0x04,0x01,0x17,0xd5,0x02,0x11,0x02,0x00] + +0x04,0x03,0x17,0xd5,0x02,0x11,0x02,0x00 +# GFX1250: v_fmac_f64_e64 v[4:5], |v[2:3]|, |v[8:9]| ; encoding: [0x04,0x03,0x17,0xd5,0x02,0x11,0x02,0x00] + 0xfe,0xfc,0xfd,0x49,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 # GFX1250: v_fmaak_f64 v[254:255], 0x405ec000, v[254:255], 0x405ec000 ; encoding: [0xfe,0xfc,0xfd,0x49,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]