From 07fefca48791c113a65e068c83a8ffdefc6f5dfb Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Wed, 27 Aug 2025 16:19:23 +0900
Subject: [PATCH 1/4] AMDGPU: Fix fixme for out of bounds indexing in
 usesConstantBus check

This loop over all the operands in the MachineInstr will eventually go
past the end of the MCInstrDesc's explicit operands. We don't need the
instr desc to compute the constant bus usage, just the register and
whether it's implicit or not.

The check here is slightly conservative; e.g., a random vcc implicit
use appended to an instruction will falsely report a constant bus use.
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 62 ++++++++++++++++----------
 llvm/lib/Target/AMDGPU/SIInstrInfo.h   |  4 ++
 2 files changed, 42 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index c5e8f95748cf1..4cf8fd5eb594f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4758,6 +4758,35 @@ MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
   return Inst32;
 }
 
+bool SIInstrInfo::physRegUsesConstantBus(const MachineOperand &RegOp) const {
+  // Null is free
+  Register Reg = RegOp.getReg();
+  if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
+    return false;
+
+  // SGPRs use the constant bus
+
+  // FIXME: implicit registers that are not part of the MCInstrDesc's implicit
+  // physical register operands should also count.
+  if (RegOp.isImplicit())
+    return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
+
+  // Normal exec read does not count.
+  if ((Reg == AMDGPU::EXEC || Reg == AMDGPU::EXEC_LO) && RegOp.isImplicit())
+    return false;
+
+  // SGPRs use the constant bus
+  return AMDGPU::SReg_32RegClass.contains(Reg) ||
+         AMDGPU::SReg_64RegClass.contains(Reg);
+}
+
+bool SIInstrInfo::regUsesConstantBus(const MachineOperand &RegOp,
+                                     const MachineRegisterInfo &MRI) const {
+  Register Reg = RegOp.getReg();
+  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
+                         : physRegUsesConstantBus(RegOp);
+}
+
 bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
                                   const MachineOperand &MO,
                                   const MCOperandInfo &OpInfo) const {
@@ -4765,23 +4794,9 @@ bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
   if (!MO.isReg())
     return !isInlineConstant(MO, OpInfo);
 
-  if (!MO.isUse())
-    return false;
-
-  if (MO.getReg().isVirtual())
-    return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
-
-  // Null is free
-  if (MO.getReg() == AMDGPU::SGPR_NULL || MO.getReg() == AMDGPU::SGPR_NULL64)
-    return false;
-
-  // SGPRs use the constant bus
-  if (MO.isImplicit()) {
-    return MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
-           MO.getReg() == AMDGPU::VCC_LO;
-  }
-  return AMDGPU::SReg_32RegClass.contains(MO.getReg()) ||
-         AMDGPU::SReg_64RegClass.contains(MO.getReg());
+  Register Reg = MO.getReg();
+  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
+                         : physRegUsesConstantBus(MO);
 }
 
 static Register findImplicitSGPRRead(const MachineInstr &MI) {
@@ -6250,13 +6265,12 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
       continue;
     const MachineOperand &Op = MI.getOperand(i);
     if (Op.isReg()) {
-      RegSubRegPair SGPR(Op.getReg(), Op.getSubReg());
-      if (!SGPRsUsed.count(SGPR) &&
-          // FIXME: This can access off the end of the operands() array.
-          usesConstantBus(MRI, Op, InstDesc.operands().begin()[i])) {
-        if (--ConstantBusLimit <= 0)
-          return false;
-        SGPRsUsed.insert(SGPR);
+      if (Op.isUse()) {
+        RegSubRegPair SGPR(Op.getReg(), Op.getSubReg());
+        if (regUsesConstantBus(Op, MRI) && SGPRsUsed.insert(SGPR).second) {
+          if (--ConstantBusLimit <= 0)
+            return false;
+        }
       }
     } else if (AMDGPU::isSISrcOperand(InstDesc, i) &&
                !isInlineConstant(Op, InstDesc.operands()[i])) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 958af0ff1147f..2f9f5c54406a3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1195,6 +1195,10 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
   /// This function will return false if you pass it a 32-bit instruction.
   bool hasVALU32BitEncoding(unsigned Opcode) const;
 
+  bool physRegUsesConstantBus(const MachineOperand &Reg) const;
+  bool regUsesConstantBus(const MachineOperand &Reg,
+                          const MachineRegisterInfo &MRI) const;
+
   /// Returns true if this operand uses the constant bus.
   bool usesConstantBus(const MachineRegisterInfo &MRI,
                        const MachineOperand &MO,

From ec7dbe2f8987ae08d94788b2d19511b471bd735e Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Thu, 28 Aug 2025 13:43:12 +0900
Subject: [PATCH 2/4] merge exec check

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4cf8fd5eb594f..d3bda9f3875e3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4767,14 +4767,10 @@ bool SIInstrInfo::physRegUsesConstantBus(const MachineOperand &RegOp) const {
   // SGPRs use the constant bus
 
   // FIXME: implicit registers that are not part of the MCInstrDesc's implicit
-  // physical register operands should also count.
+  // physical register operands should also count, except for exec.
   if (RegOp.isImplicit())
     return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
 
-  // Normal exec read does not count.
-  if ((Reg == AMDGPU::EXEC || Reg == AMDGPU::EXEC_LO) && RegOp.isImplicit())
-    return false;
-
   // SGPRs use the constant bus
   return AMDGPU::SReg_32RegClass.contains(Reg) ||
          AMDGPU::SReg_64RegClass.contains(Reg);

From 20442f6adbd765db0493edabef85228b56b0a1ef Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Wed, 27 Aug 2025 15:35:53 +0900
Subject: [PATCH 3/4] AMDGPU: Refactor isImmOperandLegal

The goal is to expose more variants that can operate without
preconstructed MachineInstrs or MachineOperands.
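A minimal sketch of the intended call pattern (the opcode and value here
are illustrative only; the fold-operands change later in this series
uses the new int64_t overload in the same way):

  // Query whether a raw 64-bit immediate would be legal in operand 1 of
  // a move opcode, without first materializing a MachineOperand.
  const MCInstrDesc &MovDesc = TII->get(AMDGPU::AV_MOV_B64_IMM_PSEUDO);
  int64_t ImmVal = 0; // hypothetical immediate to check
  if (TII->isImmOperandLegal(MovDesc, /*OpNo=*/1, ImmVal)) {
    // ImmVal can be encoded directly in this operand.
  }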
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        | 38 ++++++++++++-------
 llvm/lib/Target/AMDGPU/SIInstrInfo.h          |  6 +++
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    |  7 ----
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |  9 ++++-
 4 files changed, 38 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d3bda9f3875e3..887092182f7d1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4572,19 +4572,24 @@ static bool compareMachineOp(const MachineOperand &Op0,
   }
 }
 
-bool SIInstrInfo::isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
-                                    const MachineOperand &MO) const {
-  const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo];
-
-  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal());
-
+bool SIInstrInfo::isLiteralOperandLegal(const MCInstrDesc &InstDesc,
+                                        const MCOperandInfo &OpInfo) const {
   if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
     return true;
 
-  if (OpInfo.RegClass < 0)
+  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
     return false;
 
-  if (MO.isImm() && isInlineConstant(MO, OpInfo)) {
+  if (!isVOP3(InstDesc) || !AMDGPU::isSISrcOperand(OpInfo))
+    return true;
+
+  return ST.hasVOP3Literal();
+}
+
+bool SIInstrInfo::isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
+                                    int64_t ImmVal) const {
+  const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo];
+  if (isInlineConstant(ImmVal, OpInfo.OperandType)) {
     if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
         OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
                                                      AMDGPU::OpName::src2))
@@ -4592,13 +4597,18 @@ bool SIInstrInfo::isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
     return RI.opCanUseInlineConstant(OpInfo.OperandType);
   }
 
-  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
-    return false;
+  return isLiteralOperandLegal(InstDesc, OpInfo);
+}
 
-  if (!isVOP3(InstDesc) || !AMDGPU::isSISrcOperand(InstDesc, OpNo))
-    return true;
+bool SIInstrInfo::isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
+                                    const MachineOperand &MO) const {
+  if (MO.isImm())
+    return isImmOperandLegal(InstDesc, OpNo, MO.getImm());
 
-  return ST.hasVOP3Literal();
+  assert((MO.isTargetIndex() || MO.isFI() || MO.isGlobal()) &&
+         "unexpected imm-like operand kind");
+  const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo];
+  return isLiteralOperandLegal(InstDesc, OpInfo);
 }
 
 bool SIInstrInfo::isLegalAV64PseudoImm(uint64_t Imm) const {
@@ -6268,7 +6278,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
           return false;
         }
       }
-    } else if (AMDGPU::isSISrcOperand(InstDesc, i) &&
+    } else if (AMDGPU::isSISrcOperand(InstDesc.operands()[i]) &&
                !isInlineConstant(Op, InstDesc.operands()[i])) {
       // The same literal may be used multiple times.
       if (!UsedLiteral)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 2f9f5c54406a3..1070d4824aa14 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1183,6 +1183,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
   bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
                          const MachineOperand &MO) const;
 
+  bool isLiteralOperandLegal(const MCInstrDesc &InstDesc,
+                             const MCOperandInfo &OpInfo) const;
+
+  bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
+                         int64_t ImmVal) const;
+
   bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
                          const MachineOperand &MO) const {
     return isImmOperandLegal(MI.getDesc(), OpNo, MO);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 18ee9c16b3ff9..da19a6faa9e0f 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2720,13 +2720,6 @@ bool isInlineValue(unsigned Reg) {
 #undef CASE_GFXPRE11_GFX11PLUS_TO
 #undef MAP_REG2REG
 
-bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
-  assert(OpNo < Desc.NumOperands);
-  unsigned OpType = Desc.operands()[OpNo].OperandType;
-  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
-         OpType <= AMDGPU::OPERAND_SRC_LAST;
-}
-
 bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
   assert(OpNo < Desc.NumOperands);
   unsigned OpType = Desc.operands()[OpNo].OperandType;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 70dfb63cbe040..7c5c1e85b2014 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1590,7 +1590,14 @@ bool isInlineValue(unsigned Reg);
 
 /// Is this an AMDGPU specific source operand? These include registers,
 /// inline constants, literals and mandatory literals (KImm).
-bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
+constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo) {
+  return OpInfo.OperandType >= AMDGPU::OPERAND_SRC_FIRST &&
+         OpInfo.OperandType <= AMDGPU::OPERAND_SRC_LAST;
+}
+
+constexpr bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
+  return isSISrcOperand(Desc.operands()[OpNo]);
+}
 
 /// Is this a KImm operand?
 bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);

From 05821956deebe21b8dd2bdd0a5962a0987d42775 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Tue, 26 Aug 2025 23:53:57 +0900
Subject: [PATCH 4/4] AMDGPU: Fold 64-bit immediate into copy to AV class

This is in preparation for patches which will introduce more copies to
AV registers.
---
 llvm/lib/Target/AMDGPU/SIDefines.h         | 10 +--
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp  | 25 ++++--
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp     |  6 +-
 .../CodeGen/AMDGPU/fold-imm-copy-agpr.mir  | 85 ++++++++-----
 llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir | 26 +++---
 5 files changed, 70 insertions(+), 82 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 268b153c6c924..150e05b59c29f 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -237,16 +237,16 @@ enum OperandType : unsigned {
   OPERAND_REG_INLINE_AC_FP32,
   OPERAND_REG_INLINE_AC_FP64,
 
+  // Operand for AV_MOV_B64_IMM_PSEUDO, which is a pair of 32-bit inline
+  // constants. Does not accept registers.
+  OPERAND_INLINE_C_AV64_PSEUDO,
+
   // Operand for source modifiers for VOP instructions
   OPERAND_INPUT_MODS,
 
   // Operand for SDWA instructions
   OPERAND_SDWA_VOPC_DST,
 
-  // Operand for AV_MOV_B64_IMM_PSEUDO, which is a pair of 32-bit inline
-  // constants.
-  OPERAND_INLINE_C_AV64_PSEUDO,
-
   OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
   OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,
 
@@ -254,7 +254,7 @@ enum OperandType : unsigned {
   OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_FP64,
 
   OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT32,
-  OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_FP64,
+  OPERAND_REG_INLINE_AC_LAST = OPERAND_INLINE_C_AV64_PSEUDO,
 
   OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
   OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index a116b57c85a88..92eaa8b29ccb8 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1296,7 +1296,8 @@ void SIFoldOperandsImpl::foldOperand(
   for (unsigned MovOp :
        {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
        AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64,
-        AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO}) {
+        AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO,
+        AMDGPU::AV_MOV_B64_IMM_PSEUDO}) {
     const MCInstrDesc &MovDesc = TII->get(MovOp);
     assert(MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1);
 
@@ -1312,11 +1313,23 @@ void SIFoldOperandsImpl::foldOperand(
     const int SrcIdx = MovOp == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
     const TargetRegisterClass *MovSrcRC =
         TRI->getRegClass(MovDesc.operands()[SrcIdx].RegClass);
-
-    if (UseSubReg)
-      MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
-    if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
-      break;
+    if (MovSrcRC) {
+      if (UseSubReg)
+        MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
+      if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
+        break;
+
+      // FIXME: This is mutating the instruction only and deferring the actual
+      // fold of the immediate.
+    } else {
+      // For the _IMM_PSEUDO cases, there can be value restrictions on the
+      // immediate to verify. Technically we should always verify this, but it
+      // only matters for these concrete cases.
+      // TODO: Handle non-imm case if it's useful.
+      if (!OpToFold.isImm() ||
+          !TII->isImmOperandLegal(MovDesc, 1, *OpToFold.getEffectiveImmVal()))
+        break;
+    }
 
     MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
     MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 887092182f7d1..2b187c641da1c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3444,12 +3444,8 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
   case AMDGPU::V_ACCVGPR_READ_B32_e64:
   case AMDGPU::V_ACCVGPR_MOV_B32:
   case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
-    return true;
   case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
-    // TODO: We could fold this, but it's a strange case. The immediate value
-    // can't be directly folded into any real use. We would have to spread new
-    // immediate legality checks around and only accept subregister extracts for
-    // profitability.
+    return true;
   default:
     return false;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
index 6f2e33900a79a..73cdcddbef135 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
@@ -7,9 +7,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: v_mov_b64_pseudo_imm_0_copy_to_areg_64
-    ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[V_MOV_B]]
-    ; GCN-NEXT: $agpr0_agpr1 = COPY [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: $agpr0_agpr1 = COPY [[AV_MOV_]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
     %1:areg_64_align2 = COPY %0
@@ -24,9 +23,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: v_mov_b64_pseudo_imm_neg1_copy_to_areg_64
-    ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO -1, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[V_MOV_B]]
-    ; GCN-NEXT: $agpr0_agpr1 = COPY [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO -1, implicit $exec
+    ; GCN-NEXT: $agpr0_agpr1 = COPY [[AV_MOV_]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = V_MOV_B64_PSEUDO -1, implicit $exec
     %1:areg_64_align2 = COPY %0
@@ -125,9 +123,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: v_mov_b64_pseudo_imm_0_copy_to_av_64
-    ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[V_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64_align2 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
     %1:av_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -226,9 +223,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
    ; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_areg_64
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 0, implicit $exec
     %1:areg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -241,9 +237,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_areg_64_align2
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 0, implicit $exec
     %1:areg_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -256,9 +251,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_areg_64
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64 = AV_MOV_B64_IMM_PSEUDO -16, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 -16, implicit $exec
     %1:areg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -271,9 +265,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_areg_64_align2
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO -16, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 -16, implicit $exec
     %1:areg_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -286,9 +279,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_av_64
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 0, implicit $exec
     %1:av_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -301,9 +293,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_av_64_align2
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64_align2 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 0, implicit $exec
     %1:av_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -316,9 +307,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_av_64
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO -16, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 -16, implicit $exec
     %1:av_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -331,9 +321,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_av_64_align2
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64_align2 = AV_MOV_B64_IMM_PSEUDO -16, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 -16, implicit $exec
     %1:av_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -346,9 +335,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_areg_64
-    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64 = AV_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
     %1:areg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -361,9 +349,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_areg_64_align2
-    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
     %1:areg_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -376,9 +363,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_areg_64
-    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -21474836480, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64 = AV_MOV_B64_IMM_PSEUDO -21474836480, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744052234715136, implicit $exec
     %1:areg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -391,9 +377,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_areg_64_align2
-    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -21474836480, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO -21474836480, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744052234715136, implicit $exec
     %1:areg_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -406,9 +391,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_av_64
-    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64 = COPY [[S_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
     %1:av_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -421,9 +405,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_av_64_align2
-    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[S_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64_align2 = AV_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
     %1:av_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
index ddf2aa34ecd87..dfcf9a1f5c5ae 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
@@ -816,9 +816,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: av_mov_b64_imm_pseudo_copy_av_64_to_virtreg_agpr
-    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[AV_MOV_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:av_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
     %1:areg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -832,9 +831,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: av_mov_b64_imm_pseudo_copy_av_64_to_virtreg_vgpr_0
-    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[AV_MOV_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]]
     %0:av_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
     %1:vreg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -848,9 +846,9 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: av_mov_b64_imm_pseudo_copy_av_64_to_virtreg_vgpr_nonsplat_value
-    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[AV_MOV_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:vreg_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO [[AV_MOV_]], implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]]
     %0:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
     %1:vreg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -863,9 +861,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: av_mov_b64_imm_pseudo_copy_av_64_to_virtreg_vgpr_nonsplat_value_copy_sub0
-    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]].sub0
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 17, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
     %0:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
     %1:vgpr_32 = COPY %0.sub0
     S_ENDPGM 0, implicit %1
@@ -878,9 +875,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: av_mov_b64_imm_pseudo_copy_av_64_to_virtreg_vgpr_nonsplat_value_copy_sub1
-    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]].sub1
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 64, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
     %0:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
     %1:vgpr_32 = COPY %0.sub1
     S_ENDPGM 0, implicit %1