From c594f5daf392c6bfc94f50d71753cee39f27941c Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Tue, 2 Sep 2025 13:19:01 -0700 Subject: [PATCH] [AMDGPU] Add s_set_vgpr_msb gfx1250 instruction --- llvm/lib/Target/AMDGPU/AMDGPU.td | 10 ++++++++++ .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 20 +++++++++++++++++++ llvm/lib/Target/AMDGPU/GCNSubtarget.h | 3 +++ llvm/lib/Target/AMDGPU/SOPInstructions.td | 8 ++++++++ llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s | 8 ++++++++ llvm/test/MC/AMDGPU/gfx1250_err.s | 10 ++++++++++ .../Disassembler/AMDGPU/gfx1250_dasm_sopp.txt | 6 ++++++ 7 files changed, 65 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 1038797374de3..0e0b84f7e3374 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1210,6 +1210,12 @@ def Feature64BitLiterals : SubtargetFeature<"64-bit-literals", "Can use 64-bit literals with single DWORD instructions" >; +/* Feature string "1024-addressable-vgprs"; paired with the GCNSubtarget member Has1024AddressableVGPRs and listed in FeatureISAVersion12_50 by this patch. */ def Feature1024AddressableVGPRs : SubtargetFeature<"1024-addressable-vgprs", + "Has1024AddressableVGPRs", + "true", + "Has 1024 addressable VGPRs" +>; + def FeatureWaitXcnt : SubtargetFeature<"wait-xcnt", "HasWaitXcnt", "true", @@ -2033,6 +2039,7 @@ def FeatureISAVersion12_50 : FeatureSet< FeatureCUStores, FeatureAddressableLocalMemorySize327680, FeatureCuMode, + Feature1024AddressableVGPRs, Feature64BitLiterals, FeatureLDSBankCount32, FeatureDLInsts, @@ -2841,6 +2848,9 @@ def HasBVHDualAndBVH8Insts : Predicate<"Subtarget->hasBVHDualAndBVH8Insts()">, def Has64BitLiterals : Predicate<"Subtarget->has64BitLiterals()">, AssemblerPredicate<(all_of Feature64BitLiterals)>; +def Has1024AddressableVGPRs : Predicate<"Subtarget->has1024AddressableVGPRs()">, + AssemblerPredicate<(all_of Feature1024AddressableVGPRs)>; + def HasWaitXcnt : Predicate<"Subtarget->hasWaitXcnt()">, AssemblerPredicate<(all_of FeatureWaitXcnt)>; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp 
b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index a23c2fc8f2e65..93083f2660c2d 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1886,6 +1886,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser { bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands, const unsigned CPol); bool validateTFE(const MCInst &Inst, const OperandVector &Operands); + bool validateSetVgprMSB(const MCInst &Inst, const OperandVector &Operands); std::optional validateLdsDirect(const MCInst &Inst); bool validateWMMA(const MCInst &Inst, const OperandVector &Operands); unsigned getConstantBusLimit(unsigned Opcode) const; @@ -5542,6 +5543,22 @@ bool AMDGPUAsmParser::validateTFE(const MCInst &Inst, return true; } +/* Validates the immediate of s_set_vgpr_msb: returns true for any other opcode or when simm16 is in [0..255]; otherwise emits an error at Operands[1] and returns false. The range test uses the full 64-bit immediate so large values cannot wrap into range through a narrowing cast. */ bool AMDGPUAsmParser::validateSetVgprMSB(const MCInst &Inst, + const OperandVector &Operands) { + if (Inst.getOpcode() != AMDGPU::S_SET_VGPR_MSB_gfx12) + return true; + + int Simm16Pos = + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::simm16); + if (!isUInt<8>(Inst.getOperand(Simm16Pos).getImm())) { + SMLoc Loc = Operands[1]->getStartLoc(); + Error(Loc, "s_set_vgpr_msb accepts values in range [0..255]"); + return false; + } + + return true; +} + bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst, const OperandVector &Operands) { unsigned Opc = Inst.getOpcode(); @@ -5706,6 +5723,9 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, if (!validateTFE(Inst, Operands)) { return false; } + if (!validateSetVgprMSB(Inst, Operands)) { + return false; + } if (!validateWMMA(Inst, Operands)) { return false; } diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index fb12da513d359..4475c8d1d1602 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -236,6 +236,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasPseudoScalarTrans = false; bool 
HasRestrictedSOffset = false; bool Has64BitLiterals = false; + bool Has1024AddressableVGPRs = false; bool HasBitOp3Insts = false; bool HasTanhInsts = false; bool HasTensorCvtLutInsts = false; @@ -1437,6 +1438,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasAddPC64Inst() const { return GFX1250Insts; } + /* Returns the Has1024AddressableVGPRs subtarget flag (false unless set). */ bool has1024AddressableVGPRs() const { return Has1024AddressableVGPRs; } + bool hasMinimum3Maximum3PKF16() const { return HasMinimum3Maximum3PKF16; } diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 3097409eff12d..dbe0b8c496fed 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -1844,6 +1844,13 @@ let SubtargetPredicate = HasWaitXcnt, hasSideEffects = 1 in { SOPP_Pseudo<"s_wait_xcnt", (ins s16imm:$simm16), "$simm16">; } // End SubtargetPredicate = hasWaitXcnt, hasSideEffects = 1 +let SubtargetPredicate = Has1024AddressableVGPRs in { + /* s_set_vgpr_msb pseudo: takes one i16imm operand ($simm16); flagged as having side effects and as defining MODE. */ def S_SET_VGPR_MSB : SOPP_Pseudo<"s_set_vgpr_msb" , (ins i16imm:$simm16), "$simm16"> { + let hasSideEffects = 1; + let Defs = [MODE]; + } +} /* End SubtargetPredicate = Has1024AddressableVGPRs */ + //===----------------------------------------------------------------------===// // SOP1 Patterns //===----------------------------------------------------------------------===// @@ -2691,6 +2698,7 @@ defm S_WAIT_STORECNT_DSCNT : SOPP_Real_32_gfx12<0x049>; //===----------------------------------------------------------------------===// // SOPP - GFX1250 only. 
//===----------------------------------------------------------------------===// +defm S_SET_VGPR_MSB : SOPP_Real_32_gfx12<0x006>; defm S_SETPRIO_INC_WG : SOPP_Real_32_gfx12<0x03e>; defm S_WAIT_XCNT : SOPP_Real_32_gfx12<0x045>; defm S_WAIT_ASYNCCNT : SOPP_Real_32_gfx12<0x04a>; diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s b/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s index bfc3544ac1b15..4f7ca47705eb2 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s @@ -37,6 +37,14 @@ s_setprio_inc_wg 100 // GFX1250: [0x64,0x00,0xbe,0xbf] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +s_set_vgpr_msb 10 +// GFX1250: [0x0a,0x00,0x86,0xbf] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +s_set_vgpr_msb 255 +// GFX1250: [0xff,0x00,0x86,0xbf] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + s_monitor_sleep 1 // GFX1250: s_monitor_sleep 1 ; encoding: [0x01,0x00,0x84,0xbf] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_err.s b/llvm/test/MC/AMDGPU/gfx1250_err.s index 676eb48cc5a7f..9d1131ef9fb7a 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_err.s +++ b/llvm/test/MC/AMDGPU/gfx1250_err.s @@ -1,5 +1,15 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX1250-ERR --implicit-check-not=error: -strict-whitespace %s +s_set_vgpr_msb -1 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: s_set_vgpr_msb accepts values in range [0..255] +// GFX1250-ERR: s_set_vgpr_msb -1 +// GFX1250-ERR: ^ + +s_set_vgpr_msb 256 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: s_set_vgpr_msb accepts values in range [0..255] +// GFX1250-ERR: s_set_vgpr_msb 256 +// GFX1250-ERR: ^ + s_load_b32 s4, s[2:3], 10 th:TH_LOAD_NT th:TH_LOAD_NT // GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // GFX1250-ERR: 
s_load_b32 s4, s[2:3], 10 th:TH_LOAD_NT th:TH_LOAD_NT diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt index af94fbc7824a5..a8627d64001c2 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt @@ -27,6 +27,12 @@ # GFX1250: s_setprio_inc_wg 0x64 ; encoding: [0x64,0x00,0xbe,0xbf] 0x64,0x00,0xbe,0xbf +# GFX1250: s_set_vgpr_msb 10 ; encoding: [0x0a,0x00,0x86,0xbf] +0x0a,0x00,0x86,0xbf + +# GFX1250: s_set_vgpr_msb 0xff ; encoding: [0xff,0x00,0x86,0xbf] +0xff,0x00,0x86,0xbf + # GFX1250: s_monitor_sleep 0 ; encoding: [0x00,0x00,0x84,0xbf] 0x00,0x00,0x84,0xbf