-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[WIP][AMDGPU][MC] Support 128b rsrc reg in mimg instructions #139121
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 8 commits
c9f698f
ef7494c
0eac0e8
a503c3c
5c3b75e
73836d7
218f4e7
cc05ff1
59bfdb4
45b1a3b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -1781,6 +1781,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser { | |||||
| bool validateMIMGD16(const MCInst &Inst); | ||||||
| bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands); | ||||||
| bool validateMIMGMSAA(const MCInst &Inst); | ||||||
| bool validateMIMGR128(const MCInst &Inst, const OperandVector &Operands); | ||||||
| bool validateOpSel(const MCInst &Inst); | ||||||
| bool validateTrue16OpSel(const MCInst &Inst); | ||||||
| bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName); | ||||||
|
|
@@ -3974,6 +3975,64 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, | |||||
| return false; | ||||||
| } | ||||||
|
|
||||||
| bool AMDGPUAsmParser::validateMIMGR128(const MCInst &Inst, | ||||||
| const OperandVector &Operands) { | ||||||
| const unsigned Opc = Inst.getOpcode(); | ||||||
| const MCInstrDesc &Desc = MII.get(Opc); | ||||||
|
|
||||||
| if ((Desc.TSFlags & MIMGFlags) == 0) | ||||||
| return true; | ||||||
|
|
||||||
| // image_bvh_intersect_ray instructions only support 128b RSRC reg | ||||||
| if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH) | ||||||
| return true; | ||||||
|
|
||||||
| AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG) | ||||||
| ? AMDGPU::OpName::srsrc | ||||||
| : AMDGPU::OpName::rsrc; | ||||||
| int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName); | ||||||
| assert(SrsrcIdx != -1); | ||||||
|
|
||||||
| auto RsrcReg = Inst.getOperand(SrsrcIdx).getReg(); | ||||||
|
|
||||||
| unsigned SrsrcRegSize = 4; | ||||||
| if (getMRI()->getRegClass(AMDGPU::SReg_256_XNULLRegClassID).contains(RsrcReg)) | ||||||
| SrsrcRegSize = 8; | ||||||
| else { | ||||||
| switch (RsrcReg.id()) { | ||||||
| case TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_vi: | ||||||
| case TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_vi: | ||||||
| case TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_vi: | ||||||
| case TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_gfx9plus: | ||||||
| case TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_gfx9plus: | ||||||
| case TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9plus: | ||||||
| SrsrcRegSize = 8; | ||||||
| break; | ||||||
| default: | ||||||
| break; | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128); | ||||||
| bool IsR128 = | ||||||
| (hasMIMG_R128() && R128Idx != -1 && Inst.getOperand(R128Idx).getImm()); | ||||||
|
|
||||||
| if (SrsrcRegSize == 8 && IsR128) { | ||||||
| auto Loc = getImmLoc(AMDGPUOperand::ImmTyR128A16, Operands); | ||||||
| Error(Loc, "r128 not allowed with 256-bit RSRC reg"); | ||||||
| return false; | ||||||
| } else if (SrsrcRegSize == 4 && !IsR128) { | ||||||
|
||||||
| auto Loc = getInstLoc(Operands); | ||||||
| if (hasMIMG_R128()) | ||||||
| Error(Loc, | ||||||
| "the RSRC reg should be 256-bit, or the r128 flag is required"); | ||||||
|
||||||
| "the RSRC reg should be 256-bit, or the r128 flag is required"); | |
| "rsrc reg should be 256-bit, or the r128 flag is required"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -922,6 +922,15 @@ defm "" : SRegClass<16, Reg512Types.types, SGPR_512Regs, TTMP_512Regs>; | |
| defm "" : SRegClass<32, Reg1024Types.types, SGPR_1024Regs>; | ||
| } | ||
|
|
||
| def SReg_RSRC : SIRegisterClass<"AMDGPU", [v8i32], 32, | ||
| (add SReg_256_XNULL, SReg_128_XNULL)> { | ||
| let Size = 8; | ||
|
Comment on lines
+925
to
+927
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Doesn't this imply that a value of type v8i32 can be stored in a SReg_128_XNULL register? I do not see any codegen tests with the new 128 rsrc size. This definition in fine for assembler/disassembler parts but I'm not sure if this works for codegen so please try some tests. We've tried before to use a custom RegisterClass with registers of different sizes and run into an issue where codegen would sometimes pick wrong register class. That case however had a mix of SGPRs and VGPRs. |
||
| let CopyCost = -1; | ||
| let isAllocatable = 0; | ||
| let HasSGPR = 1; | ||
| let BaseClassOrder = 10000; | ||
| } | ||
|
|
||
| def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32, | ||
| (add VGPR_32, LDS_DIRECT_CLASS)> { | ||
| let isAllocatable = 0; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
there ought to be a class test you can perform for this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In
getMCReg(), registers such as TTMP0_TTMP1..._TTMP7 are converted to the ones with a suffix "_vi" or "_gfx9plus". Those new registers don't seem to belong to any reg class.