-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AMDGPU][MC] Fix disassemble of image_gather4 with d16 #114609
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU][MC] Fix disassemble of image_gather4 with d16 #114609
Conversation
For GFX10+, image_gather4 instructions that have v[254:255] as dst reg and the d16 bit on can be assembled correctly but the generated binary fails to disassemble. This patch fixes this problem.
|
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-amdgpu Author: Jun Wang (jwanggit86) ChangesFor GFX10+, image_gather4 instructions that have v[254:255] as dst reg and the d16 bit on can be assembled correctly but the generated binary fails to disassemble. This patch fixes this problem. Full diff: https://github.com/llvm/llvm-project/pull/114609.diff 5 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 5c49a8116ae7fc..dbbc466aee100c 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -1508,9 +1508,9 @@ multiclass MIMG_Gather <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0,
let BaseOpcode = !cast<MIMGBaseOpcode>(NAME), WQM = wqm,
Gather4 = 1 in {
let VDataDwords = 2 in
- defm _V2 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_64>; /* for packed D16 only */
+ defm _V2 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_64, 1>; /* for packed D16 only */
let VDataDwords = 4 in
- defm _V4 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_128, 1>;
+ defm _V4 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_128>;
let VDataDwords = 5 in
defm _V5 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_160>;
}
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg.txt
index 8f3da34ea24739..5079f9a3216d41 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg.txt
@@ -443,6 +443,12 @@
# GFX10: image_gather4 v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x0f,0x00,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00]
0x12,0x0f,0x00,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+# GFX10: image_gather4 v[252:255], v[1:2], s[8:15], s[12:15] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x08,0x00,0xf1,0x01,0xfc,0x62,0x00]
+0x08,0x08,0x00,0xf1,0x01,0xfc,0x62,0x00
+
+# image_gather4 v[254:255], v[1:2], s[8:15], s[12:15] dmask:0x8 dim:SQ_RSRC_IMG_2D d16 ; encoding: [0x08,0x08,0x00,0xf1,0x01,0xfe,0x62,0x80]
+0x08,0x08,0x00,0xf1,0x01,0xfe,0x62,0x80
+
# GFX10: image_gather4_cl v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x0f,0x04,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
0x1a,0x0f,0x04,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt
index 46488a9aa4ec70..2ced1e829aa843 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt
@@ -255,6 +255,12 @@
# GFX11: image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0xbc,0xf0,0x20,0x40,0x01,0x64]
0x00,0x01,0xbc,0xf0,0x20,0x40,0x01,0x64
+# GFX11: image_gather4 v[254:255], v[1:2], s[8:15], s[12:15] dmask:0x8 dim:SQ_RSRC_IMG_2D d16 ; encoding: [0x04,0x08,0xbe,0xf0,0x01,0xfe,0x02,0x0c]
+0x04,0x08,0xbe,0xf0,0x01,0xfe,0x02,0x0c
+
+# GFX11: image_gather4 v[252:255], v[1:2], s[8:15], s[12:15] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x08,0xbc,0xf0,0x01,0xfc,0x02,0x0c]
+0x04,0x08,0xbc,0xf0,0x01,0xfc,0x02,0x0c
+
# GFX11: image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0c,0x02,0x80,0xf1,0x20,0x40,0x01,0x64]
0x0c,0x02,0x80,0xf1,0x20,0x40,0x01,0x64
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vsample.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vsample.txt
index e03849ffa83834..ecc7b09bbe915e 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vsample.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vsample.txt
@@ -510,6 +510,11 @@
# GFX12: image_gather4 v[35:38], [v39, v40], s[48:55], s[76:79] dmask:0x2 dim:SQ_RSRC_IMG_2D lwe ; encoding: [0x01,0xc0,0x8b,0xe4,0x23,0x61,0x00,0x26,0x27,0x28,0x00,0x00]
0x01,0xc0,0x8b,0xe4,0x23,0x61,0x00,0x26,0x27,0x28,0x00,0x00
+# GFX12: image_gather4 v[254:255], [v1, v2], s[8:15], s[12:15] dmask:0x8 dim:SQ_RSRC_IMG_2D d16 ; encoding: [0x21,0xc0,0x0b,0xe6,0xfe,0x10,0x00,0x06,0x01,0x02,0x00,0x00]
+0x21,0xc0,0x0b,0xe6,0xfe,0x10,0x00,0x06,0x01,0x02,0x00,0x00
+
+# GFX12: image_gather4 v[252:255], [v1, v2], s[8:15], s[12:15] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0xc0,0x0b,0xe6,0xfc,0x10,0x00,0x06,0x01,0x02,0x00,0x00]
+0x01,0xc0,0x0b,0xe6,0xfc,0x10,0x00,0x06,0x01,0x02,0x00,0x00
# GFX12: image_gather4_l v[64:67], [v32, v33, v34], s[4:11], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0x00,0x4c,0xe4,0x40,0x08,0x00,0x02,0x20,0x21,0x22,0x00]
0x01,0x00,0x4c,0xe4,0x40,0x08,0x00,0x02,0x20,0x21,0x22,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
index 0a5bafc55f4d43..346ec5853b6c77 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
@@ -235,5 +235,5 @@
# VI: image_gather4 v[252:255], v1, s[8:15], s[12:15] dmask:0x3 ; encoding: [0x00,0x03,0x00,0xf1,0x01,0xfc,0x62,0x00]
0x00,0x03,0x00,0xf1,0x01,0xfc,0x62,0x00
-# VI: image_gather4 v[252:255], v1, s[8:15], s[12:15] dmask:0x1 unorm glc slc tfe lwe da ; encoding: [0x00,0x71,0x03,0xf3,0x01,0xfc,0x62,0x00]
+# VI: image_gather4 v[252:253], v1, s[8:15], s[12:15] dmask:0x1 unorm glc slc tfe lwe da ; encoding: [0x00,0x71,0x03,0xf3,0x01,0xfc,0x62,0x00]
0x00,0x71,0x03,0xf3,0x01,0xfc,0x62,0x00
|
| Gather4 = 1 in { | ||
| let VDataDwords = 2 in | ||
| defm _V2 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_64>; /* for packed D16 only */ | ||
| defm _V2 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_64, 1>; /* for packed D16 only */ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use true and comment operand name?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
| # GFX10: image_gather4 v[252:255], v[1:2], s[8:15], s[12:15] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x08,0x00,0xf1,0x01,0xfc,0x62,0x00] | ||
| 0x08,0x08,0x00,0xf1,0x01,0xfc,0x62,0x00 | ||
|
|
||
| # image_gather4 v[254:255], v[1:2], s[8:15], s[12:15] dmask:0x8 dim:SQ_RSRC_IMG_2D d16 ; encoding: [0x08,0x08,0x00,0xf1,0x01,0xfe,0x62,0x80] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing GFX10: prefix
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.
| 0x00,0x03,0x00,0xf1,0x01,0xfc,0x62,0x00 | ||
|
|
||
| # VI: image_gather4 v[252:255], v1, s[8:15], s[12:15] dmask:0x1 unorm glc slc tfe lwe da ; encoding: [0x00,0x71,0x03,0xf3,0x01,0xfc,0x62,0x00] | ||
| # VI: image_gather4 v[252:253], v1, s[8:15], s[12:15] dmask:0x1 unorm glc slc tfe lwe da ; encoding: [0x00,0x71,0x03,0xf3,0x01,0xfc,0x62,0x00] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why does this change?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Previous default was V4 variant so it would start with VReg_128: v[252:255]
Now default is V2 so it starts with VReg_64: v[252:253]
And I think both fail when trying to convert to V5 and VReg_160: v[252:256] which would be invalid.
I'm not sure what the original test was checking since this is one of "incorrect" encodings
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Based on comment at line 231 it was probably testing that with tfe it will go out of range.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So really this should fail to disassemble, in the same way as any other instruction that uses a vgpr tuple that goes higher than v255. Right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This currently matches sp3. It does fail on encoding as does sp3.
Looking into it further it seems that we do have inconsistent behavior.
We do not decode VOP when a register is out of bounds (gfx1010 example):
# v_add_f64 v[255:256], v[1:2], v[2:3]
0xff,0x00,0x64,0xd5,0x01,0x05,0x02,0x00
But we do MIMG (gfx1010 example):
# image_load v65, v[253:256], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm
0x38,0x18,0x00,0xf0,0xfd,0x41,0x00,0x00
except llvm produces just v253. Sp3 always decodes to instructions above.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For invalid encodings, I would prefer that we consistently either fail to decode, or print something like v[252:256] /* VGPR tuple out of range */. But that would be for another patch.
I have no objection to the current patch if it fixes disassembly of valid encodings.
| 0x21,0xc0,0x0b,0xe6,0xfe,0x10,0x00,0x06,0x01,0x02,0x00,0x00 | ||
|
|
||
| # GFX12: image_gather4 v[252:255], [v1, v2], s[8:15], s[12:15] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0xc0,0x0b,0xe6,0xfc,0x10,0x00,0x06,0x01,0x02,0x00,0x00] | ||
| 0x01,0xc0,0x0b,0xe6,0xfc,0x10,0x00,0x06,0x01,0x02,0x00,0x00 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
missing new line
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.
mbrkusanin
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with nits
For GFX10+, image_gather4 instructions that have v[254:255] as dst reg and the d16 bit on can be assembled correctly but the generated binary fails to disassemble. This patch fixes this problem.