[AMDGPU][MC] Fix disassemble of image_gather4 with d16 #114609

jwanggit86 · 2024-11-01T21:58:58Z

For GFX10+, image_gather4 instructions that have v[254:255] as dst reg and the d16 bit on can be assembled correctly but the generated binary fails to disassemble. This patch fixes this problem.

llvmbot · 2024-11-01T21:59:16Z

@llvm/pr-subscribers-mc

@llvm/pr-subscribers-backend-amdgpu

Author: Jun Wang (jwanggit86)

Changes

For GFX10+, image_gather4 instructions that have v[254:255] as dst reg and the d16 bit on can be assembled correctly but the generated binary fails to disassemble. This patch fixes this problem.

Full diff: https://github.com/llvm/llvm-project/pull/114609.diff

5 Files Affected:

(modified) llvm/lib/Target/AMDGPU/MIMGInstructions.td (+2-2)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg.txt (+6)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt (+6)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vsample.txt (+5)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt (+1-1)

diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 5c49a8116ae7fc..dbbc466aee100c 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -1508,9 +1508,9 @@ multiclass MIMG_Gather <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0,
   let BaseOpcode = !cast<MIMGBaseOpcode>(NAME), WQM = wqm,
       Gather4 = 1 in {
     let VDataDwords = 2 in
-    defm _V2 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_64>; /* for packed D16 only */
+    defm _V2 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_64, 1>; /* for packed D16 only */
     let VDataDwords = 4 in
-    defm _V4 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_128, 1>;
+    defm _V4 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_128>;
     let VDataDwords = 5 in
     defm _V5 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_160>;
   }
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg.txt
index 8f3da34ea24739..5079f9a3216d41 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg.txt
@@ -443,6 +443,12 @@
 # GFX10: image_gather4 v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x0f,0x00,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00]
 0x12,0x0f,0x00,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
 
+# GFX10: image_gather4 v[252:255], v[1:2], s[8:15], s[12:15] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x08,0x00,0xf1,0x01,0xfc,0x62,0x00]
+0x08,0x08,0x00,0xf1,0x01,0xfc,0x62,0x00
+
+# image_gather4 v[254:255], v[1:2], s[8:15], s[12:15] dmask:0x8 dim:SQ_RSRC_IMG_2D d16 ; encoding: [0x08,0x08,0x00,0xf1,0x01,0xfe,0x62,0x80]
+0x08,0x08,0x00,0xf1,0x01,0xfe,0x62,0x80
+
 # GFX10: image_gather4_cl v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x0f,0x04,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
 0x1a,0x0f,0x04,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt
index 46488a9aa4ec70..2ced1e829aa843 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt
@@ -255,6 +255,12 @@
 # GFX11: image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0xbc,0xf0,0x20,0x40,0x01,0x64]
 0x00,0x01,0xbc,0xf0,0x20,0x40,0x01,0x64
 
+# GFX11: image_gather4 v[254:255], v[1:2], s[8:15], s[12:15] dmask:0x8 dim:SQ_RSRC_IMG_2D d16 ; encoding: [0x04,0x08,0xbe,0xf0,0x01,0xfe,0x02,0x0c]
+0x04,0x08,0xbe,0xf0,0x01,0xfe,0x02,0x0c
+
+# GFX11: image_gather4 v[252:255], v[1:2], s[8:15], s[12:15] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x08,0xbc,0xf0,0x01,0xfc,0x02,0x0c]
+0x04,0x08,0xbc,0xf0,0x01,0xfc,0x02,0x0c
+
 # GFX11: image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0c,0x02,0x80,0xf1,0x20,0x40,0x01,0x64]
 0x0c,0x02,0x80,0xf1,0x20,0x40,0x01,0x64
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vsample.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vsample.txt
index e03849ffa83834..ecc7b09bbe915e 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vsample.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vsample.txt
@@ -510,6 +510,11 @@
 # GFX12: image_gather4 v[35:38], [v39, v40], s[48:55], s[76:79] dmask:0x2 dim:SQ_RSRC_IMG_2D lwe ; encoding: [0x01,0xc0,0x8b,0xe4,0x23,0x61,0x00,0x26,0x27,0x28,0x00,0x00]
 0x01,0xc0,0x8b,0xe4,0x23,0x61,0x00,0x26,0x27,0x28,0x00,0x00
 
+# GFX12: image_gather4 v[254:255], [v1, v2], s[8:15], s[12:15] dmask:0x8 dim:SQ_RSRC_IMG_2D d16 ; encoding: [0x21,0xc0,0x0b,0xe6,0xfe,0x10,0x00,0x06,0x01,0x02,0x00,0x00]
+0x21,0xc0,0x0b,0xe6,0xfe,0x10,0x00,0x06,0x01,0x02,0x00,0x00
+
+# GFX12: image_gather4 v[252:255], [v1, v2], s[8:15], s[12:15] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0xc0,0x0b,0xe6,0xfc,0x10,0x00,0x06,0x01,0x02,0x00,0x00]
+0x01,0xc0,0x0b,0xe6,0xfc,0x10,0x00,0x06,0x01,0x02,0x00,0x00
 # GFX12: image_gather4_l v[64:67], [v32, v33, v34], s[4:11], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0x00,0x4c,0xe4,0x40,0x08,0x00,0x02,0x20,0x21,0x22,0x00]
 0x01,0x00,0x4c,0xe4,0x40,0x08,0x00,0x02,0x20,0x21,0x22,0x00
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
index 0a5bafc55f4d43..346ec5853b6c77 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
@@ -235,5 +235,5 @@
 # VI: image_gather4 v[252:255], v1, s[8:15], s[12:15] dmask:0x3 ; encoding: [0x00,0x03,0x00,0xf1,0x01,0xfc,0x62,0x00]
 0x00,0x03,0x00,0xf1,0x01,0xfc,0x62,0x00
 
-# VI: image_gather4 v[252:255], v1, s[8:15], s[12:15] dmask:0x1 unorm glc slc tfe lwe da ; encoding: [0x00,0x71,0x03,0xf3,0x01,0xfc,0x62,0x00]
+# VI: image_gather4 v[252:253], v1, s[8:15], s[12:15] dmask:0x1 unorm glc slc tfe lwe da ; encoding: [0x00,0x71,0x03,0xf3,0x01,0xfc,0x62,0x00]
 0x00,0x71,0x03,0xf3,0x01,0xfc,0x62,0x00

arsenm · 2024-11-01T22:02:55Z

llvm/lib/Target/AMDGPU/MIMGInstructions.td

      Gather4 = 1 in {
    let VDataDwords = 2 in
-    defm _V2 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_64>; /* for packed D16 only */
+    defm _V2 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_64, 1>; /* for packed D16 only */


Use true and comment operand name?

jayfoad · 2024-11-04T09:48:53Z

llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg.txt

+# GFX10: image_gather4 v[252:255], v[1:2], s[8:15], s[12:15] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x08,0x00,0xf1,0x01,0xfc,0x62,0x00]
+0x08,0x08,0x00,0xf1,0x01,0xfc,0x62,0x00
+
+# image_gather4 v[254:255], v[1:2], s[8:15], s[12:15] dmask:0x8 dim:SQ_RSRC_IMG_2D d16 ; encoding: [0x08,0x08,0x00,0xf1,0x01,0xfe,0x62,0x80]


Missing GFX10: prefix

jayfoad · 2024-11-04T09:50:05Z

llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt

 0x00,0x03,0x00,0xf1,0x01,0xfc,0x62,0x00

-# VI: image_gather4 v[252:255], v1, s[8:15], s[12:15] dmask:0x1 unorm glc slc tfe lwe da ; encoding: [0x00,0x71,0x03,0xf3,0x01,0xfc,0x62,0x00]
+# VI: image_gather4 v[252:253], v1, s[8:15], s[12:15] dmask:0x1 unorm glc slc tfe lwe da ; encoding: [0x00,0x71,0x03,0xf3,0x01,0xfc,0x62,0x00]


Why does this change?

The previous default was the V4 variant, so decoding would start with VReg_128: v[252:255].
Now the default is the V2 variant, so decoding starts with VReg_64: v[252:253].
I think both then fail when trying to convert to V5 with VReg_160: v[252:256], which would be invalid.
I'm not sure what the original test was checking, since this is one of the "incorrect" encodings.

Based on the comment at line 231, it was probably testing that the register tuple goes out of range when tfe is set.

So really this should fail to disassemble, in the same way as any other instruction that uses a vgpr tuple that goes higher than v255. Right?

This currently matches sp3. It does fail on encoding as does sp3.

Looking into it further it seems that we do have inconsistent behavior.

We do not decode VOP when a register is out of bounds (gfx1010 example):

# v_add_f64 v[255:256], v[1:2], v[2:3]
0xff,0x00,0x64,0xd5,0x01,0x05,0x02,0x00

But we do decode MIMG when a register is out of bounds (gfx1010 example):

# image_load v65, v[253:256], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm
0x38,0x18,0x00,0xf0,0xfd,0x41,0x00,0x00

except that LLVM prints just v253 instead of v[253:256]. sp3 always decodes these encodings to the instructions shown above.

For invalid encodings, I would prefer that we consistently either fail to decode, or print something like v[252:256] /* VGPR tuple out of range */. But that would be for another patch.

I have no objection to the current patch if it fixes disassembly of valid encodings.

mbrkusanin · 2024-11-04T12:17:47Z

llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vsample.txt

+0x21,0xc0,0x0b,0xe6,0xfe,0x10,0x00,0x06,0x01,0x02,0x00,0x00
+
+# GFX12: image_gather4 v[252:255], [v1, v2], s[8:15], s[12:15] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0xc0,0x0b,0xe6,0xfc,0x10,0x00,0x06,0x01,0x02,0x00,0x00]
+0x01,0xc0,0x0b,0xe6,0xfc,0x10,0x00,0x06,0x01,0x02,0x00,0x00


missing new line

mbrkusanin

LGTM with nits

[AMDGPU][MC] Fix disassemble of image_gather4 with d16

4b15647

For GFX10+, image_gather4 instructions that have v[254:255] as dst reg and the d16 bit on can be assembled correctly but the generated binary fails to disassemble. This patch fixes this problem.

jwanggit86 added backend:AMDGPU llvm:mc Machine (object) code labels Nov 1, 2024

jwanggit86 requested a review from arsenm November 1, 2024 21:58

jwanggit86 linked an issue Nov 1, 2024 that may be closed by this pull request

[AMDGPU][MC][GFX10] MIMG gather4 opcodes failed to disassemble when d16 is specified #51065

Closed

arsenm reviewed Nov 1, 2024

View reviewed changes

jayfoad reviewed Nov 4, 2024

View reviewed changes

mbrkusanin reviewed Nov 4, 2024

View reviewed changes

mbrkusanin approved these changes Nov 4, 2024

View reviewed changes

Some minor changes.

1e0dbb0

jwanggit86 requested review from arsenm and jayfoad November 4, 2024 23:46

arsenm approved these changes Nov 4, 2024

View reviewed changes

jwanggit86 merged commit 7780cf0 into llvm:main Nov 5, 2024
8 checks passed

jwanggit86 deleted the fix-disassembler-for-image-gather4-with-d16 branch November 5, 2024 18:37

[AMDGPU][MC] Fix disassemble of image_gather4 with d16 #114609

[AMDGPU][MC] Fix disassemble of image_gather4 with d16 #114609

Uh oh!

Conversation

jwanggit86 commented Nov 1, 2024

Uh oh!

llvmbot commented Nov 1, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

mbrkusanin left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

llvmbot commented Nov 1, 2024 •

edited

Loading