-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AMDGPU][MC] Fix disassembly for v_permlane16_swap_b32 for GFX950 #146600
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU][MC] Fix disassembly for v_permlane16_swap_b32 for GFX950 #146600
Conversation
When targeting GFX950, disassembly of v_permlane16_swap_b32 and v_permlane32_swap_b32 instructions produces errors when they use certain vdst operand values, e.g., v_permlane16_swap_b32 v218, v219. This patch fixes this problem.
|
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-amdgpu Author: Jun Wang (jwanggit86) ChangesWhen targeting GFX950, disassembly of v_permlane16_swap_b32 and v_permlane32_swap_b32 instructions produces errors when they use certain vdst operand values, e.g., v_permlane16_swap_b32 v218, v219. This patch fixes this problem. Full diff: https://github.com/llvm/llvm-project/pull/146600.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 02b912bcfb9e0..8ce15e497e82e 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -389,8 +389,8 @@ def VOP_PERMLANE_SWAP : VOPProfile<[i32, i32, untyped, untyped]> {
let HasExtDPP = 0;
let HasExtSDWA = 0;
- let Ins32 = (ins Src0RC64:$vdst_in, Src0RC32:$src0);
- let Ins64 = (ins Src0RC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl);
+ let Ins32 = (ins DstRC:$vdst_in, Src0RC32:$src0);
+ let Ins64 = (ins DstRC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl);
let InsVOP3OpSel = (ins Src0RC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl);
let Asm64 = "$vdst, $src0$bound_ctrl$fi";
}
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index 7bc47914f40b7..57fc573b37ba9 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -40,14 +40,26 @@ global_load_lds_dwordx4 v2, s[4:5] offset:4
// GFX950: v_permlane16_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb3,0x02,0x7e]
v_permlane16_swap_b32 v1, v2
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_permlane16_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb3,0xb4,0x7f]
+v_permlane16_swap_b32 v218, v219
+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_permlane16_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb3,0x02,0x7e]
v_permlane16_swap_b32_e32 v1, v2
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_permlane16_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb3,0xb4,0x7f]
+v_permlane16_swap_b32_e32 v218, v219
+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_permlane16_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00]
v_permlane16_swap_b32_e64 v1, v2
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_permlane16_swap_b32_e64 v218, v219 ; encoding: [0xda,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00]
+v_permlane16_swap_b32_e64 v218, v219
+
// FIXME: Parsed as bound_ctrl:1?
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00]
@@ -81,14 +93,26 @@ v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1
// GFX950: v_permlane32_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb5,0x02,0x7e]
v_permlane32_swap_b32 v1, v2
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_permlane32_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb5,0xb4,0x7f]
+v_permlane32_swap_b32 v218, v219
+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_permlane32_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb5,0x02,0x7e]
v_permlane32_swap_b32_e32 v1, v2
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_permlane32_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb5,0xb4,0x7f]
+v_permlane32_swap_b32_e32 v218, v219
+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_permlane32_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00]
v_permlane32_swap_b32_e64 v1, v2
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_permlane32_swap_b32_e64 v218, v219 ; encoding: [0xda,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00]
+v_permlane32_swap_b32_e64 v218, v219
+
// FIXME: Parsed as bound_ctrl:1?
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt
index 9fc9c58387b90..01821593b0707 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt
@@ -47,9 +47,27 @@
# GFX950: v_permlane16_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb3,0x02,0x7e]
0x02,0xb3,0x02,0x7e
+# GFX950: v_permlane16_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb3,0xb4,0x7f]
+0xdb,0xb3,0xb4,0x7f
+
+# GFX950: v_permlane16_swap_b32_e32 v218, v2 ; encoding: [0x02,0xb3,0xb4,0x7f]
+0x02,0xb3,0xb4,0x7f
+
+# GFX950: v_permlane16_swap_b32_e32 v2, v219 ; encoding: [0xdb,0xb3,0x04,0x7e]
+0xdb,0xb3,0x04,0x7e
+
# GFX950: v_permlane16_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00]
0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00
+# GFX950: v_permlane16_swap_b32_e64 v218, v219 ; encoding: [0xda,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00]
+0xda,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00
+
+# GFX950: v_permlane16_swap_b32_e64 v218, v2 ; encoding: [0xda,0x00,0x99,0xd1,0x02,0x01,0x00,0x00]
+0xda,0x00,0x99,0xd1,0x02,0x01,0x00,0x00
+
+# GFX950: v_permlane16_swap_b32_e64 v2, v219 ; encoding: [0x02,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00]
+0x02,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00
+
# GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00]
0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00
@@ -63,9 +81,27 @@
# GFX950: v_permlane32_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb5,0x02,0x7e]
0x02,0xb5,0x02,0x7e
+# GFX950: v_permlane32_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb5,0xb4,0x7f]
+0xdb,0xb5,0xb4,0x7f
+
+# GFX950: v_permlane32_swap_b32_e32 v218, v2 ; encoding: [0x02,0xb5,0xb4,0x7f]
+0x02,0xb5,0xb4,0x7f
+
+# GFX950: v_permlane32_swap_b32_e32 v2, v219 ; encoding: [0xdb,0xb5,0x04,0x7e]
+0xdb,0xb5,0x04,0x7e
+
# GFX950: v_permlane32_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00]
0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00
+# GFX950: v_permlane32_swap_b32_e64 v218, v219 ; encoding: [0xda,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00]
+0xda,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00
+
+# GFX950: v_permlane32_swap_b32_e64 v218, v2 ; encoding: [0xda,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00]
+0xda,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00
+
+# GFX950: v_permlane32_swap_b32_e64 v2, v219 ; encoding: [0x02,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00]
+0x02,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00
+
# GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00]
0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00
|
|
Some explanations. With the previous def of We can see that This fails while trying to decode 218 (v218 encoded in 8 bits) as SrcReg9 (9 bit value). |
|
With the fix, the decoding code is: |
Sisyph
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
| let Ins64 = (ins Src0RC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl); | ||
| let Ins32 = (ins DstRC:$vdst_in, Src0RC32:$src0); | ||
| let Ins64 = (ins DstRC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl); | ||
| let InsVOP3OpSel = (ins Src0RC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As a side question, is it necessary to have InsVOP3OpSel here? It is not typical to have to override both InsVOP3OpSel and Ins64 in a profile.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good question. I'll look into it.
When targeting GFX950, disassembly of v_permlane16_swap_b32 and v_permlane32_swap_b32 instructions produces errors when they use certain vdst operand values, e.g., v_permlane16_swap_b32 v218, v219. This patch fixes this problem.