Skip to content

Conversation

@jwanggit86
Copy link
Contributor

When targeting GFX950, disassembly of v_permlane16_swap_b32 and v_permlane32_swap_b32 instructions produces errors when they use certain vdst operand values, e.g., v_permlane16_swap_b32 v218, v219. This patch fixes this problem.

When targeting GFX950, disassembly of v_permlane16_swap_b32 and
v_permlane32_swap_b32 instructions produces errors when they use
certain vdst operand values, e.g., v_permlane16_swap_b32 v218, v219.
This patch fixes this problem.
@jwanggit86 jwanggit86 requested review from Sisyph and rampitec July 1, 2025 20:59
@jwanggit86 jwanggit86 added backend:AMDGPU llvm:mc Machine (object) code labels Jul 1, 2025
@llvmbot
Copy link
Member

llvmbot commented Jul 1, 2025

@llvm/pr-subscribers-mc

@llvm/pr-subscribers-backend-amdgpu

Author: Jun Wang (jwanggit86)

Changes

When targeting GFX950, disassembly of v_permlane16_swap_b32 and v_permlane32_swap_b32 instructions produces errors when they use certain vdst operand values, e.g., v_permlane16_swap_b32 v218, v219. This patch fixes this problem.


Full diff: https://github.com/llvm/llvm-project/pull/146600.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+2-2)
  • (modified) llvm/test/MC/AMDGPU/gfx950_asm_features.s (+24)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx950.txt (+36)
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 02b912bcfb9e0..8ce15e497e82e 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -389,8 +389,8 @@ def VOP_PERMLANE_SWAP : VOPProfile<[i32, i32, untyped, untyped]> {
   let HasExtDPP = 0;
   let HasExtSDWA = 0;
 
-  let Ins32 = (ins Src0RC64:$vdst_in, Src0RC32:$src0);
-  let Ins64 = (ins Src0RC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl);
+  let Ins32 = (ins DstRC:$vdst_in, Src0RC32:$src0);
+  let Ins64 = (ins DstRC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl);
   let InsVOP3OpSel = (ins Src0RC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl);
   let Asm64 = "$vdst, $src0$bound_ctrl$fi";
 }
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index 7bc47914f40b7..57fc573b37ba9 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -40,14 +40,26 @@ global_load_lds_dwordx4 v2, s[4:5] offset:4
 // GFX950: v_permlane16_swap_b32_e32 v1, v2        ; encoding: [0x02,0xb3,0x02,0x7e]
 v_permlane16_swap_b32 v1, v2
 
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_permlane16_swap_b32_e32 v218, v219    ; encoding: [0xdb,0xb3,0xb4,0x7f]
+v_permlane16_swap_b32 v218, v219
+
 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
 // GFX950: v_permlane16_swap_b32_e32 v1, v2        ; encoding: [0x02,0xb3,0x02,0x7e]
 v_permlane16_swap_b32_e32 v1, v2
 
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_permlane16_swap_b32_e32 v218, v219    ; encoding: [0xdb,0xb3,0xb4,0x7f]
+v_permlane16_swap_b32_e32 v218, v219
+
 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
 // GFX950: v_permlane16_swap_b32_e64 v1, v2        ; encoding: [0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00]
 v_permlane16_swap_b32_e64 v1, v2
 
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_permlane16_swap_b32_e64 v218, v219    ; encoding: [0xda,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00]
+v_permlane16_swap_b32_e64 v218, v219
+
 // FIXME: Parsed as bound_ctrl:1?
 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
 // GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00]
@@ -81,14 +93,26 @@ v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1
 // GFX950: v_permlane32_swap_b32_e32 v1, v2        ; encoding: [0x02,0xb5,0x02,0x7e]
 v_permlane32_swap_b32 v1, v2
 
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_permlane32_swap_b32_e32 v218, v219    ; encoding: [0xdb,0xb5,0xb4,0x7f]
+v_permlane32_swap_b32 v218, v219
+
 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
 // GFX950: v_permlane32_swap_b32_e32 v1, v2        ; encoding: [0x02,0xb5,0x02,0x7e]
 v_permlane32_swap_b32_e32 v1, v2
 
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_permlane32_swap_b32_e32 v218, v219    ; encoding: [0xdb,0xb5,0xb4,0x7f]
+v_permlane32_swap_b32_e32 v218, v219
+
 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
 // GFX950: v_permlane32_swap_b32_e64 v1, v2        ; encoding: [0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00]
 v_permlane32_swap_b32_e64 v1, v2
 
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_permlane32_swap_b32_e64 v218, v219    ; encoding: [0xda,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00]
+v_permlane32_swap_b32_e64 v218, v219
+
 // FIXME: Parsed as bound_ctrl:1?
 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
 // GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt
index 9fc9c58387b90..01821593b0707 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt
@@ -47,9 +47,27 @@
 # GFX950:   v_permlane16_swap_b32_e32 v1, v2        ; encoding: [0x02,0xb3,0x02,0x7e]
 0x02,0xb3,0x02,0x7e
 
+# GFX950: v_permlane16_swap_b32_e32 v218, v219    ; encoding: [0xdb,0xb3,0xb4,0x7f]
+0xdb,0xb3,0xb4,0x7f
+
+# GFX950: v_permlane16_swap_b32_e32 v218, v2      ; encoding: [0x02,0xb3,0xb4,0x7f]
+0x02,0xb3,0xb4,0x7f
+
+# GFX950: v_permlane16_swap_b32_e32 v2, v219      ; encoding: [0xdb,0xb3,0x04,0x7e]
+0xdb,0xb3,0x04,0x7e
+
 # GFX950:   v_permlane16_swap_b32_e64 v1, v2        ; encoding: [0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00]
 0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00
 
+# GFX950: v_permlane16_swap_b32_e64 v218, v219    ; encoding: [0xda,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00]
+0xda,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00
+
+# GFX950: v_permlane16_swap_b32_e64 v218, v2      ; encoding: [0xda,0x00,0x99,0xd1,0x02,0x01,0x00,0x00]
+0xda,0x00,0x99,0xd1,0x02,0x01,0x00,0x00
+
+# GFX950: v_permlane16_swap_b32_e64 v2, v219      ; encoding: [0x02,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00]
+0x02,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00
+
 # GFX950:   v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00]
 0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00
 
@@ -63,9 +81,27 @@
 # GFX950:   v_permlane32_swap_b32_e32 v1, v2        ; encoding: [0x02,0xb5,0x02,0x7e]
 0x02,0xb5,0x02,0x7e
 
+# GFX950: v_permlane32_swap_b32_e32 v218, v219    ; encoding: [0xdb,0xb5,0xb4,0x7f]
+0xdb,0xb5,0xb4,0x7f
+
+# GFX950: v_permlane32_swap_b32_e32 v218, v2      ; encoding: [0x02,0xb5,0xb4,0x7f]
+0x02,0xb5,0xb4,0x7f
+
+# GFX950: v_permlane32_swap_b32_e32 v2, v219      ; encoding: [0xdb,0xb5,0x04,0x7e]
+0xdb,0xb5,0x04,0x7e
+
 # GFX950:   v_permlane32_swap_b32_e64 v1, v2        ; encoding: [0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00]
 0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00
 
+# GFX950: v_permlane32_swap_b32_e64 v218, v219    ; encoding: [0xda,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00]
+0xda,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00
+
+# GFX950: v_permlane32_swap_b32_e64 v218, v2      ; encoding: [0xda,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00]
+0xda,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00
+
+# GFX950: v_permlane32_swap_b32_e64 v2, v219      ; encoding: [0x02,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00]
+0x02,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00
+
 # GFX950:   v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00]
 0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00
 

@jwanggit86
Copy link
Contributor Author

Some explanations. With the previous def of VOP_PERMLANE_SWAP, the pseudo instruction v_permlane16_swap_b32_e32 has the following:

dag OutOperandList = (outs anonymous_9783:$vdst, VRegSrc_32:$src0_out);
dag InOperandList = (ins VRegSrc_32:$vdst_in, VRegSrc_32:$src0);

We can see that $vdst_in has a different type than $vdst. The decoding code in decodeToMCInst is as follows:

  case 1390:
    tmp = fieldFromInstruction(insn, 17, 8);  // tmp = 218 for v218
    if (!Check(S, DecodeVGPR_32RegisterClass(MI, tmp, Address, Decoder))) { return MCDisassembler::Fail; }  // creates Reg 707
    tmp = fieldFromInstruction(insn, 0, 9);  // tmp=475 for v219
    if (!Check(S, decodeSrcReg9<32>(MI, tmp, Address, Decoder))) { return MCDisassembler::Fail; }  //creates Reg 708
    tmp = fieldFromInstruction(insn, 17, 8);  // tmp=218 for v218
    if (!Check(S, decodeSrcReg9<32>(MI, tmp, Address, Decoder))) { return MCDisassembler::Fail; } // Fail here because v218 is not SrcReg9
    tmp = fieldFromInstruction(insn, 0, 9);
    if (!Check(S, decodeSrcReg9<32>(MI, tmp, Address, Decoder))) { return MCDisassembler::Fail; }
    return S;

This fails while trying to decode 218 (v218 encoded in 8 bits) as SrcReg9 (9 bit value).

@jwanggit86
Copy link
Contributor Author

With the fix, the decoding code is:

  case 1390:
    tmp = fieldFromInstruction(insn, 17, 8);
    if (!Check(S, DecodeVGPR_32RegisterClass(MI, tmp, Address, Decoder))) { return MCDisassembler::Fail; }
    tmp = fieldFromInstruction(insn, 0, 9);
    if (!Check(S, decodeSrcReg9<32>(MI, tmp, Address, Decoder))) { return MCDisassembler::Fail; }
    tmp = fieldFromInstruction(insn, 17, 8);
    if (!Check(S, DecodeVGPR_32RegisterClass(MI, tmp, Address, Decoder))) { return MCDisassembler::Fail; }  **// This line is different.**
    tmp = fieldFromInstruction(insn, 0, 9);
    if (!Check(S, decodeSrcReg9<32>(MI, tmp, Address, Decoder))) { return MCDisassembler::Fail; }
    return S;

Copy link
Contributor

@Sisyph Sisyph left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

let Ins64 = (ins Src0RC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl);
let Ins32 = (ins DstRC:$vdst_in, Src0RC32:$src0);
let Ins64 = (ins DstRC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl);
let InsVOP3OpSel = (ins Src0RC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a side question, is it necessary to have InsVOP3OpSel here? It is not typical to have to override both InsVOP3OpSel and Ins64 in a profile.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question. I'll look into it.

@jwanggit86 jwanggit86 merged commit 641ad52 into llvm:main Jul 2, 2025
10 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:AMDGPU llvm:mc Machine (object) code

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants