Skip to content

Commit be1510f

Browse files
authored
AMDGPU: Directly use align2 classes in gfx90a mimg operands
(llvm#157037) This regresses the assembler diagnostics. I made some attempts at avoiding this, but it turns out the way we manage these is really wrong. We're completely ignoring the reported missing features from MatchInstructionImpl and also don't have properly configured predicates to automatically get the message.
1 parent f8a77f3 commit be1510f

File tree

4 files changed

+61
-45
lines changed

4 files changed

+61
-45
lines changed

llvm/lib/Target/AMDGPU/MIMGInstructions.td

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,7 @@ class MIMG_NoSampler_Helper_gfx90a <mimgopc op, string asm,
436436
RegisterClass dst_rc,
437437
RegisterClass addr_rc,
438438
string dns="">
439-
: MIMG_gfx90a <op.GFX10M, (outs getLdStRegisterOperand<dst_rc>.ret:$vdata), dns> {
439+
: MIMG_gfx90a <op.GFX10M, (outs getLdStRegisterOperandAlign2<dst_rc>.ret:$vdata), dns> {
440440
let InOperandList = !con((ins addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
441441
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
442442
R128A16:$r128, LWE:$lwe, DA:$da),
@@ -578,7 +578,7 @@ multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm,
578578
if op.HAS_GFX10M then {
579579
def _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64>;
580580
if !not(ExtendedImageInst) then
581-
def _V2_gfx90a : MIMG_NoSampler_Helper_gfx90a <op, asm, dst_rc, VReg_64>;
581+
def _V2_gfx90a : MIMG_NoSampler_Helper_gfx90a <op, asm, dst_rc, VReg_64_Align2>;
582582
def _V2_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VReg_64>;
583583
def _V2_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10<op, asm, dst_rc, 2>;
584584
}
@@ -602,7 +602,7 @@ multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm,
602602
if op.HAS_GFX10M then {
603603
def _V3 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_96>;
604604
if !not(ExtendedImageInst) then
605-
def _V3_gfx90a : MIMG_NoSampler_Helper_gfx90a <op, asm, dst_rc, VReg_96>;
605+
def _V3_gfx90a : MIMG_NoSampler_Helper_gfx90a <op, asm, dst_rc, VReg_96_Align2>;
606606
def _V3_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VReg_96>;
607607
def _V3_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10<op, asm, dst_rc, 3>;
608608
}
@@ -626,7 +626,7 @@ multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm,
626626
if op.HAS_GFX10M then {
627627
def _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128>;
628628
if !not(ExtendedImageInst) then
629-
def _V4_gfx90a : MIMG_NoSampler_Helper_gfx90a <op, asm, dst_rc, VReg_128>;
629+
def _V4_gfx90a : MIMG_NoSampler_Helper_gfx90a <op, asm, dst_rc, VReg_128_Align2>;
630630
def _V4_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VReg_128>;
631631
def _V4_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10<op, asm, dst_rc, 4,
632632
!if(enableDisasm, "GFX10", "")>;
@@ -694,7 +694,7 @@ class MIMG_Store_Helper_gfx90a <mimgopc op, string asm,
694694
RegisterClass addr_rc,
695695
string dns = "">
696696
: MIMG_gfx90a<op.GFX10M, (outs), dns> {
697-
let InOperandList = !con((ins getLdStRegisterOperand<data_rc>.ret:$vdata,
697+
let InOperandList = !con((ins getLdStRegisterOperandAlign2<data_rc>.ret:$vdata,
698698
addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
699699
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
700700
R128A16:$r128, LWE:$lwe, DA:$da),
@@ -797,7 +797,7 @@ multiclass MIMG_Store_Addr_Helper <mimgopc op, string asm,
797797
let ssamp = 0 in {
798798
if op.HAS_GFX10M then {
799799
def _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64>;
800-
def _V2_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_64>;
800+
def _V2_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_64_Align2>;
801801
def _V2_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_64>;
802802
def _V2_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 2>;
803803
}
@@ -814,7 +814,7 @@ multiclass MIMG_Store_Addr_Helper <mimgopc op, string asm,
814814
let ssamp = 0 in {
815815
if op.HAS_GFX10M then {
816816
def _V3 : MIMG_Store_Helper <op, asm, data_rc, VReg_96>;
817-
def _V3_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_96>;
817+
def _V3_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_96_Align2>;
818818
def _V3_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_96>;
819819
def _V3_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 3>;
820820
}
@@ -831,7 +831,7 @@ multiclass MIMG_Store_Addr_Helper <mimgopc op, string asm,
831831
let ssamp = 0 in {
832832
if op.HAS_GFX10M then {
833833
def _V4 : MIMG_Store_Helper <op, asm, data_rc, VReg_128>;
834-
def _V4_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_128>;
834+
def _V4_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_128_Align2>;
835835
def _V4_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_128>;
836836
def _V4_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 4,
837837
!if(enableDisasm, "GFX10", "")>;
@@ -885,10 +885,10 @@ class MIMG_Atomic_gfx6789_base <bits<8> op, string asm, RegisterClass data_rc,
885885

886886
class MIMG_Atomic_gfx90a_base <bits<8> op, string asm, RegisterClass data_rc,
887887
RegisterClass addr_rc, string dns="">
888-
: MIMG_gfx90a <op, (outs getLdStRegisterOperand<data_rc>.ret:$vdst), dns> {
888+
: MIMG_gfx90a <op, (outs getLdStRegisterOperandAlign2<data_rc>.ret:$vdst), dns> {
889889
let Constraints = "$vdst = $vdata";
890890

891-
let InOperandList = (ins getLdStRegisterOperand<data_rc>.ret:$vdata,
891+
let InOperandList = (ins getLdStRegisterOperandAlign2<data_rc>.ret:$vdata,
892892
addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
893893
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
894894
R128A16:$r128, LWE:$lwe, DA:$da);
@@ -1022,7 +1022,7 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
10221022
}
10231023
if op.HAS_VI then {
10241024
def _V2_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_64, 0>;
1025-
def _V2_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_64, 0>;
1025+
def _V2_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_64_Align2, 0>;
10261026
}
10271027
if op.HAS_GFX10M then {
10281028
def _V2_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_64, 0>;
@@ -1044,7 +1044,7 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
10441044
}
10451045
if op.HAS_VI then {
10461046
def _V3_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_96, 0>;
1047-
def _V3_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_96, 0>;
1047+
def _V3_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_96_Align2, 0>;
10481048
}
10491049
if op.HAS_GFX10M then {
10501050
def _V3_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_96, 0>;
@@ -1066,7 +1066,7 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
10661066
}
10671067
if op.HAS_VI then {
10681068
def _V4_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_128, 0>;
1069-
def _V4_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_128, 0>;
1069+
def _V4_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_128_Align2, 0>;
10701070
}
10711071
if op.HAS_GFX10M then {
10721072
def _V4_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_128, 0>;
@@ -1140,7 +1140,7 @@ class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,
11401140

11411141
class MIMG_Sampler_gfx90a<mimgopc op, string asm, RegisterClass dst_rc,
11421142
RegisterClass src_rc, string dns="">
1143-
: MIMG_gfx90a<op.GFX10M, (outs getLdStRegisterOperand<dst_rc>.ret:$vdata), dns> {
1143+
: MIMG_gfx90a<op.GFX10M, (outs getLdStRegisterOperandAlign2<dst_rc>.ret:$vdata), dns> {
11441144
let InOperandList = !con((ins src_rc:$vaddr, SReg_256_XNULL:$srsrc, SReg_128_XNULL:$ssamp,
11451145
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
11461146
R128A16:$r128, LWE:$lwe, DA:$da),

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2604,6 +2604,23 @@ class getLdStRegisterOperandForVT<ValueType VT> {
26042604
RegisterOperand ret = getLdStRegisterOperandForSize<VT.Size>.ret;
26052605
}
26062606

2607+
class getLdStRegisterOperandAlign2<RegisterClass RC> {
2608+
// This type of operands is only used in pseudo instructions helping
2609+
// code generation and thus doesn't need encoding and decoding methods.
2610+
// It also doesn't need to support AGPRs, because GFX908/A/40 do not
2611+
// support True16.
2612+
defvar VLdSt_16 = RegisterOperand<VGPR_16>;
2613+
2614+
RegisterOperand ret =
2615+
!cond(!eq(RC.Size, 16) : VLdSt_16,
2616+
!eq(RC.Size, 32) : AVLdSt_32,
2617+
!eq(RC.Size, 64) : AVLdSt_64_Align2,
2618+
!eq(RC.Size, 96) : AVLdSt_96_Align2,
2619+
!eq(RC.Size, 128) : AVLdSt_128_Align2,
2620+
!eq(RC.Size, 160) : AVLdSt_160_Align2,
2621+
!eq(RC.Size, 1024) : AVLdSt_1024_Align2);
2622+
}
2623+
26072624
class getEquivalentAGPRClass<RegisterClass RC> {
26082625
RegisterClass ret =
26092626
!cond(!eq(RC.Size, 32) : AGPR_32,

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1432,16 +1432,15 @@ def AVDst_512 : AVDstOperand<AV_512>;
14321432
class AVLdStOperand<RegisterClass regClass>
14331433
: AVOperand<regClass, "decodeAVLdSt">;
14341434

1435-
// TODO: These cases should use target align variant
14361435
def AVLdSt_32 : AVLdStOperand<AV_32>;
1437-
def AVLdSt_64 : AVLdStOperand<AV_64>;
1438-
def AVLdSt_96 : AVLdStOperand<AV_96>;
1439-
def AVLdSt_128 : AVLdStOperand<AV_128>;
1440-
def AVLdSt_160 : AVLdStOperand<AV_160>;
1441-
def AVLdSt_1024 : AVLdStOperand<AV_1024>;
1442-
1443-
def AVLdSt_96_Align1 : AVLdStOperand<AV_96>;
1444-
def AVLdSt_96_Align2 : AVLdStOperand<AV_96_Align2>;
1436+
1437+
foreach size = ["64", "96", "128", "160", "256", "1024" ] in {
1438+
// TODO: These cases should use target align variant
1439+
def AVLdSt_#size : AVLdStOperand<!cast<RegisterClass>("AV_"#size)>;
1440+
1441+
def AVLdSt_#size#_Align1 : AVLdStOperand<!cast<RegisterClass>("AV_"#size)>;
1442+
def AVLdSt_#size#_Align2 : AVLdStOperand<!cast<RegisterClass>("AV_"#size#"_Align2")>;
1443+
}
14451444

14461445
//===----------------------------------------------------------------------===//
14471446
// ACSrc_* Operands with an AGPR or an inline constant

llvm/test/MC/AMDGPU/misaligned-vgpr-tuples-err.s

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -23,74 +23,74 @@ global_load_dwordx4 a[1:4], v[0:1], off
2323

2424

2525
image_load v[1:2], v2, s[0:7] dmask:0x3 unorm
26-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
26+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
2727

2828
image_load v[1:3], v2, s[0:7] dmask:0x7 unorm
29-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
29+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
3030

3131
image_load v[1:4], v2, s[0:7] dmask:0xf unorm
32-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
32+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
3333

3434
image_load a[1:2], v2, s[0:7] dmask:0x3 unorm
35-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
35+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
3636

3737
image_load a[1:3], v2, s[0:7] dmask:0x7 unorm
38-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
38+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
3939

4040
image_load a[1:4], v2, s[0:7] dmask:0xf unorm
41-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
41+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
4242

4343

4444
image_store v[193:194], v[238:241], s[28:35] dmask:0x3 unorm
45-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
45+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
4646

4747
image_store v[193:195], v[238:241], s[28:35] dmask:0x7 unorm
48-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
48+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
4949

5050
image_store v[193:196], v[238:241], s[28:35] dmask:0xf unorm
51-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
51+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
5252

5353
image_store a[193:194], v[238:241], s[28:35] dmask:0x3 unorm
54-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
54+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
5555

5656
image_store a[193:195], v[238:241], s[28:35] dmask:0x7 unorm
57-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
57+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
5858

5959
image_store a[193:196], v[238:241], s[28:35] dmask:0xf unorm
60-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
60+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
6161

6262

6363
image_atomic_swap v4, v[193:196], s[28:35] dmask:0x1 unorm glc
64-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
64+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
6565

6666
image_atomic_swap v[5:6], v1, s[8:15] dmask:0x3 unorm
67-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
67+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
6868

6969

7070
image_atomic_cmpswap v[5:6], v[192:195], s[28:35] dmask:0x3 unorm glc
71-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
71+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
7272

7373
image_atomic_cmpswap v[4:5], v[193:196], s[28:35] dmask:0x3 unorm glc
74-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
74+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
7575

7676
image_atomic_cmpswap v[5:8], v[192:195], s[28:35] dmask:0xf unorm glc
77-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
77+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
7878

7979
image_atomic_cmpswap v[4:7], v[193:196], s[28:35] dmask:0xf unorm glc
80-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
80+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
8181

8282

8383
image_atomic_cmpswap a[5:6], v[192:195], s[28:35] dmask:0x3 unorm glc
84-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
84+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
8585

8686
image_atomic_cmpswap a[4:5], v[193:196], s[28:35] dmask:0x3 unorm glc
87-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
87+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
8888

8989
image_atomic_cmpswap a[5:8], v[192:195], s[28:35] dmask:0xf unorm glc
90-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
90+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
9191

9292
image_atomic_cmpswap a[4:7], v[193:196], s[28:35] dmask:0xf unorm glc
93-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
93+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
9494

9595

9696
v_mfma_f32_32x32x8f16 a[0:15], a[1:2], v[0:1], a[0:15]

0 commit comments

Comments
 (0)