Skip to content

Commit be1510f

Browse files
authored
AMDGPU: Directly use align2 classes in gfx90a mimg operands
(llvm#157037) This regresses the assembler diagnostics. I made some attempts at avoiding this, but it turns out the way we manage these is really wrong. We're completely ignoring the reported missing features from MatchInstructionImpl and also don't have properly configured predicates to automatically get the message.
1 parent f8a77f3 commit be1510f

File tree

4 files changed

+61
-45
lines changed

4 files changed

+61
-45
lines changed

llvm/lib/Target/AMDGPU/MIMGInstructions.td

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,7 @@ class MIMG_NoSampler_Helper_gfx90a <mimgopc op, string asm,
436436
RegisterClass dst_rc,
437437
RegisterClass addr_rc,
438438
string dns="">
439-
: MIMG_gfx90a <op.GFX10M, (outs getLdStRegisterOperand<dst_rc>.ret:$vdata), dns> {
439+
: MIMG_gfx90a <op.GFX10M, (outs getLdStRegisterOperandAlign2<dst_rc>.ret:$vdata), dns> {
440440
let InOperandList = !con((ins addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
441441
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
442442
R128A16:$r128, LWE:$lwe, DA:$da),
@@ -578,7 +578,7 @@ multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm,
578578
if op.HAS_GFX10M then {
579579
def _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64>;
580580
if !not(ExtendedImageInst) then
581-
def _V2_gfx90a : MIMG_NoSampler_Helper_gfx90a <op, asm, dst_rc, VReg_64>;
581+
def _V2_gfx90a : MIMG_NoSampler_Helper_gfx90a <op, asm, dst_rc, VReg_64_Align2>;
582582
def _V2_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VReg_64>;
583583
def _V2_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10<op, asm, dst_rc, 2>;
584584
}
@@ -602,7 +602,7 @@ multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm,
602602
if op.HAS_GFX10M then {
603603
def _V3 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_96>;
604604
if !not(ExtendedImageInst) then
605-
def _V3_gfx90a : MIMG_NoSampler_Helper_gfx90a <op, asm, dst_rc, VReg_96>;
605+
def _V3_gfx90a : MIMG_NoSampler_Helper_gfx90a <op, asm, dst_rc, VReg_96_Align2>;
606606
def _V3_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VReg_96>;
607607
def _V3_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10<op, asm, dst_rc, 3>;
608608
}
@@ -626,7 +626,7 @@ multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm,
626626
if op.HAS_GFX10M then {
627627
def _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128>;
628628
if !not(ExtendedImageInst) then
629-
def _V4_gfx90a : MIMG_NoSampler_Helper_gfx90a <op, asm, dst_rc, VReg_128>;
629+
def _V4_gfx90a : MIMG_NoSampler_Helper_gfx90a <op, asm, dst_rc, VReg_128_Align2>;
630630
def _V4_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VReg_128>;
631631
def _V4_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10<op, asm, dst_rc, 4,
632632
!if(enableDisasm, "GFX10", "")>;
@@ -694,7 +694,7 @@ class MIMG_Store_Helper_gfx90a <mimgopc op, string asm,
694694
RegisterClass addr_rc,
695695
string dns = "">
696696
: MIMG_gfx90a<op.GFX10M, (outs), dns> {
697-
let InOperandList = !con((ins getLdStRegisterOperand<data_rc>.ret:$vdata,
697+
let InOperandList = !con((ins getLdStRegisterOperandAlign2<data_rc>.ret:$vdata,
698698
addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
699699
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
700700
R128A16:$r128, LWE:$lwe, DA:$da),
@@ -797,7 +797,7 @@ multiclass MIMG_Store_Addr_Helper <mimgopc op, string asm,
797797
let ssamp = 0 in {
798798
if op.HAS_GFX10M then {
799799
def _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64>;
800-
def _V2_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_64>;
800+
def _V2_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_64_Align2>;
801801
def _V2_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_64>;
802802
def _V2_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 2>;
803803
}
@@ -814,7 +814,7 @@ multiclass MIMG_Store_Addr_Helper <mimgopc op, string asm,
814814
let ssamp = 0 in {
815815
if op.HAS_GFX10M then {
816816
def _V3 : MIMG_Store_Helper <op, asm, data_rc, VReg_96>;
817-
def _V3_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_96>;
817+
def _V3_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_96_Align2>;
818818
def _V3_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_96>;
819819
def _V3_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 3>;
820820
}
@@ -831,7 +831,7 @@ multiclass MIMG_Store_Addr_Helper <mimgopc op, string asm,
831831
let ssamp = 0 in {
832832
if op.HAS_GFX10M then {
833833
def _V4 : MIMG_Store_Helper <op, asm, data_rc, VReg_128>;
834-
def _V4_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_128>;
834+
def _V4_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_128_Align2>;
835835
def _V4_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_128>;
836836
def _V4_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 4,
837837
!if(enableDisasm, "GFX10", "")>;
@@ -885,10 +885,10 @@ class MIMG_Atomic_gfx6789_base <bits<8> op, string asm, RegisterClass data_rc,
885885

886886
class MIMG_Atomic_gfx90a_base <bits<8> op, string asm, RegisterClass data_rc,
887887
RegisterClass addr_rc, string dns="">
888-
: MIMG_gfx90a <op, (outs getLdStRegisterOperand<data_rc>.ret:$vdst), dns> {
888+
: MIMG_gfx90a <op, (outs getLdStRegisterOperandAlign2<data_rc>.ret:$vdst), dns> {
889889
let Constraints = "$vdst = $vdata";
890890

891-
let InOperandList = (ins getLdStRegisterOperand<data_rc>.ret:$vdata,
891+
let InOperandList = (ins getLdStRegisterOperandAlign2<data_rc>.ret:$vdata,
892892
addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
893893
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
894894
R128A16:$r128, LWE:$lwe, DA:$da);
@@ -1022,7 +1022,7 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
10221022
}
10231023
if op.HAS_VI then {
10241024
def _V2_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_64, 0>;
1025-
def _V2_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_64, 0>;
1025+
def _V2_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_64_Align2, 0>;
10261026
}
10271027
if op.HAS_GFX10M then {
10281028
def _V2_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_64, 0>;
@@ -1044,7 +1044,7 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
10441044
}
10451045
if op.HAS_VI then {
10461046
def _V3_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_96, 0>;
1047-
def _V3_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_96, 0>;
1047+
def _V3_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_96_Align2, 0>;
10481048
}
10491049
if op.HAS_GFX10M then {
10501050
def _V3_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_96, 0>;
@@ -1066,7 +1066,7 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
10661066
}
10671067
if op.HAS_VI then {
10681068
def _V4_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_128, 0>;
1069-
def _V4_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_128, 0>;
1069+
def _V4_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_128_Align2, 0>;
10701070
}
10711071
if op.HAS_GFX10M then {
10721072
def _V4_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_128, 0>;
@@ -1140,7 +1140,7 @@ class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,
11401140

11411141
class MIMG_Sampler_gfx90a<mimgopc op, string asm, RegisterClass dst_rc,
11421142
RegisterClass src_rc, string dns="">
1143-
: MIMG_gfx90a<op.GFX10M, (outs getLdStRegisterOperand<dst_rc>.ret:$vdata), dns> {
1143+
: MIMG_gfx90a<op.GFX10M, (outs getLdStRegisterOperandAlign2<dst_rc>.ret:$vdata), dns> {
11441144
let InOperandList = !con((ins src_rc:$vaddr, SReg_256_XNULL:$srsrc, SReg_128_XNULL:$ssamp,
11451145
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
11461146
R128A16:$r128, LWE:$lwe, DA:$da),

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2604,6 +2604,23 @@ class getLdStRegisterOperandForVT<ValueType VT> {
26042604
RegisterOperand ret = getLdStRegisterOperandForSize<VT.Size>.ret;
26052605
}
26062606

2607+
class getLdStRegisterOperandAlign2<RegisterClass RC> {
2608+
// This type of operands is only used in pseudo instructions helping
2609+
// code generation and thus doesn't need encoding and decoding methods.
2610+
// It also doesn't need to support AGPRs, because GFX908/A/40 do not
2611+
// support True16.
2612+
defvar VLdSt_16 = RegisterOperand<VGPR_16>;
2613+
2614+
RegisterOperand ret =
2615+
!cond(!eq(RC.Size, 16) : VLdSt_16,
2616+
!eq(RC.Size, 32) : AVLdSt_32,
2617+
!eq(RC.Size, 64) : AVLdSt_64_Align2,
2618+
!eq(RC.Size, 96) : AVLdSt_96_Align2,
2619+
!eq(RC.Size, 128) : AVLdSt_128_Align2,
2620+
!eq(RC.Size, 160) : AVLdSt_160_Align2,
2621+
!eq(RC.Size, 1024) : AVLdSt_1024_Align2);
2622+
}
2623+
26072624
class getEquivalentAGPRClass<RegisterClass RC> {
26082625
RegisterClass ret =
26092626
!cond(!eq(RC.Size, 32) : AGPR_32,

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1432,16 +1432,15 @@ def AVDst_512 : AVDstOperand<AV_512>;
14321432
class AVLdStOperand<RegisterClass regClass>
14331433
: AVOperand<regClass, "decodeAVLdSt">;
14341434

1435-
// TODO: These cases should use target align variant
14361435
def AVLdSt_32 : AVLdStOperand<AV_32>;
1437-
def AVLdSt_64 : AVLdStOperand<AV_64>;
1438-
def AVLdSt_96 : AVLdStOperand<AV_96>;
1439-
def AVLdSt_128 : AVLdStOperand<AV_128>;
1440-
def AVLdSt_160 : AVLdStOperand<AV_160>;
1441-
def AVLdSt_1024 : AVLdStOperand<AV_1024>;
1442-
1443-
def AVLdSt_96_Align1 : AVLdStOperand<AV_96>;
1444-
def AVLdSt_96_Align2 : AVLdStOperand<AV_96_Align2>;
1436+
1437+
foreach size = ["64", "96", "128", "160", "256", "1024" ] in {
1438+
// TODO: These cases should use target align variant
1439+
def AVLdSt_#size : AVLdStOperand<!cast<RegisterClass>("AV_"#size)>;
1440+
1441+
def AVLdSt_#size#_Align1 : AVLdStOperand<!cast<RegisterClass>("AV_"#size)>;
1442+
def AVLdSt_#size#_Align2 : AVLdStOperand<!cast<RegisterClass>("AV_"#size#"_Align2")>;
1443+
}
14451444

14461445
//===----------------------------------------------------------------------===//
14471446
// ACSrc_* Operands with an AGPR or an inline constant

llvm/test/MC/AMDGPU/misaligned-vgpr-tuples-err.s

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -23,74 +23,74 @@ global_load_dwordx4 a[1:4], v[0:1], off
2323

2424

2525
image_load v[1:2], v2, s[0:7] dmask:0x3 unorm
26-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
26+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
2727

2828
image_load v[1:3], v2, s[0:7] dmask:0x7 unorm
29-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
29+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
3030

3131
image_load v[1:4], v2, s[0:7] dmask:0xf unorm
32-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
32+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
3333

3434
image_load a[1:2], v2, s[0:7] dmask:0x3 unorm
35-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
35+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
3636

3737
image_load a[1:3], v2, s[0:7] dmask:0x7 unorm
38-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
38+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
3939

4040
image_load a[1:4], v2, s[0:7] dmask:0xf unorm
41-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
41+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
4242

4343

4444
image_store v[193:194], v[238:241], s[28:35] dmask:0x3 unorm
45-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
45+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
4646

4747
image_store v[193:195], v[238:241], s[28:35] dmask:0x7 unorm
48-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
48+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
4949

5050
image_store v[193:196], v[238:241], s[28:35] dmask:0xf unorm
51-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
51+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
5252

5353
image_store a[193:194], v[238:241], s[28:35] dmask:0x3 unorm
54-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
54+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
5555

5656
image_store a[193:195], v[238:241], s[28:35] dmask:0x7 unorm
57-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
57+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
5858

5959
image_store a[193:196], v[238:241], s[28:35] dmask:0xf unorm
60-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
60+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
6161

6262

6363
image_atomic_swap v4, v[193:196], s[28:35] dmask:0x1 unorm glc
64-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
64+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
6565

6666
image_atomic_swap v[5:6], v1, s[8:15] dmask:0x3 unorm
67-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
67+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
6868

6969

7070
image_atomic_cmpswap v[5:6], v[192:195], s[28:35] dmask:0x3 unorm glc
71-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
71+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
7272

7373
image_atomic_cmpswap v[4:5], v[193:196], s[28:35] dmask:0x3 unorm glc
74-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
74+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
7575

7676
image_atomic_cmpswap v[5:8], v[192:195], s[28:35] dmask:0xf unorm glc
77-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
77+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
7878

7979
image_atomic_cmpswap v[4:7], v[193:196], s[28:35] dmask:0xf unorm glc
80-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
80+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
8181

8282

8383
image_atomic_cmpswap a[5:6], v[192:195], s[28:35] dmask:0x3 unorm glc
84-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
84+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
8585

8686
image_atomic_cmpswap a[4:5], v[193:196], s[28:35] dmask:0x3 unorm glc
87-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
87+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
8888

8989
image_atomic_cmpswap a[5:8], v[192:195], s[28:35] dmask:0xf unorm glc
90-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
90+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
9191

9292
image_atomic_cmpswap a[4:7], v[193:196], s[28:35] dmask:0xf unorm glc
93-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
93+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
9494

9595

9696
v_mfma_f32_32x32x8f16 a[0:15], a[1:2], v[0:1], a[0:15]

0 commit comments

Comments
 (0)