Skip to content

Commit 64c511f

Browse files
authored
[AMDGPU] Add additional aliases for load transpose instructions (llvm#163900)
1 parent a76c71b commit 64c511f

File tree

4 files changed

+99
-0
lines changed

4 files changed

+99
-0
lines changed

llvm/lib/Target/AMDGPU/DSInstructions.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1488,6 +1488,12 @@ let AssemblerPredicate = isGFX12Plus in {
14881488
def : MnemonicAlias<"ds_load_tr_b64", "ds_load_tr8_b64">, Requires<[isGFX1250Plus]>;
14891489
def : MnemonicAlias<"ds_load_tr_b128", "ds_load_tr16_b128">, Requires<[isGFX1250Plus]>;
14901490

1491+
// Additional aliases for ds load transpose instructions: ds_load_b<size>_tr_b<elt> is accepted as another spelling of ds_load_tr<elt>_b<size>.
1492+
def : MnemonicAlias<"ds_load_b64_tr_b8", "ds_load_tr8_b64">, Requires<[isGFX125xOnly]>;
1493+
def : MnemonicAlias<"ds_load_b128_tr_b16", "ds_load_tr16_b128">, Requires<[isGFX125xOnly]>;
1494+
def : MnemonicAlias<"ds_load_b64_tr_b4", "ds_load_tr4_b64">, Requires<[isGFX125xOnly]>;
1495+
def : MnemonicAlias<"ds_load_b96_tr_b6", "ds_load_tr6_b96">, Requires<[isGFX125xOnly]>;
1496+
14911497
//===----------------------------------------------------------------------===//
14921498
// GFX11.
14931499
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3711,6 +3711,12 @@ defm GLOBAL_LOAD_TR_B64_w32 : VFLAT_Real_AllAddr_gfx1250<0x058, "globa
37113711
defm GLOBAL_LOAD_TR4_B64 : VFLAT_Real_AllAddr_gfx1250<0x073>;
37123712
defm GLOBAL_LOAD_TR6_B96 : VFLAT_Real_AllAddr_gfx1250<0x074>;
37133713

3714+
// Additional aliases for global load transpose instructions: global_load_b<size>_tr_b<elt> is accepted as another spelling of global_load_tr<elt>_b<size>.
3715+
def : MnemonicAlias<"global_load_b128_tr_b16", "global_load_tr16_b128">, Requires<[isGFX125xOnly]>;
3716+
def : MnemonicAlias<"global_load_b64_tr_b8", "global_load_tr8_b64">, Requires<[isGFX125xOnly]>;
3717+
def : MnemonicAlias<"global_load_b64_tr_b4", "global_load_tr4_b64">, Requires<[isGFX125xOnly]>;
3718+
def : MnemonicAlias<"global_load_b96_tr_b6", "global_load_tr6_b96">, Requires<[isGFX125xOnly]>;
3719+
37143720
defm FLAT_ATOMIC_ADD_F64 : VFLAT_Real_Atomics_gfx1250<0x055>;
37153721
defm FLAT_ATOMIC_MIN_F64 : VFLAT_Real_Atomics_gfx1250<0x05b, "flat_atomic_min_num_f64">;
37163722
defm FLAT_ATOMIC_MAX_F64 : VFLAT_Real_Atomics_gfx1250<0x05c, "flat_atomic_max_num_f64">;

llvm/test/MC/AMDGPU/gfx1250_asm_vds_alias.s

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,15 @@ ds_load_tr_b64 v[2:3], v0
55

66
ds_load_tr_b128 v[2:5], v0
77
// GFX1250: ds_load_tr16_b128 v[2:5], v0 ; encoding: [0x00,0x00,0xf0,0xdb,0x00,0x00,0x00,0x02]
8+
9+
ds_load_b128_tr_b16 v[2:5], v0
10+
// GFX1250: ds_load_tr16_b128 v[2:5], v0 ; encoding: [0x00,0x00,0xf0,0xdb,0x00,0x00,0x00,0x02]
11+
12+
ds_load_b64_tr_b8 v[2:3], v0
13+
// GFX1250: ds_load_tr8_b64 v[2:3], v0 ; encoding: [0x00,0x00,0xf4,0xdb,0x00,0x00,0x00,0x02]
14+
15+
ds_load_b64_tr_b4 v[2:3], v0
16+
// GFX1250: ds_load_tr4_b64 v[2:3], v0 ; encoding: [0x00,0x00,0xe8,0xdb,0x00,0x00,0x00,0x02]
17+
18+
ds_load_b96_tr_b6 v[2:4], v0
19+
// GFX1250: ds_load_tr6_b96 v[2:4], v0 ; encoding: [0x00,0x00,0xec,0xdb,0x00,0x00,0x00,0x02]

llvm/test/MC/AMDGPU/gfx1250_asm_vflat_alias.s

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,78 @@ global_load_tr_b128 v[2:5], v[6:7], off offset:64
3535

3636
global_load_tr_b128 v[2:5], v[6:7], off offset:-64
3737
// GFX1250: global_load_tr16_b128 v[2:5], v[6:7], off offset:-64 ; encoding: [0x7c,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x06,0xc0,0xff,0xff]
38+
39+
global_load_b64_tr_b8 v[2:3], v0, s[0:1]
40+
// GFX1250: global_load_tr8_b64 v[2:3], v0, s[0:1] ; encoding: [0x00,0x00,0x16,0xee,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
41+
42+
global_load_b64_tr_b8 v[2:3], v0, s[0:1] offset:64
43+
// GFX1250: global_load_tr8_b64 v[2:3], v0, s[0:1] offset:64 ; encoding: [0x00,0x00,0x16,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
44+
45+
global_load_b64_tr_b8 v[2:3], v0, s[0:1] offset:-64
46+
// GFX1250: global_load_tr8_b64 v[2:3], v0, s[0:1] offset:-64 ; encoding: [0x00,0x00,0x16,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
47+
48+
global_load_b64_tr_b8 v[2:3], v[4:5], off
49+
// GFX1250: global_load_tr8_b64 v[2:3], v[4:5], off ; encoding: [0x7c,0x00,0x16,0xee,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00]
50+
51+
global_load_b64_tr_b8 v[2:3], v[4:5], off offset:64
52+
// GFX1250: global_load_tr8_b64 v[2:3], v[4:5], off offset:64 ; encoding: [0x7c,0x00,0x16,0xee,0x02,0x00,0x00,0x00,0x04,0x40,0x00,0x00]
53+
54+
global_load_b64_tr_b8 v[2:3], v[4:5], off offset:-64
55+
// GFX1250: global_load_tr8_b64 v[2:3], v[4:5], off offset:-64 ; encoding: [0x7c,0x00,0x16,0xee,0x02,0x00,0x00,0x00,0x04,0xc0,0xff,0xff]
56+
57+
global_load_b128_tr_b16 v[2:5], v0, s[0:1]
58+
// GFX1250: global_load_tr16_b128 v[2:5], v0, s[0:1] ; encoding: [0x00,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
59+
60+
global_load_b128_tr_b16 v[2:5], v0, s[0:1] offset:64
61+
// GFX1250: global_load_tr16_b128 v[2:5], v0, s[0:1] offset:64 ; encoding: [0x00,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
62+
63+
global_load_b128_tr_b16 v[2:5], v0, s[0:1] offset:-64
64+
// GFX1250: global_load_tr16_b128 v[2:5], v0, s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
65+
66+
global_load_b128_tr_b16 v[2:5], v[6:7], off
67+
// GFX1250: global_load_tr16_b128 v[2:5], v[6:7], off ; encoding: [0x7c,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x06,0x00,0x00,0x00]
68+
69+
global_load_b128_tr_b16 v[2:5], v[6:7], off offset:64
70+
// GFX1250: global_load_tr16_b128 v[2:5], v[6:7], off offset:64 ; encoding: [0x7c,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x06,0x40,0x00,0x00]
71+
72+
global_load_b128_tr_b16 v[2:5], v[6:7], off offset:-64
73+
// GFX1250: global_load_tr16_b128 v[2:5], v[6:7], off offset:-64 ; encoding: [0x7c,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x06,0xc0,0xff,0xff]
74+
75+
global_load_b64_tr_b4 v[2:3], v0, s[0:1]
76+
// GFX1250: global_load_tr4_b64 v[2:3], v0, s[0:1] ; encoding: [0x00,0xc0,0x1c,0xee,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
77+
78+
global_load_b64_tr_b4 v[2:3], v0, s[0:1] offset:64
79+
// GFX1250: global_load_tr4_b64 v[2:3], v0, s[0:1] offset:64 ; encoding: [0x00,0xc0,0x1c,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
80+
81+
global_load_b64_tr_b4 v[2:3], v0, s[0:1] offset:-64
82+
// GFX1250: global_load_tr4_b64 v[2:3], v0, s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x1c,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
83+
84+
global_load_b64_tr_b4 v[2:3], v[4:5], off
85+
// GFX1250: global_load_tr4_b64 v[2:3], v[4:5], off ; encoding: [0x7c,0xc0,0x1c,0xee,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00]
86+
87+
global_load_b64_tr_b4 v[2:3], v[4:5], off offset:64
88+
// GFX1250: global_load_tr4_b64 v[2:3], v[4:5], off offset:64 ; encoding: [0x7c,0xc0,0x1c,0xee,0x02,0x00,0x00,0x00,0x04,0x40,0x00,0x00]
89+
90+
global_load_b64_tr_b4 v[2:3], v[4:5], off offset:-64
91+
// GFX1250: global_load_tr4_b64 v[2:3], v[4:5], off offset:-64 ; encoding: [0x7c,0xc0,0x1c,0xee,0x02,0x00,0x00,0x00,0x04,0xc0,0xff,0xff]
92+
93+
global_load_b96_tr_b6 v[2:4], v0, s[0:1]
94+
// GFX1250: global_load_tr6_b96 v[2:4], v0, s[0:1] ; encoding: [0x00,0x00,0x1d,0xee,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
95+
96+
global_load_b96_tr_b6 v[3:5], v0, s[0:1]
97+
// GFX1250: global_load_tr6_b96 v[3:5], v0, s[0:1] ; encoding: [0x00,0x00,0x1d,0xee,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
98+
99+
global_load_b96_tr_b6 v[2:4], v0, s[0:1] offset:64
100+
// GFX1250: global_load_tr6_b96 v[2:4], v0, s[0:1] offset:64 ; encoding: [0x00,0x00,0x1d,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
101+
102+
global_load_b96_tr_b6 v[2:4], v0, s[0:1] offset:-64
103+
// GFX1250: global_load_tr6_b96 v[2:4], v0, s[0:1] offset:-64 ; encoding: [0x00,0x00,0x1d,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
104+
105+
global_load_b96_tr_b6 v[2:4], v[6:7], off
106+
// GFX1250: global_load_tr6_b96 v[2:4], v[6:7], off ; encoding: [0x7c,0x00,0x1d,0xee,0x02,0x00,0x00,0x00,0x06,0x00,0x00,0x00]
107+
108+
global_load_b96_tr_b6 v[2:4], v[6:7], off offset:64
109+
// GFX1250: global_load_tr6_b96 v[2:4], v[6:7], off offset:64 ; encoding: [0x7c,0x00,0x1d,0xee,0x02,0x00,0x00,0x00,0x06,0x40,0x00,0x00]
110+
111+
global_load_b96_tr_b6 v[2:4], v[6:7], off offset:-64
112+
// GFX1250: global_load_tr6_b96 v[2:4], v[6:7], off offset:-64 ; encoding: [0x7c,0x00,0x1d,0xee,0x02,0x00,0x00,0x00,0x06,0xc0,0xff,0xff]

0 commit comments

Comments
 (0)