diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index d0ad120e7ca650..b841171c285d80 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -1488,6 +1488,12 @@ let AssemblerPredicate = isGFX12Plus in { def : MnemonicAlias<"ds_load_tr_b64", "ds_load_tr8_b64">, Requires<[isGFX1250Plus]>; def : MnemonicAlias<"ds_load_tr_b128", "ds_load_tr16_b128">, Requires<[isGFX1250Plus]>; +// Additional aliases for ds load transpose instructions. +def : MnemonicAlias<"ds_load_b64_tr_b8", "ds_load_tr8_b64">, Requires<[isGFX125xOnly]>; +def : MnemonicAlias<"ds_load_b128_tr_b16", "ds_load_tr16_b128">, Requires<[isGFX125xOnly]>; +def : MnemonicAlias<"ds_load_b64_tr_b4", "ds_load_tr4_b64">, Requires<[isGFX125xOnly]>; +def : MnemonicAlias<"ds_load_b96_tr_b6", "ds_load_tr6_b96">, Requires<[isGFX125xOnly]>; + //===----------------------------------------------------------------------===// // GFX11. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 6de59be7665b48..8ea64d17417f78 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -3711,6 +3711,12 @@ defm GLOBAL_LOAD_TR_B64_w32 : VFLAT_Real_AllAddr_gfx1250<0x058, "globa defm GLOBAL_LOAD_TR4_B64 : VFLAT_Real_AllAddr_gfx1250<0x073>; defm GLOBAL_LOAD_TR6_B96 : VFLAT_Real_AllAddr_gfx1250<0x074>; +// Additional aliases for global load transpose instructions. +def : MnemonicAlias<"global_load_b128_tr_b16", "global_load_tr16_b128">, Requires<[isGFX125xOnly]>; +def : MnemonicAlias<"global_load_b64_tr_b8", "global_load_tr8_b64">, Requires<[isGFX125xOnly]>; +def : MnemonicAlias<"global_load_b64_tr_b4", "global_load_tr4_b64">, Requires<[isGFX125xOnly]>; +def : MnemonicAlias<"global_load_b96_tr_b6", "global_load_tr6_b96">, Requires<[isGFX125xOnly]>; + defm FLAT_ATOMIC_ADD_F64 : VFLAT_Real_Atomics_gfx1250<0x055>; defm FLAT_ATOMIC_MIN_F64 : VFLAT_Real_Atomics_gfx1250<0x05b, "flat_atomic_min_num_f64">; defm FLAT_ATOMIC_MAX_F64 : VFLAT_Real_Atomics_gfx1250<0x05c, "flat_atomic_max_num_f64">; diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vds_alias.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vds_alias.s index 5b6bb477bdc91a..83313a29657f21 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vds_alias.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vds_alias.s @@ -5,3 +5,15 @@ ds_load_tr_b64 v[2:3], v0 ds_load_tr_b128 v[2:5], v0 // GFX1250: ds_load_tr16_b128 v[2:5], v0 ; encoding: [0x00,0x00,0xf0,0xdb,0x00,0x00,0x00,0x02] + +ds_load_b128_tr_b16 v[2:5], v0 +// GFX1250: ds_load_tr16_b128 v[2:5], v0 ; encoding: [0x00,0x00,0xf0,0xdb,0x00,0x00,0x00,0x02] + +ds_load_b64_tr_b8 v[2:3], v0 +// GFX1250: ds_load_tr8_b64 v[2:3], v0 ; encoding: [0x00,0x00,0xf4,0xdb,0x00,0x00,0x00,0x02] + +ds_load_b64_tr_b4 v[2:3], v0 +// GFX1250: ds_load_tr4_b64 v[2:3], v0 ; encoding: [0x00,0x00,0xe8,0xdb,0x00,0x00,0x00,0x02] + +ds_load_tr6_b96 v[2:4], v0 +// GFX1250: ds_load_tr6_b96 v[2:4], v0 ; encoding: [0x00,0x00,0xec,0xdb,0x00,0x00,0x00,0x02] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_alias.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_alias.s index 6b2dd67b073e32..f983bc0b3dfcab 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_alias.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_alias.s @@ -35,3 +35,78 @@ global_load_tr_b128 v[2:5], v[6:7], off offset:64 global_load_tr_b128 v[2:5], v[6:7], off offset:-64 // GFX1250: global_load_tr16_b128 v[2:5], v[6:7], off offset:-64 ; encoding: [0x7c,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x06,0xc0,0xff,0xff] + +global_load_b64_tr_b8 v[2:3], v0, s[0:1] +// GFX1250: global_load_tr8_b64 v[2:3], v0, s[0:1] ; encoding: [0x00,0x00,0x16,0xee,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00] + +global_load_b64_tr_b8 v[2:3], v0, s[0:1] offset:64 +// GFX1250: global_load_tr8_b64 v[2:3], v0, s[0:1] offset:64 ; encoding: [0x00,0x00,0x16,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00] + +global_load_b64_tr_b8 v[2:3], v0, s[0:1] offset:-64 +// GFX1250: global_load_tr8_b64 v[2:3], v0, s[0:1] offset:-64 ; encoding: [0x00,0x00,0x16,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] + +global_load_b64_tr_b8 v[2:3], v[4:5], off +// GFX1250: global_load_tr8_b64 v[2:3], v[4:5], off ; encoding: [0x7c,0x00,0x16,0xee,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00] + +global_load_b64_tr_b8 v[2:3], v[4:5], off offset:64 +// GFX1250: global_load_tr8_b64 v[2:3], v[4:5], off offset:64 ; encoding: [0x7c,0x00,0x16,0xee,0x02,0x00,0x00,0x00,0x04,0x40,0x00,0x00] + +global_load_b64_tr_b8 v[2:3], v[4:5], off offset:-64 +// GFX1250: global_load_tr8_b64 v[2:3], v[4:5], off offset:-64 ; encoding: [0x7c,0x00,0x16,0xee,0x02,0x00,0x00,0x00,0x04,0xc0,0xff,0xff] + +global_load_b128_tr_b16 v[2:5], v0, s[0:1] +// GFX1250: global_load_tr16_b128 v[2:5], v0, s[0:1] ; encoding: [0x00,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00] + +global_load_b128_tr_b16 v[2:5], v0, s[0:1] offset:64 +// GFX1250: global_load_tr16_b128 v[2:5], v0, s[0:1] offset:64 ; encoding: [0x00,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00] + +global_load_b128_tr_b16 v[2:5], v0, s[0:1] offset:-64 +// GFX1250: global_load_tr16_b128 v[2:5], v0, s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] + +global_load_b128_tr_b16 v[2:5], v[6:7], off +// GFX1250: global_load_tr16_b128 v[2:5], v[6:7], off ; encoding: [0x7c,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x06,0x00,0x00,0x00] + +global_load_b128_tr_b16 v[2:5], v[6:7], off offset:64 +// GFX1250: global_load_tr16_b128 v[2:5], v[6:7], off offset:64 ; encoding: [0x7c,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x06,0x40,0x00,0x00] + +global_load_b128_tr_b16 v[2:5], v[6:7], off offset:-64 +// GFX1250: global_load_tr16_b128 v[2:5], v[6:7], off offset:-64 ; encoding: [0x7c,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x06,0xc0,0xff,0xff] + +global_load_b64_tr_b4 v[2:3], v0, s[0:1] +// GFX1250: global_load_tr4_b64 v[2:3], v0, s[0:1] ; encoding: [0x00,0xc0,0x1c,0xee,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00] + +global_load_b64_tr_b4 v[2:3], v0, s[0:1] offset:64 +// GFX1250: global_load_tr4_b64 v[2:3], v0, s[0:1] offset:64 ; encoding: [0x00,0xc0,0x1c,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00] + +global_load_b64_tr_b4 v[2:3], v0, s[0:1] offset:-64 +// GFX1250: global_load_tr4_b64 v[2:3], v0, s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x1c,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] + +global_load_b64_tr_b4 v[2:3], v[4:5], off +// GFX1250: global_load_tr4_b64 v[2:3], v[4:5], off ; encoding: [0x7c,0xc0,0x1c,0xee,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00] + +global_load_b64_tr_b4 v[2:3], v[4:5], off offset:64 +// GFX1250: global_load_tr4_b64 v[2:3], v[4:5], off offset:64 ; encoding: [0x7c,0xc0,0x1c,0xee,0x02,0x00,0x00,0x00,0x04,0x40,0x00,0x00] + +global_load_b64_tr_b4 v[2:3], v[4:5], off offset:-64 +// GFX1250: global_load_tr4_b64 v[2:3], v[4:5], off offset:-64 ; encoding: [0x7c,0xc0,0x1c,0xee,0x02,0x00,0x00,0x00,0x04,0xc0,0xff,0xff] + +global_load_b96_tr_b6 v[2:4], v0, s[0:1] +// GFX1250: global_load_tr6_b96 v[2:4], v0, s[0:1] ; encoding: [0x00,0x00,0x1d,0xee,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00] + +global_load_b96_tr_b6 v[3:5], v0, s[0:1] +// GFX1250: global_load_tr6_b96 v[3:5], v0, s[0:1] ; encoding: [0x00,0x00,0x1d,0xee,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00] + +global_load_b96_tr_b6 v[2:4], v0, s[0:1] offset:64 +// GFX1250: global_load_tr6_b96 v[2:4], v0, s[0:1] offset:64 ; encoding: [0x00,0x00,0x1d,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00] + +global_load_b96_tr_b6 v[2:4], v0, s[0:1] offset:-64 +// GFX1250: global_load_tr6_b96 v[2:4], v0, s[0:1] offset:-64 ; encoding: [0x00,0x00,0x1d,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] + +global_load_b96_tr_b6 v[2:4], v[6:7], off +// GFX1250: global_load_tr6_b96 v[2:4], v[6:7], off ; encoding: [0x7c,0x00,0x1d,0xee,0x02,0x00,0x00,0x00,0x06,0x00,0x00,0x00] + +global_load_b96_tr_b6 v[2:4], v[6:7], off offset:64 +// GFX1250: global_load_tr6_b96 v[2:4], v[6:7], off offset:64 ; encoding: [0x7c,0x00,0x1d,0xee,0x02,0x00,0x00,0x00,0x06,0x40,0x00,0x00] + +global_load_b96_tr_b6 v[2:4], v[6:7], off offset:-64 +// GFX1250: global_load_tr6_b96 v[2:4], v[6:7], off offset:-64 ; encoding: [0x7c,0x00,0x1d,0xee,0x02,0x00,0x00,0x00,0x06,0xc0,0xff,0xff]