diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 3703133126b0f..a9376250931b6 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -468,6 +468,56 @@ class DSAtomicRetPat { + let OtherPredicates = [LDSRequiresM0Init] in { + def : DSAtomicRetPat(frag#"_local_m0_"#vt)>; + } + + let OtherPredicates = [NotLDSRequiresM0Init] in { + def : DSAtomicRetPat(!cast(inst)#"_gfx9"), vt, + !cast(frag#"_local_"#vt)>; + } + + let OtherPredicates = [HasGDS] in { + def : DSAtomicRetPat(frag#"_region_m0_"#vt), + /* complexity */ 0, /* gds */ 1>; + } +} + +multiclass DSAtomicRetNoRetPat_NoM0_mc { + def : DSAtomicRetPat(frag#"_local_"#vt)>; + def : DSAtomicRetPat(frag#"_local_noret_"#vt), /* complexity */ 1>; +} + +multiclass DSAtomicRetNoRetPat_mc { + let OtherPredicates = [LDSRequiresM0Init] in { + def : DSAtomicRetPat(frag#"_local_m0_"#vt)>; + def : DSAtomicRetPat(frag#"_local_m0_noret_"#vt), /* complexity */ 1>; + } + + let OtherPredicates = [NotLDSRequiresM0Init] in { + defm : DSAtomicRetNoRetPat_NoM0_mc< + !cast(!cast(inst)#"_gfx9"), + !cast(!cast(noRetInst)#"_gfx9"), + vt, frag>; + } + + let OtherPredicates = [HasGDS] in { + def : DSAtomicRetPat(frag#"_region_m0_"#vt), + /* complexity */ 0, /* gds */ 1>; + def : DSAtomicRetPat(frag#"_region_m0_noret_"#vt), + /* complexity */ 1, /* gds */ 1>; + } +} + defm DS_ADD_U32 : DS_1A1D_NORET_mc<"ds_add_u32">; defm DS_SUB_U32 : DS_1A1D_NORET_mc<"ds_sub_u32">; defm DS_RSUB_U32 : DS_1A1D_NORET_mc<"ds_rsub_u32">; @@ -518,10 +568,10 @@ let SubtargetPredicate = HasLdsAtomicAddF64 in { } // End SubtargetPredicate = HasLdsAtomicAddF64 let SubtargetPredicate = HasAtomicDsPkAdd16Insts in { - defm DS_PK_ADD_F16 : DS_1A1D_NORET_mc<"ds_pk_add_f16">; - defm DS_PK_ADD_RTN_F16 : DS_1A1D_RET_mc<"ds_pk_add_rtn_f16", VGPR_32>; - defm DS_PK_ADD_BF16 : DS_1A1D_NORET_mc<"ds_pk_add_bf16">; - defm DS_PK_ADD_RTN_BF16 : DS_1A1D_RET_mc<"ds_pk_add_rtn_bf16", VGPR_32>; + defm DS_PK_ADD_F16 : DS_1A1D_NORET_mc_gfx9<"ds_pk_add_f16">; + defm DS_PK_ADD_RTN_F16 : DS_1A1D_RET_mc_gfx9<"ds_pk_add_rtn_f16", VGPR_32>; + defm DS_PK_ADD_BF16 : DS_1A1D_NORET_mc_gfx9<"ds_pk_add_bf16">; + defm DS_PK_ADD_RTN_BF16 : DS_1A1D_RET_mc_gfx9<"ds_pk_add_rtn_bf16", VGPR_32>; } // End SubtargetPredicate = HasAtomicDsPkAdd16Insts defm DS_CMPSTORE_B32 : DS_1A2D_NORET_mc<"ds_cmpstore_b32">; @@ -774,10 +824,10 @@ def DS_BVH_STACK_PUSH8_POP2_RTN_B64 : DS_BVH_STACK< "ds_bvh_stack_push8_pop2_rtn_b64", VReg_64, VReg_256>; } // End OtherPredicates = [HasImageInsts]. -defm DS_COND_SUB_U32 : DS_1A1D_NORET_mc<"ds_cond_sub_u32">; -defm DS_COND_SUB_RTN_U32 : DS_1A1D_RET_mc<"ds_cond_sub_rtn_u32", VGPR_32>; -defm DS_SUB_CLAMP_U32 : DS_1A1D_NORET_mc<"ds_sub_clamp_u32">; -defm DS_SUB_CLAMP_RTN_U32 : DS_1A1D_RET_mc<"ds_sub_clamp_rtn_u32", VGPR_32>; +defm DS_COND_SUB_U32 : DS_1A1D_NORET_mc_gfx9<"ds_cond_sub_u32">; +defm DS_COND_SUB_RTN_U32 : DS_1A1D_RET_mc_gfx9<"ds_cond_sub_rtn_u32", VGPR_32>; +defm DS_SUB_CLAMP_U32 : DS_1A1D_NORET_mc_gfx9<"ds_sub_clamp_u32">; +defm DS_SUB_CLAMP_RTN_U32 : DS_1A1D_RET_mc_gfx9<"ds_sub_clamp_rtn_u32", VGPR_32>; def DS_BPERMUTE_FI_B32 : DS_1A1D_PERMUTE <"ds_bpermute_fi_b32", int_amdgcn_ds_bpermute_fi_b32>; @@ -1088,50 +1138,6 @@ defm : DSWritePat_mc ; } // End AddedComplexity = 100 -multiclass DSAtomicRetPat_mc { - let OtherPredicates = [LDSRequiresM0Init] in { - def : DSAtomicRetPat(frag#"_local_m0_"#vt)>; - } - - let OtherPredicates = [NotLDSRequiresM0Init] in { - def : DSAtomicRetPat(!cast(inst)#"_gfx9"), vt, - !cast(frag#"_local_"#vt)>; - } - - let OtherPredicates = [HasGDS] in { - def : DSAtomicRetPat(frag#"_region_m0_"#vt), - /* complexity */ 0, /* gds */ 1>; - } -} - -multiclass DSAtomicRetNoRetPat_mc { - let OtherPredicates = [LDSRequiresM0Init] in { - def : DSAtomicRetPat(frag#"_local_m0_"#vt)>; - def : DSAtomicRetPat(frag#"_local_m0_noret_"#vt), /* complexity */ 1>; - } - - let OtherPredicates = [NotLDSRequiresM0Init] in { - def : DSAtomicRetPat(!cast(inst)#"_gfx9"), vt, - !cast(frag#"_local_"#vt)>; - def : DSAtomicRetPat(!cast(noRetInst)#"_gfx9"), vt, - !cast(frag#"_local_noret_"#vt), /* complexity */ 1>; - } - - let OtherPredicates = [HasGDS] in { - def : DSAtomicRetPat(frag#"_region_m0_"#vt), - /* complexity */ 0, /* gds */ 1>; - def : DSAtomicRetPat(frag#"_region_m0_noret_"#vt), - /* complexity */ 1, /* gds */ 1>; - } -} - - - let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in { // Caution, the order of src and cmp is the *opposite* of the BUFFER_ATOMIC_CMPSWAP opcode. class DSAtomicCmpXChgSwapped; -defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_NoM0_mc; +defm : DSAtomicRetNoRetPat_NoM0_mc; } let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in { @@ -1262,7 +1268,7 @@ class DSAtomicRetPatIntrinsic; +defm : DSAtomicRetNoRetPat_NoM0_mc; } // End SubtargetPredicate = HasAtomicDsPkAdd16Insts let OtherPredicates = [HasGDS] in @@ -1354,8 +1360,10 @@ class Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12 op, DS_Pseudo ps, int ef, // GFX12. //===----------------------------------------------------------------------===// -multiclass DS_Real_gfx12 op, string name = !tolower(NAME)> { - defvar ps = !cast(NAME); +multiclass DS_Real_gfx12 op, + DS_Pseudo ps = !cast(NAME), + string name = !tolower(NAME)> { + let AssemblerPredicate = isGFX12Plus in { let DecoderNamespace = "GFX12" in def _gfx12 : @@ -1366,14 +1374,20 @@ multiclass DS_Real_gfx12 op, string name = !tolower(NAME)> { } // End AssemblerPredicate } -defm DS_MIN_F32 : DS_Real_gfx12<0x012, "ds_min_num_f32">; -defm DS_MAX_F32 : DS_Real_gfx12<0x013, "ds_max_num_f32">; -defm DS_MIN_RTN_F32 : DS_Real_gfx12<0x032, "ds_min_num_rtn_f32">; -defm DS_MAX_RTN_F32 : DS_Real_gfx12<0x033, "ds_max_num_rtn_f32">; -defm DS_MIN_F64 : DS_Real_gfx12<0x052, "ds_min_num_f64">; -defm DS_MAX_F64 : DS_Real_gfx12<0x053, "ds_max_num_f64">; -defm DS_MIN_RTN_F64 : DS_Real_gfx12<0x072, "ds_min_num_rtn_f64">; -defm DS_MAX_RTN_F64 : DS_Real_gfx12<0x073, "ds_max_num_rtn_f64">; +// Helper to avoid repeating the pseudo-name if we only need to set +// the gfx12 name. +multiclass DS_Real_gfx12_with_name op, string name> { + defm "" : DS_Real_gfx12(NAME), name>; +} + +defm DS_MIN_F32 : DS_Real_gfx12_with_name<0x012, "ds_min_num_f32">; +defm DS_MAX_F32 : DS_Real_gfx12_with_name<0x013, "ds_max_num_f32">; +defm DS_MIN_RTN_F32 : DS_Real_gfx12_with_name<0x032, "ds_min_num_rtn_f32">; +defm DS_MAX_RTN_F32 : DS_Real_gfx12_with_name<0x033, "ds_max_num_rtn_f32">; +defm DS_MIN_F64 : DS_Real_gfx12_with_name<0x052, "ds_min_num_f64">; +defm DS_MAX_F64 : DS_Real_gfx12_with_name<0x053, "ds_max_num_f64">; +defm DS_MIN_RTN_F64 : DS_Real_gfx12_with_name<0x072, "ds_min_num_rtn_f64">; +defm DS_MAX_RTN_F64 : DS_Real_gfx12_with_name<0x073, "ds_max_num_rtn_f64">; defm DS_COND_SUB_U32 : DS_Real_gfx12<0x098>; defm DS_SUB_CLAMP_U32 : DS_Real_gfx12<0x099>; defm DS_COND_SUB_RTN_U32 : DS_Real_gfx12<0x0a8>; @@ -1389,7 +1403,7 @@ defm DS_LOAD_TR6_B96 : DS_Real_gfx12<0x0fb>; defm DS_LOAD_TR16_B128 : DS_Real_gfx12<0x0fc>; defm DS_LOAD_TR8_B64 : DS_Real_gfx12<0x0fd>; -defm DS_BVH_STACK_RTN_B32 : DS_Real_gfx12<0x0e0, +defm DS_BVH_STACK_RTN_B32 : DS_Real_gfx12_with_name<0x0e0, "ds_bvh_stack_push4_pop1_rtn_b32">; defm DS_BVH_STACK_PUSH8_POP1_RTN_B32 : DS_Real_gfx12<0x0e1>; defm DS_BVH_STACK_PUSH8_POP2_RTN_B64 : DS_Real_gfx12<0x0e2>; @@ -1418,8 +1432,8 @@ def : MnemonicAlias<"ds_load_tr_b128", "ds_load_tr16_b128">, Requires<[isGFX1250 // GFX11. //===----------------------------------------------------------------------===// -multiclass DS_Real_gfx11 op, string name = !tolower(NAME)> { - defvar ps = !cast(NAME); +multiclass DS_Real_gfx11 op, DS_Pseudo ps = !cast(NAME), + string name = !tolower(NAME)> { let AssemblerPredicate = isGFX11Only in { let DecoderNamespace = "GFX11" in def _gfx11 : @@ -1430,8 +1444,11 @@ multiclass DS_Real_gfx11 op, string name = !tolower(NAME)> { } // End AssemblerPredicate } -multiclass DS_Real_gfx11_gfx12 op, string name = !tolower(NAME)> - : DS_Real_gfx11, DS_Real_gfx12; +multiclass DS_Real_gfx11_gfx12 op, + string name = !tolower(NAME), + DS_Pseudo ps = !cast(NAME)> + : DS_Real_gfx11, + DS_Real_gfx12; defm DS_WRITE_B32 : DS_Real_gfx11_gfx12<0x00d, "ds_store_b32">; defm DS_WRITE2_B32 : DS_Real_gfx11_gfx12<0x00e, "ds_store_2addr_b32">;