From 6bd101702a80e9b259b136af3c27502484dcb2f3 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Wed, 3 Sep 2025 10:58:25 +0900
Subject: [PATCH 1/2] AMDGPU: Change DS classes to use RegisterOperand
 parameters

Start stripping out the uses of getLdStRegisterOperand. It added a
confusing level of indirection where the class at the definition point
was not the actual class used, pulled in the AV class usage for targets
where it isn't relevant, and was inflexible for special cases.

Also fixes the use of default arguments, which only served to wrap the
class argument in a RegisterOperand.

This should be done for all the memory instructions.
---
 llvm/lib/Target/AMDGPU/DSInstructions.td | 319 ++++++++++++-----------
 llvm/lib/Target/AMDGPU/SIInstrInfo.td    |  11 +
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td |  20 ++
 3 files changed, 197 insertions(+), 153 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 7552326c39468..960f3282fb6f6 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -130,10 +130,10 @@ class DS_Real :
 // DS Pseudo instructions
-class DS_0A1D_NORET
+class DS_0A1D_NORET
 : DS_Pseudo.ret:$data0, Offset:$offset, gds:$gds),
+ (ins rc:$data0, Offset:$offset, gds:$gds),
 " $data0$offset$gds"> {
 let has_addr = 0;
@@ -141,10 +141,10 @@ class DS_0A1D_NORET
 let has_vdst = 0;
 }
-class DS_1A1D_NORET
+class DS_1A1D_NORET
 : DS_Pseudo.ret:$data0, Offset:$offset, gds:$gds),
+ (ins VGPR_32:$addr, rc:$data0, Offset:$offset, gds:$gds),
 " $addr, $data0$offset$gds"> {
 let has_data1 = 0;
@@ -152,7 +152,7 @@ class DS_1A1D_NORET
 let IsAtomicNoRet = 1;
 }
-multiclass DS_1A1D_NORET_mc {
+multiclass DS_1A1D_NORET_mc {
 def "" : DS_1A1D_NORET;
 let has_m0_read = 0 in {
@@ -160,23 +160,23 @@ multiclass DS_1A1D_NORET_mc {
 }
 }
-multiclass DS_1A1D_NORET_t16
+multiclass DS_1A1D_NORET_t16
 : DS_1A1D_NORET_mc {
 let has_m0_read = 0 in {
 let True16Predicate = UseRealTrue16Insts in {
- def "_t16" : DS_1A1D_NORET,
+ def "_t16" : DS_1A1D_NORET,
 True16D16Table;
 }
 }
 }
-multiclass DS_1A1D_NORET_mc_gfx9 {
+multiclass DS_1A1D_NORET_mc_gfx9 {
 let has_m0_read = 0 in {
 def "" : DS_1A1D_NORET;
 }
 }
-class DS_1A2D_NORET
+class DS_1A2D_NORET
 : DS_Pseudo
 let IsAtomicNoRet = 1;
 }
-multiclass DS_1A2D_NORET_mc {
+// DS_xx2D cases should only be instantiated with VGPR operand classes.
+multiclass DS_1A2D_NORET_mc {
+ assert OperandIsVGPR.ret,
+ "DS with 2 data operands should be declared with VGPRs";
+
 def "" : DS_1A2D_NORET;
 let has_m0_read = 0 in {
@@ -194,12 +198,12 @@ multiclass DS_1A2D_NORET_mc {
 // All data operands are replaced with AGPRs in this form.
let SubtargetPredicate = isGFX90APlus in { - def _agpr : DS_1A2D_NORET.ret>; + def _agpr : DS_1A2D_NORET.ret>; } } } -class DS_1A2D_Off8_NORET +class DS_1A2D_Off8_NORET : DS_Pseudo let has_offset = 0; } -multiclass DS_1A2D_Off8_NORET_mc { +multiclass DS_1A2D_Off8_NORET_mc { + assert OperandIsVGPR.ret, + "DS with 2 data operands should be declared with VGPRs"; + def "" : DS_1A2D_Off8_NORET; let has_m0_read = 0 in { def _gfx9 : DS_1A2D_Off8_NORET; let SubtargetPredicate = isGFX90APlus in { - def _agpr : DS_1A2D_Off8_NORET.ret>; + def _agpr : DS_1A2D_Off8_NORET.ret>; } } } -class DS_0A1D_RET_GDS.ret, - RegisterOperand src_op = getLdStRegisterOperand.ret> +class DS_0A1D_RET_GDS : DS_Pseudo.ret> +class DS_1A1D_RET : DS_Pseudo { +multiclass DS_1A1D_RET_mc { def "" : DS_1A1D_RET; let has_m0_read = 0 in { @@ -256,15 +261,15 @@ multiclass DS_1A1D_RET_mc { } } -multiclass DS_1A1D_RET_mc_gfx9 { +multiclass DS_1A1D_RET_mc_gfx9 { let has_m0_read = 0 in { def "" : DS_1A1D_RET; } } class DS_1A2D_RET: DS_Pseudo: DS_Pseudo { @@ -273,20 +278,23 @@ class DS_1A2D_RET { + RegisterOperand dst_rc = VGPROp_32, + RegisterOperand src_rc = dst_rc> { + assert !and(OperandIsVGPR.ret, OperandIsVGPR.ret), + "DS with 2 data operands should be declared with VGPRs"; + def "" : DS_1A2D_RET; let has_m0_read = 0 in { def _gfx9 : DS_1A2D_RET; - def _agpr : DS_1A2D_RET.ret, - getEquivalentAGPRClass.ret>; + def _agpr : DS_1A2D_RET.ret, + getEquivalentAGPROperand.ret>; } } class DS_1A2D_Off8_RET + RegisterOperand dst_rc = VGPROp_32, + RegisterOperand src_rc = dst_rc> : DS_Pseudo { + RegisterOperand dst_rc = VGPROp_32, + RegisterOperand src_rc = dst_rc> { + assert !and(OperandIsVGPR.ret, OperandIsVGPR.ret) , + "DS with 2 data operands should be declared with VGPRs"; + def "" : DS_1A2D_Off8_RET; let has_m0_read = 0 in { def _gfx9 : DS_1A2D_Off8_RET; - def _agpr : DS_1A2D_Off8_RET.ret, - getEquivalentAGPRClass.ret>; + def _agpr : DS_1A2D_Off8_RET.ret, + getEquivalentAGPROperand.ret>; } } class DS_BVH_STACK + RegisterOperand vdst_rc, + RegisterOperand data1_rc> : DS_Pseudo.ret:$vdst, VGPR_32:$addr), - (ins VGPR_32:$addr_in, getLdStRegisterOperand.ret:$data0, - data1_rc:$data1, Offset:$offset), + (outs vdst_rc:$vdst, VGPR_32:$addr), + (ins VGPR_32:$addr_in, VGPR_32:$data0, data1_rc:$data1, Offset:$offset), " $vdst, $addr, $data0, $data1$offset"> { let Constraints = "$addr = $addr_in"; let has_gds = 0; @@ -323,8 +333,8 @@ class DS_BVH_STACK.ret> +class DS_1A_RET : DS_Pseudo { +multiclass DS_1A_RET_mc { def "" : DS_1A_RET; let has_m0_read = 0 in { @@ -344,27 +355,28 @@ multiclass DS_1A_RET_mc +multiclass DS_1A_RET_t16 : DS_1A_RET_mc { let has_m0_read = 0 in { let True16Predicate = UseRealTrue16Insts in { - def "_t16" : DS_1A_RET, True16D16Table; + def "_t16" : DS_1A_RET, True16D16Table; } } } -multiclass DS_1A_RET_NoM0 { +multiclass DS_1A_RET_NoM0 { let has_m0_read = 0 in { def "" : DS_1A_RET; } } -class DS_1A_RET_Tied : +class DS_1A_RET_Tied : DS_1A_RET; -class DS_1A_Off8_RET +class DS_1A_Off8_RET : DS_Pseudo.ret:$vdst), + (outs rc:$vdst), (ins VGPR_32:$addr, Offset0:$offset0, Offset1:$offset1, gds:$gds), " $vdst, $addr$offset0$offset1$gds"> { @@ -373,7 +385,7 @@ class DS_1A_Off8_RET let has_data1 = 0; } -multiclass DS_1A_Off8_RET_mc { +multiclass DS_1A_Off8_RET_mc { def "" : DS_1A_Off8_RET; let has_m0_read = 0 in { @@ -382,7 +394,7 @@ multiclass DS_1A_Off8_RET_mc { } class DS_1A_RET_GDS : DS_Pseudo.ret:$vdst), + (outs AVLdSt_32:$vdst), (ins VGPR_32:$addr, Offset:$offset), " $vdst, $addr$offset gds"> { @@ -407,7 +419,7 @@ class 
DS_1A_Off16_NORET } class DS_0A_RET : DS_Pseudo.ret:$vdst), + (outs AVLdSt_32:$vdst), (ins Offset:$offset, gds:$gds), " $vdst$offset$gds"> { @@ -462,7 +474,7 @@ class DS_GWS_0D class DS_GWS_1D : DS_GWS.ret:$data0, Offset:$offset), + (ins AVLdSt_32:$data0, Offset:$offset), " $data0$offset gds"> { let has_gws_data0 = 1; @@ -487,7 +499,7 @@ class DS_VOID : DS_Pseudo.ret> + RegisterOperand data_op = AVLdSt_32> : DS_Pseudo; } // End mayLoad = 0 let SubtargetPredicate = HasLdsAtomicAddF64 in { - defm DS_ADD_F64 : DS_1A1D_NORET_mc_gfx9<"ds_add_f64", VReg_64>; - defm DS_ADD_RTN_F64 : DS_1A1D_RET_mc_gfx9<"ds_add_rtn_f64", VReg_64>; + defm DS_ADD_F64 : DS_1A1D_NORET_mc_gfx9<"ds_add_f64", AVLdSt_64>; + defm DS_ADD_RTN_F64 : DS_1A1D_RET_mc_gfx9<"ds_add_rtn_f64", AVLdSt_64>; } // End SubtargetPredicate = HasLdsAtomicAddF64 let SubtargetPredicate = HasAtomicDsPkAdd16Insts in { defm DS_PK_ADD_F16 : DS_1A1D_NORET_mc_gfx9<"ds_pk_add_f16">; - defm DS_PK_ADD_RTN_F16 : DS_1A1D_RET_mc_gfx9<"ds_pk_add_rtn_f16", VGPR_32>; + defm DS_PK_ADD_RTN_F16 : DS_1A1D_RET_mc_gfx9<"ds_pk_add_rtn_f16">; defm DS_PK_ADD_BF16 : DS_1A1D_NORET_mc_gfx9<"ds_pk_add_bf16">; - defm DS_PK_ADD_RTN_BF16 : DS_1A1D_RET_mc_gfx9<"ds_pk_add_rtn_bf16", VGPR_32>; + defm DS_PK_ADD_RTN_BF16 : DS_1A1D_RET_mc_gfx9<"ds_pk_add_rtn_bf16">; } // End SubtargetPredicate = HasAtomicDsPkAdd16Insts defm DS_CMPSTORE_B32 : DS_1A2D_NORET_mc<"ds_cmpstore_b32">; defm DS_CMPSTORE_F32 : DS_1A2D_NORET_mc<"ds_cmpstore_f32">; -defm DS_CMPSTORE_B64 : DS_1A2D_NORET_mc<"ds_cmpstore_b64", VReg_64>; -defm DS_CMPSTORE_F64 : DS_1A2D_NORET_mc<"ds_cmpstore_f64", VReg_64>; -defm DS_CMPSTORE_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b32", VGPR_32>; -defm DS_CMPSTORE_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f32", VGPR_32>; -defm DS_CMPSTORE_RTN_B64 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b64", VReg_64>; -defm DS_CMPSTORE_RTN_F64 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f64", VReg_64>; +defm DS_CMPSTORE_B64 : DS_1A2D_NORET_mc<"ds_cmpstore_b64", VGPROp_64>; +defm DS_CMPSTORE_F64 : DS_1A2D_NORET_mc<"ds_cmpstore_f64", VGPROp_64>; +defm DS_CMPSTORE_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b32">; +defm DS_CMPSTORE_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f32">; +defm DS_CMPSTORE_RTN_B64 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b64", VGPROp_64>; +defm DS_CMPSTORE_RTN_F64 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f64", VGPROp_64>; defm DS_MSKOR_B32 : DS_1A2D_NORET_mc<"ds_mskor_b32">; defm DS_CMPST_B32 : DS_1A2D_NORET_mc<"ds_cmpst_b32">; defm DS_CMPST_F32 : DS_1A2D_NORET_mc<"ds_cmpst_f32">; -defm DS_ADD_U64 : DS_1A1D_NORET_mc<"ds_add_u64", VReg_64>; -defm DS_SUB_U64 : DS_1A1D_NORET_mc<"ds_sub_u64", VReg_64>; -defm DS_RSUB_U64 : DS_1A1D_NORET_mc<"ds_rsub_u64", VReg_64>; -defm DS_INC_U64 : DS_1A1D_NORET_mc<"ds_inc_u64", VReg_64>; -defm DS_DEC_U64 : DS_1A1D_NORET_mc<"ds_dec_u64", VReg_64>; -defm DS_MIN_I64 : DS_1A1D_NORET_mc<"ds_min_i64", VReg_64>; -defm DS_MAX_I64 : DS_1A1D_NORET_mc<"ds_max_i64", VReg_64>; -defm DS_MIN_U64 : DS_1A1D_NORET_mc<"ds_min_u64", VReg_64>; -defm DS_MAX_U64 : DS_1A1D_NORET_mc<"ds_max_u64", VReg_64>; -defm DS_AND_B64 : DS_1A1D_NORET_mc<"ds_and_b64", VReg_64>; -defm DS_OR_B64 : DS_1A1D_NORET_mc<"ds_or_b64", VReg_64>; -defm DS_XOR_B64 : DS_1A1D_NORET_mc<"ds_xor_b64", VReg_64>; -defm DS_MSKOR_B64 : DS_1A2D_NORET_mc<"ds_mskor_b64", VReg_64>; +defm DS_ADD_U64 : DS_1A1D_NORET_mc<"ds_add_u64", AVLdSt_64>; +defm DS_SUB_U64 : DS_1A1D_NORET_mc<"ds_sub_u64", AVLdSt_64>; +defm DS_RSUB_U64 : DS_1A1D_NORET_mc<"ds_rsub_u64", AVLdSt_64>; +defm DS_INC_U64 : DS_1A1D_NORET_mc<"ds_inc_u64", AVLdSt_64>; +defm 
DS_DEC_U64 : DS_1A1D_NORET_mc<"ds_dec_u64", AVLdSt_64>; +defm DS_MIN_I64 : DS_1A1D_NORET_mc<"ds_min_i64", AVLdSt_64>; +defm DS_MAX_I64 : DS_1A1D_NORET_mc<"ds_max_i64", AVLdSt_64>; +defm DS_MIN_U64 : DS_1A1D_NORET_mc<"ds_min_u64", AVLdSt_64>; +defm DS_MAX_U64 : DS_1A1D_NORET_mc<"ds_max_u64", AVLdSt_64>; +defm DS_AND_B64 : DS_1A1D_NORET_mc<"ds_and_b64", AVLdSt_64>; +defm DS_OR_B64 : DS_1A1D_NORET_mc<"ds_or_b64", AVLdSt_64>; +defm DS_XOR_B64 : DS_1A1D_NORET_mc<"ds_xor_b64", AVLdSt_64>; +defm DS_MSKOR_B64 : DS_1A2D_NORET_mc<"ds_mskor_b64", VGPROp_64>; let mayLoad = 0 in { -defm DS_WRITE_B64 : DS_1A1D_NORET_mc<"ds_write_b64", VReg_64>; -defm DS_WRITE2_B64 : DS_1A2D_Off8_NORET_mc<"ds_write2_b64", VReg_64>; -defm DS_WRITE2ST64_B64: DS_1A2D_Off8_NORET_mc<"ds_write2st64_b64", VReg_64>; +defm DS_WRITE_B64 : DS_1A1D_NORET_mc<"ds_write_b64", AVLdSt_64>; +defm DS_WRITE2_B64 : DS_1A2D_Off8_NORET_mc<"ds_write2_b64", VGPROp_64>; +defm DS_WRITE2ST64_B64: DS_1A2D_Off8_NORET_mc<"ds_write2st64_b64", VGPROp_64>; } -defm DS_CMPST_B64 : DS_1A2D_NORET_mc<"ds_cmpst_b64", VReg_64>; -defm DS_CMPST_F64 : DS_1A2D_NORET_mc<"ds_cmpst_f64", VReg_64>; -defm DS_MIN_F64 : DS_1A1D_NORET_mc<"ds_min_f64", VReg_64>; -defm DS_MAX_F64 : DS_1A1D_NORET_mc<"ds_max_f64", VReg_64>; +defm DS_CMPST_B64 : DS_1A2D_NORET_mc<"ds_cmpst_b64", VGPROp_64>; +defm DS_CMPST_F64 : DS_1A2D_NORET_mc<"ds_cmpst_f64", VGPROp_64>; +defm DS_MIN_F64 : DS_1A1D_NORET_mc<"ds_min_f64", AVLdSt_64>; +defm DS_MAX_F64 : DS_1A1D_NORET_mc<"ds_max_f64", AVLdSt_64>; -defm DS_ADD_RTN_U32 : DS_1A1D_RET_mc<"ds_add_rtn_u32", VGPR_32>; +defm DS_ADD_RTN_U32 : DS_1A1D_RET_mc<"ds_add_rtn_u32">; let SubtargetPredicate = HasLDSFPAtomicAddF32 in { -defm DS_ADD_RTN_F32 : DS_1A1D_RET_mc<"ds_add_rtn_f32", VGPR_32>; -} -defm DS_SUB_RTN_U32 : DS_1A1D_RET_mc<"ds_sub_rtn_u32", VGPR_32>; -defm DS_RSUB_RTN_U32 : DS_1A1D_RET_mc<"ds_rsub_rtn_u32", VGPR_32>; -defm DS_INC_RTN_U32 : DS_1A1D_RET_mc<"ds_inc_rtn_u32", VGPR_32>; -defm DS_DEC_RTN_U32 : DS_1A1D_RET_mc<"ds_dec_rtn_u32", VGPR_32>; -defm DS_MIN_RTN_I32 : DS_1A1D_RET_mc<"ds_min_rtn_i32", VGPR_32>; -defm DS_MAX_RTN_I32 : DS_1A1D_RET_mc<"ds_max_rtn_i32", VGPR_32>; -defm DS_MIN_RTN_U32 : DS_1A1D_RET_mc<"ds_min_rtn_u32", VGPR_32>; -defm DS_MAX_RTN_U32 : DS_1A1D_RET_mc<"ds_max_rtn_u32", VGPR_32>; -defm DS_AND_RTN_B32 : DS_1A1D_RET_mc<"ds_and_rtn_b32", VGPR_32>; -defm DS_OR_RTN_B32 : DS_1A1D_RET_mc<"ds_or_rtn_b32", VGPR_32>; -defm DS_XOR_RTN_B32 : DS_1A1D_RET_mc<"ds_xor_rtn_b32", VGPR_32>; -defm DS_MSKOR_RTN_B32 : DS_1A2D_RET_mc<"ds_mskor_rtn_b32", VGPR_32>; -defm DS_CMPST_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_b32", VGPR_32>; -defm DS_CMPST_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_f32", VGPR_32>; -defm DS_MIN_RTN_F32 : DS_1A1D_RET_mc<"ds_min_rtn_f32", VGPR_32>; -defm DS_MAX_RTN_F32 : DS_1A1D_RET_mc<"ds_max_rtn_f32", VGPR_32>; +defm DS_ADD_RTN_F32 : DS_1A1D_RET_mc<"ds_add_rtn_f32">; +} +defm DS_SUB_RTN_U32 : DS_1A1D_RET_mc<"ds_sub_rtn_u32">; +defm DS_RSUB_RTN_U32 : DS_1A1D_RET_mc<"ds_rsub_rtn_u32">; +defm DS_INC_RTN_U32 : DS_1A1D_RET_mc<"ds_inc_rtn_u32">; +defm DS_DEC_RTN_U32 : DS_1A1D_RET_mc<"ds_dec_rtn_u32">; +defm DS_MIN_RTN_I32 : DS_1A1D_RET_mc<"ds_min_rtn_i32">; +defm DS_MAX_RTN_I32 : DS_1A1D_RET_mc<"ds_max_rtn_i32">; +defm DS_MIN_RTN_U32 : DS_1A1D_RET_mc<"ds_min_rtn_u32">; +defm DS_MAX_RTN_U32 : DS_1A1D_RET_mc<"ds_max_rtn_u32">; +defm DS_AND_RTN_B32 : DS_1A1D_RET_mc<"ds_and_rtn_b32">; +defm DS_OR_RTN_B32 : DS_1A1D_RET_mc<"ds_or_rtn_b32">; +defm DS_XOR_RTN_B32 : DS_1A1D_RET_mc<"ds_xor_rtn_b32">; +defm DS_MSKOR_RTN_B32 : 
DS_1A2D_RET_mc<"ds_mskor_rtn_b32", VGPROp_32>; +defm DS_CMPST_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_b32", VGPROp_32>; +defm DS_CMPST_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_f32", VGPROp_32>; +defm DS_MIN_RTN_F32 : DS_1A1D_RET_mc<"ds_min_rtn_f32">; +defm DS_MAX_RTN_F32 : DS_1A1D_RET_mc<"ds_max_rtn_f32">; defm DS_WRXCHG_RTN_B32 : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b32">; -defm DS_WRXCHG2_RTN_B32 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b32", VReg_64, VGPR_32>; -defm DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b32", VReg_64, VGPR_32>; - -defm DS_ADD_RTN_U64 : DS_1A1D_RET_mc<"ds_add_rtn_u64", VReg_64>; -defm DS_SUB_RTN_U64 : DS_1A1D_RET_mc<"ds_sub_rtn_u64", VReg_64>; -defm DS_RSUB_RTN_U64 : DS_1A1D_RET_mc<"ds_rsub_rtn_u64", VReg_64>; -defm DS_INC_RTN_U64 : DS_1A1D_RET_mc<"ds_inc_rtn_u64", VReg_64>; -defm DS_DEC_RTN_U64 : DS_1A1D_RET_mc<"ds_dec_rtn_u64", VReg_64>; -defm DS_MIN_RTN_I64 : DS_1A1D_RET_mc<"ds_min_rtn_i64", VReg_64>; -defm DS_MAX_RTN_I64 : DS_1A1D_RET_mc<"ds_max_rtn_i64", VReg_64>; -defm DS_MIN_RTN_U64 : DS_1A1D_RET_mc<"ds_min_rtn_u64", VReg_64>; -defm DS_MAX_RTN_U64 : DS_1A1D_RET_mc<"ds_max_rtn_u64", VReg_64>; -defm DS_AND_RTN_B64 : DS_1A1D_RET_mc<"ds_and_rtn_b64", VReg_64>; -defm DS_OR_RTN_B64 : DS_1A1D_RET_mc<"ds_or_rtn_b64", VReg_64>; -defm DS_XOR_RTN_B64 : DS_1A1D_RET_mc<"ds_xor_rtn_b64", VReg_64>; -defm DS_MSKOR_RTN_B64 : DS_1A2D_RET_mc<"ds_mskor_rtn_b64", VReg_64>; -defm DS_CMPST_RTN_B64 : DS_1A2D_RET_mc<"ds_cmpst_rtn_b64", VReg_64>; -defm DS_CMPST_RTN_F64 : DS_1A2D_RET_mc<"ds_cmpst_rtn_f64", VReg_64>; -defm DS_MIN_RTN_F64 : DS_1A1D_RET_mc<"ds_min_rtn_f64", VReg_64>; -defm DS_MAX_RTN_F64 : DS_1A1D_RET_mc<"ds_max_rtn_f64", VReg_64>; - -defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b64", VReg_64>; -defm DS_WRXCHG2_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>; -defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>; +defm DS_WRXCHG2_RTN_B32 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b32", VGPROp_64, VGPROp_32>; +defm DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b32", VGPROp_64, VGPROp_32>; + +defm DS_ADD_RTN_U64 : DS_1A1D_RET_mc<"ds_add_rtn_u64", AVLdSt_64>; +defm DS_SUB_RTN_U64 : DS_1A1D_RET_mc<"ds_sub_rtn_u64", AVLdSt_64>; +defm DS_RSUB_RTN_U64 : DS_1A1D_RET_mc<"ds_rsub_rtn_u64", AVLdSt_64>; +defm DS_INC_RTN_U64 : DS_1A1D_RET_mc<"ds_inc_rtn_u64", AVLdSt_64>; +defm DS_DEC_RTN_U64 : DS_1A1D_RET_mc<"ds_dec_rtn_u64", AVLdSt_64>; +defm DS_MIN_RTN_I64 : DS_1A1D_RET_mc<"ds_min_rtn_i64", AVLdSt_64>; +defm DS_MAX_RTN_I64 : DS_1A1D_RET_mc<"ds_max_rtn_i64", AVLdSt_64>; +defm DS_MIN_RTN_U64 : DS_1A1D_RET_mc<"ds_min_rtn_u64", AVLdSt_64>; +defm DS_MAX_RTN_U64 : DS_1A1D_RET_mc<"ds_max_rtn_u64", AVLdSt_64>; +defm DS_AND_RTN_B64 : DS_1A1D_RET_mc<"ds_and_rtn_b64", AVLdSt_64>; +defm DS_OR_RTN_B64 : DS_1A1D_RET_mc<"ds_or_rtn_b64", AVLdSt_64>; +defm DS_XOR_RTN_B64 : DS_1A1D_RET_mc<"ds_xor_rtn_b64", AVLdSt_64>; +defm DS_MSKOR_RTN_B64 : DS_1A2D_RET_mc<"ds_mskor_rtn_b64", VGPROp_64>; +defm DS_CMPST_RTN_B64 : DS_1A2D_RET_mc<"ds_cmpst_rtn_b64", VGPROp_64>; +defm DS_CMPST_RTN_F64 : DS_1A2D_RET_mc<"ds_cmpst_rtn_f64", VGPROp_64>; +defm DS_MIN_RTN_F64 : DS_1A1D_RET_mc<"ds_min_rtn_f64", AVLdSt_64>; +defm DS_MAX_RTN_F64 : DS_1A1D_RET_mc<"ds_max_rtn_f64", AVLdSt_64>; + +defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b64", AVLdSt_64>; +defm DS_WRXCHG2_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b64", VGPROp_128, VGPROp_64>; +defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b64", 
VGPROp_128, VGPROp_64>; let isConvergent = 1, usesCustomInserter = 1 in { def DS_GWS_INIT : DS_GWS_1D<"ds_gws_init"> { @@ -745,19 +757,19 @@ def DS_WRITE_SRC2_B64 : DS_1A<"ds_write_src2_b64">; } // End SubtargetPredicate = HasDsSrc2Insts let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1 in { -def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, 0, Swizzle>; +def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", AVLdSt_32, 0, Swizzle>; } let mayStore = 0 in { defm DS_READ_I16 : DS_1A_RET_mc<"ds_read_i16">; defm DS_READ_B32 : DS_1A_RET_mc<"ds_read_b32">; -defm DS_READ_B64 : DS_1A_RET_mc<"ds_read_b64", VReg_64>; +defm DS_READ_B64 : DS_1A_RET_mc<"ds_read_b64", AVLdSt_64>; -defm DS_READ2_B32 : DS_1A_Off8_RET_mc<"ds_read2_b32", VReg_64>; -defm DS_READ2ST64_B32: DS_1A_Off8_RET_mc<"ds_read2st64_b32", VReg_64>; +defm DS_READ2_B32 : DS_1A_Off8_RET_mc<"ds_read2_b32", AVLdSt_64>; +defm DS_READ2ST64_B32: DS_1A_Off8_RET_mc<"ds_read2st64_b32", AVLdSt_64>; -defm DS_READ2_B64 : DS_1A_Off8_RET_mc<"ds_read2_b64", VReg_128>; -defm DS_READ2ST64_B64: DS_1A_Off8_RET_mc<"ds_read2st64_b64", VReg_128>; +defm DS_READ2_B64 : DS_1A_Off8_RET_mc<"ds_read2_b64", AVLdSt_128>; +defm DS_READ2ST64_B64: DS_1A_Off8_RET_mc<"ds_read2st64_b64", AVLdSt_128>; let has_m0_read = 0 in { let SubtargetPredicate = HasD16LoadStore, TiedSourceNotRead = 1 in { @@ -792,21 +804,21 @@ def DS_ORDERED_COUNT : DS_1A_RET_GDS<"ds_ordered_count">; let SubtargetPredicate = isGFX7Plus in { -defm DS_WRAP_RTN_B32 : DS_1A2D_RET_mc<"ds_wrap_rtn_b32", VGPR_32>; -defm DS_CONDXCHG32_RTN_B64 : DS_1A1D_RET_mc<"ds_condxchg32_rtn_b64", VReg_64>; +defm DS_WRAP_RTN_B32 : DS_1A2D_RET_mc<"ds_wrap_rtn_b32", VGPROp_32>; +defm DS_CONDXCHG32_RTN_B64 : DS_1A1D_RET_mc<"ds_condxchg32_rtn_b64", AVLdSt_64>; let isConvergent = 1, usesCustomInserter = 1 in { def DS_GWS_SEMA_RELEASE_ALL : DS_GWS_0D<"ds_gws_sema_release_all">; } let mayStore = 0 in { -defm DS_READ_B96 : DS_1A_RET_mc<"ds_read_b96", VReg_96>; -defm DS_READ_B128: DS_1A_RET_mc<"ds_read_b128", VReg_128>; +defm DS_READ_B96 : DS_1A_RET_mc<"ds_read_b96", AVLdSt_96>; +defm DS_READ_B128: DS_1A_RET_mc<"ds_read_b128", AVLdSt_128>; } // End mayStore = 0 let mayLoad = 0 in { -defm DS_WRITE_B96 : DS_1A1D_NORET_mc<"ds_write_b96", VReg_96>; -defm DS_WRITE_B128 : DS_1A1D_NORET_mc<"ds_write_b128", VReg_128>; +defm DS_WRITE_B96 : DS_1A1D_NORET_mc<"ds_write_b96", AVLdSt_96>; +defm DS_WRITE_B128 : DS_1A1D_NORET_mc<"ds_write_b128", AVLdSt_128>; } // End mayLoad = 0 def DS_NOP : DS_VOID<"ds_nop">; @@ -839,8 +851,8 @@ def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">; let SubtargetPredicate = isGFX11Only in { -def DS_ADD_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_add_gs_reg_rtn", VReg_64, VGPR_32>; -def DS_SUB_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_sub_gs_reg_rtn", VReg_64, VGPR_32>; +def DS_ADD_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_add_gs_reg_rtn", VGPROp_64, VGPROp_32>; +def DS_SUB_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_sub_gs_reg_rtn", VGPROp_64, VGPROp_32>; } // let SubtargetPredicate = isGFX11Only @@ -848,7 +860,7 @@ let SubtargetPredicate = isGFX11Plus in { let OtherPredicates = [HasImageInsts] in def DS_BVH_STACK_RTN_B32 : DS_BVH_STACK<"ds_bvh_stack_rtn_b32", - VGPR_32, VReg_128> ; + VGPROp_32, VGPROp_128> ; } // let SubtargetPredicate = isGFX11Plus @@ -860,15 +872,15 @@ let SubtargetPredicate = isGFX12Plus in { let OtherPredicates = [HasImageInsts] in { def DS_BVH_STACK_PUSH8_POP1_RTN_B32 : DS_BVH_STACK< - "ds_bvh_stack_push8_pop1_rtn_b32", VGPR_32, VReg_256>; + "ds_bvh_stack_push8_pop1_rtn_b32", VGPROp_32, VGPROp_256>; def 
DS_BVH_STACK_PUSH8_POP2_RTN_B64 : DS_BVH_STACK< - "ds_bvh_stack_push8_pop2_rtn_b64", VReg_64, VReg_256>; + "ds_bvh_stack_push8_pop2_rtn_b64", VGPROp_64, VGPROp_256>; } // End OtherPredicates = [HasImageInsts]. defm DS_COND_SUB_U32 : DS_1A1D_NORET_mc_gfx9<"ds_cond_sub_u32">; -defm DS_COND_SUB_RTN_U32 : DS_1A1D_RET_mc_gfx9<"ds_cond_sub_rtn_u32", VGPR_32>; +defm DS_COND_SUB_RTN_U32 : DS_1A1D_RET_mc_gfx9<"ds_cond_sub_rtn_u32", VGPROp_32>; defm DS_SUB_CLAMP_U32 : DS_1A1D_NORET_mc_gfx9<"ds_sub_clamp_u32">; -defm DS_SUB_CLAMP_RTN_U32 : DS_1A1D_RET_mc_gfx9<"ds_sub_clamp_rtn_u32", VGPR_32>; +defm DS_SUB_CLAMP_RTN_U32 : DS_1A1D_RET_mc_gfx9<"ds_sub_clamp_rtn_u32", VGPROp_32>; def DS_BPERMUTE_FI_B32 : DS_1A1D_PERMUTE <"ds_bpermute_fi_b32", int_amdgcn_ds_bpermute_fi_b32>; @@ -889,11 +901,11 @@ let SubtargetPredicate = isGFX1250Plus in { let WaveSizePredicate = isWave32, mayStore = 0 in { let OtherPredicates = [HasTransposeLoadF4F6Insts] in { -defm DS_LOAD_TR4_B64 : DS_1A_RET_NoM0<"ds_load_tr4_b64", VReg_64>; -defm DS_LOAD_TR6_B96 : DS_1A_RET_NoM0<"ds_load_tr6_b96", VReg_96>; +defm DS_LOAD_TR4_B64 : DS_1A_RET_NoM0<"ds_load_tr4_b64", VGPROp_64>; +defm DS_LOAD_TR6_B96 : DS_1A_RET_NoM0<"ds_load_tr6_b96", VGPROp_96>; } // End OtherPredicates = [HasTransposeLoadF4F6Insts] -defm DS_LOAD_TR8_B64 : DS_1A_RET_NoM0<"ds_load_tr8_b64", VReg_64>; -defm DS_LOAD_TR16_B128 : DS_1A_RET_NoM0<"ds_load_tr16_b128", VReg_128>; +defm DS_LOAD_TR8_B64 : DS_1A_RET_NoM0<"ds_load_tr8_b64", VGPROp_64>; +defm DS_LOAD_TR16_B128 : DS_1A_RET_NoM0<"ds_load_tr16_b128", VGPROp_128>; } // End WaveSizePredicate = isWave32, mayStore = 0 let OtherPredicates = [HasLdsBarrierArriveAtomic] in { @@ -906,7 +918,7 @@ def : GCNPat < (DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 VGPR_32:$ptr, Offset:$offset, (i1 0)) >; -defm DS_ATOMIC_BARRIER_ARRIVE_RTN_B64 : DS_1A1D_RET_mc_gfx9<"ds_atomic_barrier_arrive_rtn_b64", VReg_64>; +defm DS_ATOMIC_BARRIER_ARRIVE_RTN_B64 : DS_1A1D_RET_mc_gfx9<"ds_atomic_barrier_arrive_rtn_b64", VGPROp_64>; def : GCNPat< (i64 (int_amdgcn_ds_atomic_barrier_arrive_rtn_b64 (DS1Addr1Offset i32:$ptr, i32:$offset), i64:$data)), @@ -917,10 +929,10 @@ def : GCNPat< } // End SubtargetPredicate = isGFX1250Plus let WaveSizePredicate = isWave64, SubtargetPredicate = HasGFX950Insts, mayStore = 0 in { - defm DS_READ_B64_TR_B4 : DS_1A_RET_NoM0<"ds_read_b64_tr_b4", VReg_64>; - defm DS_READ_B64_TR_B8 : DS_1A_RET_NoM0<"ds_read_b64_tr_b8", VReg_64>; - defm DS_READ_B64_TR_B16 : DS_1A_RET_NoM0<"ds_read_b64_tr_b16", VReg_64>; - defm DS_READ_B96_TR_B6 : DS_1A_RET_NoM0<"ds_read_b96_tr_b6", VReg_96>; + defm DS_READ_B64_TR_B4 : DS_1A_RET_NoM0<"ds_read_b64_tr_b4", AVLdSt_64>; + defm DS_READ_B64_TR_B8 : DS_1A_RET_NoM0<"ds_read_b64_tr_b8", AVLdSt_64>; + defm DS_READ_B64_TR_B16 : DS_1A_RET_NoM0<"ds_read_b64_tr_b16", AVLdSt_64>; + defm DS_READ_B96_TR_B6 : DS_1A_RET_NoM0<"ds_read_b96_tr_b6", AVLdSt_96>; } //===----------------------------------------------------------------------===// @@ -1072,6 +1084,7 @@ class DS64Bit4ByteAlignedReadPat : G (inst $ptr, $offset0, $offset1, (i1 0)) >; +// TODO: Should this use AVLdSt_64 for the class? 
 class DS64Bit4ByteAlignedWritePat : GCNPat<
 (frag vt:$value, (DS64Bit4ByteAligned i32:$ptr, i32:$offset0, i32:$offset1)),
 (inst $ptr, (i32 (EXTRACT_SUBREG VReg_64:$value, sub0)),

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index a7cf1faa60ce2..50d3b4baef38d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2606,6 +2606,17 @@ class getEquivalentAGPRClass {
 !eq(RC.Size, 1024) : AReg_1024);
 }
+class getEquivalentAGPROperand {
+ defvar Size = RC.RegClass.Size;
+ RegisterOperand ret =
+ !cond(!eq(Size, 32) : RegisterOperand,
+ !eq(Size, 64) : RegisterOperand,
+ !eq(Size, 96) : RegisterOperand,
+ !eq(Size, 128) : RegisterOperand,
+ !eq(Size, 160) : RegisterOperand,
+ !eq(Size, 1024) : RegisterOperand);
+}
+
 class getHasVOP3DPP {

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index fad428d1e43a5..d00ab839ff814 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1344,6 +1344,22 @@ def VGPRSrc_16 : RegisterOperand {
 let EncoderMethod = "getMachineOpValueT16";
 }
+// TODO: These cases should use default target alignment
+def VGPROp_16 : RegisterOperand;
+def VGPROp_32 : RegisterOperand;
+def VGPROp_64 : RegisterOperand;
+def VGPROp_96 : RegisterOperand;
+def VGPROp_128 : RegisterOperand;
+def VGPROp_256 : RegisterOperand;
+
+def VGPROp_64_Align1 : RegisterOperand;
+def VGPROp_96_Align1 : RegisterOperand;
+def VGPROp_128_Align1 : RegisterOperand;
+
+def VGPROp_64_Align2 : RegisterOperand;
+def VGPROp_128_Align2 : RegisterOperand;
+
+
 //===----------------------------------------------------------------------===//
 // ASrc_* Operands with an AccVGPR
 //===----------------------------------------------------------------------===//
@@ -1422,6 +1438,7 @@ def AVDst_512 : AVDstOperand;
 class AVLdStOperand : AVOperand;
+// TODO: These cases should use target align variant
 def AVLdSt_32 : AVLdStOperand;
 def AVLdSt_64 : AVLdStOperand;
 def AVLdSt_96 : AVLdStOperand;
@@ -1429,6 +1446,9 @@ def AVLdSt_128 : AVLdStOperand;
 def AVLdSt_160 : AVLdStOperand;
 def AVLdSt_1024 : AVLdStOperand;
+def AVLdSt_96_Align1 : AVLdStOperand;
+def AVLdSt_96_Align2 : AVLdStOperand;
+
 //===----------------------------------------------------------------------===//
 // ACSrc_* Operands with an AGPR or an inline constant
 //===----------------------------------------------------------------------===//

From 284b8e4dd48a15dcf1af6e6baf7bef114280f26a Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Thu, 4 Sep 2025 12:08:48 +0900
Subject: [PATCH 2/2] Use foreach to define VGPROp

---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index d00ab839ff814..dd6030769c03c 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1347,18 +1347,12 @@ def VGPRSrc_16 : RegisterOperand {
 // TODO: These cases should use default target alignment
 def VGPROp_16 : RegisterOperand;
 def VGPROp_32 : RegisterOperand;
-def VGPROp_64 : RegisterOperand;
-def VGPROp_96 : RegisterOperand;
-def VGPROp_128 : RegisterOperand;
-def VGPROp_256 : RegisterOperand;
-
-def VGPROp_64_Align1 : RegisterOperand;
-def VGPROp_96_Align1 : RegisterOperand;
-def VGPROp_128_Align1 : RegisterOperand;
-
-def VGPROp_64_Align2 : RegisterOperand;
-def VGPROp_128_Align2 : RegisterOperand;
+foreach size = ["64", "96", "128", "256" ] in {
+  def VGPROp_#size : RegisterOperand("VReg_"#size)>;
+  def VGPROp_#size#_Align1 : RegisterOperand("VReg_"#size)>;
+  def VGPROp_#size#_Align2 : RegisterOperand("VReg_"#size#_Align2)>;
+}

 //===----------------------------------------------------------------------===//
 // ASrc_* Operands with an AccVGPR