diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 7552326c39468..960f3282fb6f6 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -130,10 +130,10 @@ class DS_Real :

 // DS Pseudo instructions

-class DS_0A1D_NORET
+class DS_0A1D_NORET
 : DS_Pseudo.ret:$data0, Offset:$offset, gds:$gds),
+  (ins rc:$data0, Offset:$offset, gds:$gds),
   " $data0$offset$gds"> {

   let has_addr = 0;
@@ -141,10 +141,10 @@ class DS_0A1D_NORET
   let has_vdst = 0;
 }

-class DS_1A1D_NORET
+class DS_1A1D_NORET
 : DS_Pseudo.ret:$data0, Offset:$offset, gds:$gds),
+  (ins VGPR_32:$addr, rc:$data0, Offset:$offset, gds:$gds),
   " $addr, $data0$offset$gds"> {

   let has_data1 = 0;
@@ -152,7 +152,7 @@ class DS_1A1D_NORET
   let IsAtomicNoRet = 1;
 }

-multiclass DS_1A1D_NORET_mc {
+multiclass DS_1A1D_NORET_mc {
   def "" : DS_1A1D_NORET;

   let has_m0_read = 0 in {
@@ -160,23 +160,23 @@ multiclass DS_1A1D_NORET_mc {
   }
 }

-multiclass DS_1A1D_NORET_t16
+multiclass DS_1A1D_NORET_t16
   : DS_1A1D_NORET_mc {
   let has_m0_read = 0 in {
     let True16Predicate = UseRealTrue16Insts in {
-      def "_t16" : DS_1A1D_NORET,
+      def "_t16" : DS_1A1D_NORET,
                    True16D16Table;
     }
   }
 }

-multiclass DS_1A1D_NORET_mc_gfx9 {
+multiclass DS_1A1D_NORET_mc_gfx9 {
   let has_m0_read = 0 in {
     def "" : DS_1A1D_NORET;
   }
 }

-class DS_1A2D_NORET
+class DS_1A2D_NORET
 : DS_Pseudo
   let IsAtomicNoRet = 1;
 }

-multiclass DS_1A2D_NORET_mc {
+// DS_xx2D cases should only be instantiated with VGPR operand classes.
+multiclass DS_1A2D_NORET_mc {
+  assert OperandIsVGPR.ret,
+         "DS with 2 data operands should be declared with VGPRs";
+
   def "" : DS_1A2D_NORET;

   let has_m0_read = 0 in {
@@ -194,12 +198,12 @@ multiclass DS_1A2D_NORET_mc {
 // All data operands are replaced with AGPRs in this form.
     let SubtargetPredicate = isGFX90APlus in {
-      def _agpr : DS_1A2D_NORET.ret>;
+      def _agpr : DS_1A2D_NORET.ret>;
     }
   }
 }

-class DS_1A2D_Off8_NORET
+class DS_1A2D_Off8_NORET
 : DS_Pseudo
   let has_offset = 0;
 }

-multiclass DS_1A2D_Off8_NORET_mc {
+multiclass DS_1A2D_Off8_NORET_mc {
+  assert OperandIsVGPR.ret,
+         "DS with 2 data operands should be declared with VGPRs";
+
   def "" : DS_1A2D_Off8_NORET;

   let has_m0_read = 0 in {
     def _gfx9 : DS_1A2D_Off8_NORET;

     let SubtargetPredicate = isGFX90APlus in {
-      def _agpr : DS_1A2D_Off8_NORET.ret>;
+      def _agpr : DS_1A2D_Off8_NORET.ret>;
     }
   }
 }

-class DS_0A1D_RET_GDS.ret,
-                      RegisterOperand src_op = getLdStRegisterOperand.ret>
+class DS_0A1D_RET_GDS
 : DS_Pseudo.ret>
+class DS_1A1D_RET
 : DS_Pseudo {
+multiclass DS_1A1D_RET_mc {
   def "" : DS_1A1D_RET;

   let has_m0_read = 0 in {
@@ -256,15 +261,15 @@ multiclass DS_1A1D_RET_mc {
   }
 }

-multiclass DS_1A1D_RET_mc_gfx9 {
+multiclass DS_1A1D_RET_mc_gfx9 {
   let has_m0_read = 0 in {
     def "" : DS_1A1D_RET;
   }
 }

 class DS_1A2D_RET: DS_Pseudo: DS_Pseudo {
@@ -273,20 +278,23 @@ class DS_1A2D_RET {
+                        RegisterOperand dst_rc = VGPROp_32,
+                        RegisterOperand src_rc = dst_rc> {
+  assert !and(OperandIsVGPR.ret, OperandIsVGPR.ret),
+        "DS with 2 data operands should be declared with VGPRs";
+
   def "" : DS_1A2D_RET;

   let has_m0_read = 0 in {
     def _gfx9 : DS_1A2D_RET;
-    def _agpr : DS_1A2D_RET.ret,
-                getEquivalentAGPRClass.ret>;
+    def _agpr : DS_1A2D_RET.ret,
+                getEquivalentAGPROperand.ret>;
   }
 }

 class DS_1A2D_Off8_RET
+                        RegisterOperand dst_rc = VGPROp_32,
+                        RegisterOperand src_rc = dst_rc>
 : DS_Pseudo {
+                             RegisterOperand dst_rc = VGPROp_32,
+                             RegisterOperand src_rc = dst_rc> {
+  assert !and(OperandIsVGPR.ret, OperandIsVGPR.ret) ,
+        "DS with 2 data operands should be declared with VGPRs";
+
   def "" : DS_1A2D_Off8_RET;

   let has_m0_read = 0 in {
     def _gfx9 : DS_1A2D_Off8_RET;
-    def _agpr : DS_1A2D_Off8_RET.ret,
-                getEquivalentAGPRClass.ret>;
+    def _agpr : DS_1A2D_Off8_RET.ret,
+                getEquivalentAGPROperand.ret>;
   }
 }

 class DS_BVH_STACK
+                   RegisterOperand vdst_rc,
+                   RegisterOperand data1_rc>
 : DS_Pseudo.ret:$vdst, VGPR_32:$addr),
-  (ins VGPR_32:$addr_in, getLdStRegisterOperand.ret:$data0,
-       data1_rc:$data1, Offset:$offset),
+  (outs vdst_rc:$vdst, VGPR_32:$addr),
+  (ins VGPR_32:$addr_in, VGPR_32:$data0, data1_rc:$data1, Offset:$offset),
   " $vdst, $addr, $data0, $data1$offset"> {
   let Constraints = "$addr = $addr_in";
   let has_gds = 0;
@@ -323,8 +333,8 @@ class DS_BVH_STACK.ret>
+class DS_1A_RET
 : DS_Pseudo {
+multiclass DS_1A_RET_mc {
   def "" : DS_1A_RET;

   let has_m0_read = 0 in {
@@ -344,27 +355,28 @@ multiclass DS_1A_RET_mc
+multiclass DS_1A_RET_t16
   : DS_1A_RET_mc {
   let has_m0_read = 0 in {
     let True16Predicate = UseRealTrue16Insts in {
-      def "_t16" : DS_1A_RET, True16D16Table;
+      def "_t16" : DS_1A_RET, True16D16Table;
     }
   }
 }

-multiclass DS_1A_RET_NoM0 {
+multiclass DS_1A_RET_NoM0 {
   let has_m0_read = 0 in {
     def "" : DS_1A_RET;
   }
 }

-class DS_1A_RET_Tied :
+class DS_1A_RET_Tied :
   DS_1A_RET;

-class DS_1A_Off8_RET
+class DS_1A_Off8_RET
 : DS_Pseudo.ret:$vdst),
+  (outs rc:$vdst),
   (ins VGPR_32:$addr, Offset0:$offset0, Offset1:$offset1, gds:$gds),
   " $vdst, $addr$offset0$offset1$gds"> {
@@ -373,7 +385,7 @@ class DS_1A_Off8_RET
   let has_data1 = 0;
 }

-multiclass DS_1A_Off8_RET_mc {
+multiclass DS_1A_Off8_RET_mc {
   def "" : DS_1A_Off8_RET;

   let has_m0_read = 0 in {
@@ -382,7 +394,7 @@ multiclass DS_1A_Off8_RET_mc {
 }

 class DS_1A_RET_GDS : DS_Pseudo.ret:$vdst),
+  (outs AVLdSt_32:$vdst),
   (ins VGPR_32:$addr, Offset:$offset),
   " $vdst, $addr$offset gds"> {
@@ -407,7 +419,7 @@ class DS_1A_Off16_NORET
 }

 class DS_0A_RET : DS_Pseudo.ret:$vdst),
+  (outs AVLdSt_32:$vdst),
   (ins Offset:$offset, gds:$gds),
   " $vdst$offset$gds"> {
@@ -462,7 +474,7 @@ class DS_GWS_0D
 class DS_GWS_1D : DS_GWS.ret:$data0, Offset:$offset),
+  (ins AVLdSt_32:$data0, Offset:$offset),
   " $data0$offset gds"> {

   let has_gws_data0 = 1;
@@ -487,7 +499,7 @@ class DS_VOID : DS_Pseudo.ret>
+                       RegisterOperand data_op = AVLdSt_32>
 : DS_Pseudo;
 } // End mayLoad = 0

 let SubtargetPredicate = HasLdsAtomicAddF64 in {
-  defm DS_ADD_F64 : DS_1A1D_NORET_mc_gfx9<"ds_add_f64", VReg_64>;
-  defm DS_ADD_RTN_F64 : DS_1A1D_RET_mc_gfx9<"ds_add_rtn_f64", VReg_64>;
+  defm DS_ADD_F64 : DS_1A1D_NORET_mc_gfx9<"ds_add_f64", AVLdSt_64>;
+  defm DS_ADD_RTN_F64 : DS_1A1D_RET_mc_gfx9<"ds_add_rtn_f64", AVLdSt_64>;
 } // End SubtargetPredicate = HasLdsAtomicAddF64

 let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {
   defm DS_PK_ADD_F16 : DS_1A1D_NORET_mc_gfx9<"ds_pk_add_f16">;
-  defm DS_PK_ADD_RTN_F16 : DS_1A1D_RET_mc_gfx9<"ds_pk_add_rtn_f16", VGPR_32>;
+  defm DS_PK_ADD_RTN_F16 : DS_1A1D_RET_mc_gfx9<"ds_pk_add_rtn_f16">;
   defm DS_PK_ADD_BF16 : DS_1A1D_NORET_mc_gfx9<"ds_pk_add_bf16">;
-  defm DS_PK_ADD_RTN_BF16 : DS_1A1D_RET_mc_gfx9<"ds_pk_add_rtn_bf16", VGPR_32>;
+  defm DS_PK_ADD_RTN_BF16 : DS_1A1D_RET_mc_gfx9<"ds_pk_add_rtn_bf16">;
 } // End SubtargetPredicate = HasAtomicDsPkAdd16Insts

 defm DS_CMPSTORE_B32 : DS_1A2D_NORET_mc<"ds_cmpstore_b32">;
 defm DS_CMPSTORE_F32 : DS_1A2D_NORET_mc<"ds_cmpstore_f32">;
-defm DS_CMPSTORE_B64 : DS_1A2D_NORET_mc<"ds_cmpstore_b64", VReg_64>;
-defm DS_CMPSTORE_F64 : DS_1A2D_NORET_mc<"ds_cmpstore_f64", VReg_64>;
-defm DS_CMPSTORE_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b32", VGPR_32>;
-defm DS_CMPSTORE_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f32", VGPR_32>;
-defm DS_CMPSTORE_RTN_B64 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b64", VReg_64>;
-defm DS_CMPSTORE_RTN_F64 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f64", VReg_64>;
+defm DS_CMPSTORE_B64 : DS_1A2D_NORET_mc<"ds_cmpstore_b64", VGPROp_64>;
+defm DS_CMPSTORE_F64 : DS_1A2D_NORET_mc<"ds_cmpstore_f64", VGPROp_64>;
+defm DS_CMPSTORE_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b32">;
+defm DS_CMPSTORE_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f32">;
+defm DS_CMPSTORE_RTN_B64 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b64", VGPROp_64>;
+defm DS_CMPSTORE_RTN_F64 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f64", VGPROp_64>;

 defm DS_MSKOR_B32 : DS_1A2D_NORET_mc<"ds_mskor_b32">;
 defm DS_CMPST_B32 : DS_1A2D_NORET_mc<"ds_cmpst_b32">;
 defm DS_CMPST_F32 : DS_1A2D_NORET_mc<"ds_cmpst_f32">;

-defm DS_ADD_U64 : DS_1A1D_NORET_mc<"ds_add_u64", VReg_64>;
-defm DS_SUB_U64 : DS_1A1D_NORET_mc<"ds_sub_u64", VReg_64>;
-defm DS_RSUB_U64 : DS_1A1D_NORET_mc<"ds_rsub_u64", VReg_64>;
-defm DS_INC_U64 : DS_1A1D_NORET_mc<"ds_inc_u64", VReg_64>;
-defm DS_DEC_U64 : DS_1A1D_NORET_mc<"ds_dec_u64", VReg_64>;
-defm DS_MIN_I64 : DS_1A1D_NORET_mc<"ds_min_i64", VReg_64>;
-defm DS_MAX_I64 : DS_1A1D_NORET_mc<"ds_max_i64", VReg_64>;
-defm DS_MIN_U64 : DS_1A1D_NORET_mc<"ds_min_u64", VReg_64>;
-defm DS_MAX_U64 : DS_1A1D_NORET_mc<"ds_max_u64", VReg_64>;
-defm DS_AND_B64 : DS_1A1D_NORET_mc<"ds_and_b64", VReg_64>;
-defm DS_OR_B64 : DS_1A1D_NORET_mc<"ds_or_b64", VReg_64>;
-defm DS_XOR_B64 : DS_1A1D_NORET_mc<"ds_xor_b64", VReg_64>;
-defm DS_MSKOR_B64 : DS_1A2D_NORET_mc<"ds_mskor_b64", VReg_64>;
+defm DS_ADD_U64 : DS_1A1D_NORET_mc<"ds_add_u64", AVLdSt_64>;
+defm DS_SUB_U64 : DS_1A1D_NORET_mc<"ds_sub_u64", AVLdSt_64>;
+defm DS_RSUB_U64 : DS_1A1D_NORET_mc<"ds_rsub_u64", AVLdSt_64>;
+defm DS_INC_U64 : DS_1A1D_NORET_mc<"ds_inc_u64", AVLdSt_64>;
+defm DS_DEC_U64 : DS_1A1D_NORET_mc<"ds_dec_u64", AVLdSt_64>;
+defm DS_MIN_I64 : DS_1A1D_NORET_mc<"ds_min_i64", AVLdSt_64>;
+defm DS_MAX_I64 : DS_1A1D_NORET_mc<"ds_max_i64", AVLdSt_64>;
+defm DS_MIN_U64 : DS_1A1D_NORET_mc<"ds_min_u64", AVLdSt_64>;
+defm DS_MAX_U64 : DS_1A1D_NORET_mc<"ds_max_u64", AVLdSt_64>;
+defm DS_AND_B64 : DS_1A1D_NORET_mc<"ds_and_b64", AVLdSt_64>;
+defm DS_OR_B64 : DS_1A1D_NORET_mc<"ds_or_b64", AVLdSt_64>;
+defm DS_XOR_B64 : DS_1A1D_NORET_mc<"ds_xor_b64", AVLdSt_64>;
+defm DS_MSKOR_B64 : DS_1A2D_NORET_mc<"ds_mskor_b64", VGPROp_64>;

 let mayLoad = 0 in {
-defm DS_WRITE_B64 : DS_1A1D_NORET_mc<"ds_write_b64", VReg_64>;
-defm DS_WRITE2_B64 : DS_1A2D_Off8_NORET_mc<"ds_write2_b64", VReg_64>;
-defm DS_WRITE2ST64_B64: DS_1A2D_Off8_NORET_mc<"ds_write2st64_b64", VReg_64>;
+defm DS_WRITE_B64 : DS_1A1D_NORET_mc<"ds_write_b64", AVLdSt_64>;
+defm DS_WRITE2_B64 : DS_1A2D_Off8_NORET_mc<"ds_write2_b64", VGPROp_64>;
+defm DS_WRITE2ST64_B64: DS_1A2D_Off8_NORET_mc<"ds_write2st64_b64", VGPROp_64>;
 }

-defm DS_CMPST_B64 : DS_1A2D_NORET_mc<"ds_cmpst_b64", VReg_64>;
-defm DS_CMPST_F64 : DS_1A2D_NORET_mc<"ds_cmpst_f64", VReg_64>;
-defm DS_MIN_F64 : DS_1A1D_NORET_mc<"ds_min_f64", VReg_64>;
-defm DS_MAX_F64 : DS_1A1D_NORET_mc<"ds_max_f64", VReg_64>;
+defm DS_CMPST_B64 : DS_1A2D_NORET_mc<"ds_cmpst_b64", VGPROp_64>;
+defm DS_CMPST_F64 : DS_1A2D_NORET_mc<"ds_cmpst_f64", VGPROp_64>;
+defm DS_MIN_F64 : DS_1A1D_NORET_mc<"ds_min_f64", AVLdSt_64>;
+defm DS_MAX_F64 : DS_1A1D_NORET_mc<"ds_max_f64", AVLdSt_64>;

-defm DS_ADD_RTN_U32 : DS_1A1D_RET_mc<"ds_add_rtn_u32", VGPR_32>;
+defm DS_ADD_RTN_U32 : DS_1A1D_RET_mc<"ds_add_rtn_u32">;

 let SubtargetPredicate = HasLDSFPAtomicAddF32 in {
-defm DS_ADD_RTN_F32 : DS_1A1D_RET_mc<"ds_add_rtn_f32", VGPR_32>;
-}
-defm DS_SUB_RTN_U32 : DS_1A1D_RET_mc<"ds_sub_rtn_u32", VGPR_32>;
-defm DS_RSUB_RTN_U32 : DS_1A1D_RET_mc<"ds_rsub_rtn_u32", VGPR_32>;
-defm DS_INC_RTN_U32 : DS_1A1D_RET_mc<"ds_inc_rtn_u32", VGPR_32>;
-defm DS_DEC_RTN_U32 : DS_1A1D_RET_mc<"ds_dec_rtn_u32", VGPR_32>;
-defm DS_MIN_RTN_I32 : DS_1A1D_RET_mc<"ds_min_rtn_i32", VGPR_32>;
-defm DS_MAX_RTN_I32 : DS_1A1D_RET_mc<"ds_max_rtn_i32", VGPR_32>;
-defm DS_MIN_RTN_U32 : DS_1A1D_RET_mc<"ds_min_rtn_u32", VGPR_32>;
-defm DS_MAX_RTN_U32 : DS_1A1D_RET_mc<"ds_max_rtn_u32", VGPR_32>;
-defm DS_AND_RTN_B32 : DS_1A1D_RET_mc<"ds_and_rtn_b32", VGPR_32>;
-defm DS_OR_RTN_B32 : DS_1A1D_RET_mc<"ds_or_rtn_b32", VGPR_32>;
-defm DS_XOR_RTN_B32 : DS_1A1D_RET_mc<"ds_xor_rtn_b32", VGPR_32>;
-defm DS_MSKOR_RTN_B32 : DS_1A2D_RET_mc<"ds_mskor_rtn_b32", VGPR_32>;
-defm DS_CMPST_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_b32", VGPR_32>;
-defm DS_CMPST_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_f32", VGPR_32>;
-defm DS_MIN_RTN_F32 : DS_1A1D_RET_mc<"ds_min_rtn_f32", VGPR_32>;
-defm DS_MAX_RTN_F32 : DS_1A1D_RET_mc<"ds_max_rtn_f32", VGPR_32>;
+defm DS_ADD_RTN_F32 : DS_1A1D_RET_mc<"ds_add_rtn_f32">;
+}
+defm DS_SUB_RTN_U32 : DS_1A1D_RET_mc<"ds_sub_rtn_u32">;
+defm DS_RSUB_RTN_U32 : DS_1A1D_RET_mc<"ds_rsub_rtn_u32">;
+defm DS_INC_RTN_U32 : DS_1A1D_RET_mc<"ds_inc_rtn_u32">;
+defm DS_DEC_RTN_U32 : DS_1A1D_RET_mc<"ds_dec_rtn_u32">;
+defm DS_MIN_RTN_I32 : DS_1A1D_RET_mc<"ds_min_rtn_i32">;
+defm DS_MAX_RTN_I32 : DS_1A1D_RET_mc<"ds_max_rtn_i32">;
+defm DS_MIN_RTN_U32 : DS_1A1D_RET_mc<"ds_min_rtn_u32">;
+defm DS_MAX_RTN_U32 : DS_1A1D_RET_mc<"ds_max_rtn_u32">;
+defm DS_AND_RTN_B32 : DS_1A1D_RET_mc<"ds_and_rtn_b32">;
+defm DS_OR_RTN_B32 : DS_1A1D_RET_mc<"ds_or_rtn_b32">;
+defm DS_XOR_RTN_B32 : DS_1A1D_RET_mc<"ds_xor_rtn_b32">;
+defm DS_MSKOR_RTN_B32 : DS_1A2D_RET_mc<"ds_mskor_rtn_b32", VGPROp_32>;
+defm DS_CMPST_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_b32", VGPROp_32>;
+defm DS_CMPST_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_f32", VGPROp_32>;
+defm DS_MIN_RTN_F32 : DS_1A1D_RET_mc<"ds_min_rtn_f32">;
+defm DS_MAX_RTN_F32 : DS_1A1D_RET_mc<"ds_max_rtn_f32">;

 defm DS_WRXCHG_RTN_B32 : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b32">;
-defm DS_WRXCHG2_RTN_B32 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b32", VReg_64, VGPR_32>;
-defm DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b32", VReg_64, VGPR_32>;
-
-defm DS_ADD_RTN_U64 : DS_1A1D_RET_mc<"ds_add_rtn_u64", VReg_64>;
-defm DS_SUB_RTN_U64 : DS_1A1D_RET_mc<"ds_sub_rtn_u64", VReg_64>;
-defm DS_RSUB_RTN_U64 : DS_1A1D_RET_mc<"ds_rsub_rtn_u64", VReg_64>;
-defm DS_INC_RTN_U64 : DS_1A1D_RET_mc<"ds_inc_rtn_u64", VReg_64>;
-defm DS_DEC_RTN_U64 : DS_1A1D_RET_mc<"ds_dec_rtn_u64", VReg_64>;
-defm DS_MIN_RTN_I64 : DS_1A1D_RET_mc<"ds_min_rtn_i64", VReg_64>;
-defm DS_MAX_RTN_I64 : DS_1A1D_RET_mc<"ds_max_rtn_i64", VReg_64>;
-defm DS_MIN_RTN_U64 : DS_1A1D_RET_mc<"ds_min_rtn_u64", VReg_64>;
-defm DS_MAX_RTN_U64 : DS_1A1D_RET_mc<"ds_max_rtn_u64", VReg_64>;
-defm DS_AND_RTN_B64 : DS_1A1D_RET_mc<"ds_and_rtn_b64", VReg_64>;
-defm DS_OR_RTN_B64 : DS_1A1D_RET_mc<"ds_or_rtn_b64", VReg_64>;
-defm DS_XOR_RTN_B64 : DS_1A1D_RET_mc<"ds_xor_rtn_b64", VReg_64>;
-defm DS_MSKOR_RTN_B64 : DS_1A2D_RET_mc<"ds_mskor_rtn_b64", VReg_64>;
-defm DS_CMPST_RTN_B64 : DS_1A2D_RET_mc<"ds_cmpst_rtn_b64", VReg_64>;
-defm DS_CMPST_RTN_F64 : DS_1A2D_RET_mc<"ds_cmpst_rtn_f64", VReg_64>;
-defm DS_MIN_RTN_F64 : DS_1A1D_RET_mc<"ds_min_rtn_f64", VReg_64>;
-defm DS_MAX_RTN_F64 : DS_1A1D_RET_mc<"ds_max_rtn_f64", VReg_64>;
-
-defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b64", VReg_64>;
-defm DS_WRXCHG2_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>;
-defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>;
+defm DS_WRXCHG2_RTN_B32 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b32", VGPROp_64, VGPROp_32>;
+defm DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b32", VGPROp_64, VGPROp_32>;
+
+defm DS_ADD_RTN_U64 : DS_1A1D_RET_mc<"ds_add_rtn_u64", AVLdSt_64>;
+defm DS_SUB_RTN_U64 : DS_1A1D_RET_mc<"ds_sub_rtn_u64", AVLdSt_64>;
+defm DS_RSUB_RTN_U64 : DS_1A1D_RET_mc<"ds_rsub_rtn_u64", AVLdSt_64>;
+defm DS_INC_RTN_U64 : DS_1A1D_RET_mc<"ds_inc_rtn_u64", AVLdSt_64>;
+defm DS_DEC_RTN_U64 : DS_1A1D_RET_mc<"ds_dec_rtn_u64", AVLdSt_64>;
+defm DS_MIN_RTN_I64 : DS_1A1D_RET_mc<"ds_min_rtn_i64", AVLdSt_64>;
+defm DS_MAX_RTN_I64 : DS_1A1D_RET_mc<"ds_max_rtn_i64", AVLdSt_64>;
+defm DS_MIN_RTN_U64 : DS_1A1D_RET_mc<"ds_min_rtn_u64", AVLdSt_64>;
+defm DS_MAX_RTN_U64 : DS_1A1D_RET_mc<"ds_max_rtn_u64", AVLdSt_64>;
+defm DS_AND_RTN_B64 : DS_1A1D_RET_mc<"ds_and_rtn_b64", AVLdSt_64>;
+defm DS_OR_RTN_B64 : DS_1A1D_RET_mc<"ds_or_rtn_b64", AVLdSt_64>;
+defm DS_XOR_RTN_B64 : DS_1A1D_RET_mc<"ds_xor_rtn_b64", AVLdSt_64>;
+defm DS_MSKOR_RTN_B64 : DS_1A2D_RET_mc<"ds_mskor_rtn_b64", VGPROp_64>;
+defm DS_CMPST_RTN_B64 : DS_1A2D_RET_mc<"ds_cmpst_rtn_b64", VGPROp_64>;
+defm DS_CMPST_RTN_F64 : DS_1A2D_RET_mc<"ds_cmpst_rtn_f64", VGPROp_64>;
+defm DS_MIN_RTN_F64 : DS_1A1D_RET_mc<"ds_min_rtn_f64", AVLdSt_64>;
+defm DS_MAX_RTN_F64 : DS_1A1D_RET_mc<"ds_max_rtn_f64", AVLdSt_64>;
+
+defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b64", AVLdSt_64>;
+defm DS_WRXCHG2_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b64", VGPROp_128, VGPROp_64>;
+defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b64", VGPROp_128, VGPROp_64>;

 let isConvergent = 1, usesCustomInserter = 1 in {
 def DS_GWS_INIT : DS_GWS_1D<"ds_gws_init"> {
@@ -745,19 +757,19 @@ def DS_WRITE_SRC2_B64 : DS_1A<"ds_write_src2_b64">;
 } // End SubtargetPredicate = HasDsSrc2Insts

 let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1 in {
-def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, 0, Swizzle>;
+def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", AVLdSt_32, 0, Swizzle>;
 }

 let mayStore = 0 in {
 defm DS_READ_I16 : DS_1A_RET_mc<"ds_read_i16">;
 defm DS_READ_B32 : DS_1A_RET_mc<"ds_read_b32">;
-defm DS_READ_B64 : DS_1A_RET_mc<"ds_read_b64", VReg_64>;
+defm DS_READ_B64 : DS_1A_RET_mc<"ds_read_b64", AVLdSt_64>;

-defm DS_READ2_B32 : DS_1A_Off8_RET_mc<"ds_read2_b32", VReg_64>;
-defm DS_READ2ST64_B32: DS_1A_Off8_RET_mc<"ds_read2st64_b32", VReg_64>;
+defm DS_READ2_B32 : DS_1A_Off8_RET_mc<"ds_read2_b32", AVLdSt_64>;
+defm DS_READ2ST64_B32: DS_1A_Off8_RET_mc<"ds_read2st64_b32", AVLdSt_64>;

-defm DS_READ2_B64 : DS_1A_Off8_RET_mc<"ds_read2_b64", VReg_128>;
-defm DS_READ2ST64_B64: DS_1A_Off8_RET_mc<"ds_read2st64_b64", VReg_128>;
+defm DS_READ2_B64 : DS_1A_Off8_RET_mc<"ds_read2_b64", AVLdSt_128>;
+defm DS_READ2ST64_B64: DS_1A_Off8_RET_mc<"ds_read2st64_b64", AVLdSt_128>;

 let has_m0_read = 0 in {
 let SubtargetPredicate = HasD16LoadStore, TiedSourceNotRead = 1 in {
@@ -792,21 +804,21 @@ def DS_ORDERED_COUNT : DS_1A_RET_GDS<"ds_ordered_count">;

 let SubtargetPredicate = isGFX7Plus in {

-defm DS_WRAP_RTN_B32 : DS_1A2D_RET_mc<"ds_wrap_rtn_b32", VGPR_32>;
-defm DS_CONDXCHG32_RTN_B64 : DS_1A1D_RET_mc<"ds_condxchg32_rtn_b64", VReg_64>;
+defm DS_WRAP_RTN_B32 : DS_1A2D_RET_mc<"ds_wrap_rtn_b32", VGPROp_32>;
+defm DS_CONDXCHG32_RTN_B64 : DS_1A1D_RET_mc<"ds_condxchg32_rtn_b64", AVLdSt_64>;

 let isConvergent = 1, usesCustomInserter = 1 in {
 def DS_GWS_SEMA_RELEASE_ALL : DS_GWS_0D<"ds_gws_sema_release_all">;
 }

 let mayStore = 0 in {
-defm DS_READ_B96 : DS_1A_RET_mc<"ds_read_b96", VReg_96>;
-defm DS_READ_B128: DS_1A_RET_mc<"ds_read_b128", VReg_128>;
+defm DS_READ_B96 : DS_1A_RET_mc<"ds_read_b96", AVLdSt_96>;
+defm DS_READ_B128: DS_1A_RET_mc<"ds_read_b128", AVLdSt_128>;
 } // End mayStore = 0

 let mayLoad = 0 in {
-defm DS_WRITE_B96 : DS_1A1D_NORET_mc<"ds_write_b96", VReg_96>;
-defm DS_WRITE_B128 : DS_1A1D_NORET_mc<"ds_write_b128", VReg_128>;
+defm DS_WRITE_B96 : DS_1A1D_NORET_mc<"ds_write_b96", AVLdSt_96>;
+defm DS_WRITE_B128 : DS_1A1D_NORET_mc<"ds_write_b128", AVLdSt_128>;
 } // End mayLoad = 0

 def DS_NOP : DS_VOID<"ds_nop">;
@@ -839,8 +851,8 @@ def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;

 let SubtargetPredicate = isGFX11Only in {

-def DS_ADD_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_add_gs_reg_rtn", VReg_64, VGPR_32>;
-def DS_SUB_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_sub_gs_reg_rtn", VReg_64, VGPR_32>;
+def DS_ADD_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_add_gs_reg_rtn", VGPROp_64, VGPROp_32>;
+def DS_SUB_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_sub_gs_reg_rtn", VGPROp_64, VGPROp_32>;

 } // let SubtargetPredicate = isGFX11Only

@@ -848,7 +860,7 @@ let SubtargetPredicate = isGFX11Plus in {

 let OtherPredicates = [HasImageInsts] in
 def DS_BVH_STACK_RTN_B32 : DS_BVH_STACK<"ds_bvh_stack_rtn_b32",
-                                        VGPR_32, VReg_128> ;
+                                        VGPROp_32, VGPROp_128> ;

 } // let SubtargetPredicate = isGFX11Plus

@@ -860,15 +872,15 @@ let SubtargetPredicate = isGFX12Plus in {

 let OtherPredicates = [HasImageInsts] in {
 def DS_BVH_STACK_PUSH8_POP1_RTN_B32 : DS_BVH_STACK<
-    "ds_bvh_stack_push8_pop1_rtn_b32", VGPR_32, VReg_256>;
+    "ds_bvh_stack_push8_pop1_rtn_b32", VGPROp_32, VGPROp_256>;
 def DS_BVH_STACK_PUSH8_POP2_RTN_B64 : DS_BVH_STACK<
-    "ds_bvh_stack_push8_pop2_rtn_b64", VReg_64, VReg_256>;
+    "ds_bvh_stack_push8_pop2_rtn_b64", VGPROp_64, VGPROp_256>;
 } // End OtherPredicates = [HasImageInsts].

 defm DS_COND_SUB_U32 : DS_1A1D_NORET_mc_gfx9<"ds_cond_sub_u32">;
-defm DS_COND_SUB_RTN_U32 : DS_1A1D_RET_mc_gfx9<"ds_cond_sub_rtn_u32", VGPR_32>;
+defm DS_COND_SUB_RTN_U32 : DS_1A1D_RET_mc_gfx9<"ds_cond_sub_rtn_u32", VGPROp_32>;
 defm DS_SUB_CLAMP_U32 : DS_1A1D_NORET_mc_gfx9<"ds_sub_clamp_u32">;
-defm DS_SUB_CLAMP_RTN_U32 : DS_1A1D_RET_mc_gfx9<"ds_sub_clamp_rtn_u32", VGPR_32>;
+defm DS_SUB_CLAMP_RTN_U32 : DS_1A1D_RET_mc_gfx9<"ds_sub_clamp_rtn_u32", VGPROp_32>;

 def DS_BPERMUTE_FI_B32 : DS_1A1D_PERMUTE <"ds_bpermute_fi_b32", int_amdgcn_ds_bpermute_fi_b32>;
@@ -889,11 +901,11 @@ let SubtargetPredicate = isGFX1250Plus in {

 let WaveSizePredicate = isWave32, mayStore = 0 in {
 let OtherPredicates = [HasTransposeLoadF4F6Insts] in {
-defm DS_LOAD_TR4_B64 : DS_1A_RET_NoM0<"ds_load_tr4_b64", VReg_64>;
-defm DS_LOAD_TR6_B96 : DS_1A_RET_NoM0<"ds_load_tr6_b96", VReg_96>;
+defm DS_LOAD_TR4_B64 : DS_1A_RET_NoM0<"ds_load_tr4_b64", VGPROp_64>;
+defm DS_LOAD_TR6_B96 : DS_1A_RET_NoM0<"ds_load_tr6_b96", VGPROp_96>;
 } // End OtherPredicates = [HasTransposeLoadF4F6Insts]
-defm DS_LOAD_TR8_B64 : DS_1A_RET_NoM0<"ds_load_tr8_b64", VReg_64>;
-defm DS_LOAD_TR16_B128 : DS_1A_RET_NoM0<"ds_load_tr16_b128", VReg_128>;
+defm DS_LOAD_TR8_B64 : DS_1A_RET_NoM0<"ds_load_tr8_b64", VGPROp_64>;
+defm DS_LOAD_TR16_B128 : DS_1A_RET_NoM0<"ds_load_tr16_b128", VGPROp_128>;
 } // End WaveSizePredicate = isWave32, mayStore = 0

 let OtherPredicates = [HasLdsBarrierArriveAtomic] in {
@@ -906,7 +918,7 @@ def : GCNPat <
   (DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 VGPR_32:$ptr, Offset:$offset, (i1 0))
 >;

-defm DS_ATOMIC_BARRIER_ARRIVE_RTN_B64 : DS_1A1D_RET_mc_gfx9<"ds_atomic_barrier_arrive_rtn_b64", VReg_64>;
+defm DS_ATOMIC_BARRIER_ARRIVE_RTN_B64 : DS_1A1D_RET_mc_gfx9<"ds_atomic_barrier_arrive_rtn_b64", VGPROp_64>;

 def : GCNPat<
   (i64 (int_amdgcn_ds_atomic_barrier_arrive_rtn_b64 (DS1Addr1Offset i32:$ptr, i32:$offset), i64:$data)),
@@ -917,10 +929,10 @@ def : GCNPat<
 } // End SubtargetPredicate = isGFX1250Plus

 let WaveSizePredicate = isWave64, SubtargetPredicate = HasGFX950Insts, mayStore = 0 in {
-  defm DS_READ_B64_TR_B4 : DS_1A_RET_NoM0<"ds_read_b64_tr_b4", VReg_64>;
-  defm DS_READ_B64_TR_B8 : DS_1A_RET_NoM0<"ds_read_b64_tr_b8", VReg_64>;
-  defm DS_READ_B64_TR_B16 : DS_1A_RET_NoM0<"ds_read_b64_tr_b16", VReg_64>;
-  defm DS_READ_B96_TR_B6 : DS_1A_RET_NoM0<"ds_read_b96_tr_b6", VReg_96>;
+  defm DS_READ_B64_TR_B4 : DS_1A_RET_NoM0<"ds_read_b64_tr_b4", AVLdSt_64>;
+  defm DS_READ_B64_TR_B8 : DS_1A_RET_NoM0<"ds_read_b64_tr_b8", AVLdSt_64>;
+  defm DS_READ_B64_TR_B16 : DS_1A_RET_NoM0<"ds_read_b64_tr_b16", AVLdSt_64>;
+  defm DS_READ_B96_TR_B6 : DS_1A_RET_NoM0<"ds_read_b96_tr_b6", AVLdSt_96>;
 }

 //===----------------------------------------------------------------------===//
@@ -1072,6 +1084,7 @@ class DS64Bit4ByteAlignedReadPat : G
   (inst $ptr, $offset0, $offset1, (i1 0))
 >;

+// TODO: Should this use AVLdSt_64 for the class?
 class DS64Bit4ByteAlignedWritePat : GCNPat<
   (frag vt:$value, (DS64Bit4ByteAligned i32:$ptr, i32:$offset0, i32:$offset1)),
   (inst $ptr, (i32 (EXTRACT_SUBREG VReg_64:$value, sub0)),
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index a7cf1faa60ce2..50d3b4baef38d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2606,6 +2606,17 @@ class getEquivalentAGPRClass {
            !eq(RC.Size, 1024) : AReg_1024);
 }

+class getEquivalentAGPROperand {
+  defvar Size = RC.RegClass.Size;
+  RegisterOperand ret =
+    !cond(!eq(Size, 32) : RegisterOperand,
+          !eq(Size, 64) : RegisterOperand,
+          !eq(Size, 96) : RegisterOperand,
+          !eq(Size, 128) : RegisterOperand,
+          !eq(Size, 160) : RegisterOperand,
+          !eq(Size, 1024) : RegisterOperand);
+}
+
 class getHasVOP3DPP {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index fad428d1e43a5..dd6030769c03c 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1344,6 +1344,16 @@ def VGPRSrc_16 : RegisterOperand {
   let EncoderMethod = "getMachineOpValueT16";
 }

+// TODO: These cases should use default target alignment
+def VGPROp_16 : RegisterOperand;
+def VGPROp_32 : RegisterOperand;
+
+foreach size = ["64", "96", "128", "256" ] in {
+  def VGPROp_#size : RegisterOperand("VReg_"#size)>;
+  def VGPROp_#size#_Align1 : RegisterOperand("VReg_"#size)>;
+  def VGPROp_#size#_Align2 : RegisterOperand("VReg_"#size#_Align2)>;
+}
+
 //===----------------------------------------------------------------------===//
 // ASrc_* Operands with an AccVGPR
 //===----------------------------------------------------------------------===//
@@ -1422,6 +1432,7 @@ def AVDst_512 : AVDstOperand;

 class AVLdStOperand : AVOperand;

+// TODO: These cases should use target align variant
 def AVLdSt_32 : AVLdStOperand;
 def AVLdSt_64 : AVLdStOperand;
 def AVLdSt_96 : AVLdStOperand;
@@ -1429,6 +1440,9 @@ def AVLdSt_128 : AVLdStOperand;
 def AVLdSt_160 : AVLdStOperand;
 def AVLdSt_1024 : AVLdStOperand;

+def AVLdSt_96_Align1 : AVLdStOperand;
+def AVLdSt_96_Align2 : AVLdStOperand;
+
 //===----------------------------------------------------------------------===//
 // ACSrc_* Operands with an AGPR or an inline constant
 //===----------------------------------------------------------------------===//
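Illustrative usage sketch (not part of the patch; the DS_FOO_* opcode names below are hypothetical): with the RegisterOperand-based multiclasses above, the data-operand flexibility is chosen at instantiation time. An AVLdSt_* operand admits either VGPRs or AGPRs, while the two-data-operand forms assert OperandIsVGPR and therefore must be given VGPROp_* operands.

// Hypothetical instantiations, mirroring DS_ADD_U64 / DS_CMPSTORE_B64 in the diff above.
defm DS_FOO_U64  : DS_1A1D_NORET_mc<"ds_foo_u64", AVLdSt_64>;   // data0 may live in a VGPR or an AGPR
defm DS_FOO2_B64 : DS_1A2D_NORET_mc<"ds_foo2_b64", VGPROp_64>;  // both data operands restricted to VGPRs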