@@ -4000,6 +4000,116 @@ bb:
40004000 ret void
40014001}
40024002
; Scratch-store codegen test. The stored-to address is built as
;   %sgpr_base + (%sidx + %vidx + (-16))
; where %sgpr_base and %sidx are inreg arguments and %vidx is not.
;
; What the check lines below pin down for the constant -16:
;   * GFX9 and GFX940 prefixes: the -16 is added in the ALU (v_add3_u32) and
;     the scratch store carries no immediate offset.
;   * GFX10, GFX11 and GFX12 prefixes: the -16 is emitted as the store's
;     immediate offset (offset:-16) after two separate adds.
; Each UNALIGNED_ prefix expects the same instruction sequence as its
; non-UNALIGNED counterpart.
;
; NOTE(review): the check lines look machine-generated (presumably by
; update_llc_test_checks.py -- confirm against the file header); regenerate
; them rather than editing by hand.
4003+ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset (ptr addrspace (5 ) inreg %sgpr_base , i32 inreg %sidx , i32 %vidx ) {
4004+ ; GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4005+ ; GFX9: ; %bb.0: ; %bb
4006+ ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
4007+ ; GFX9-NEXT: v_add_u32_e32 v0, s3, v0
4008+ ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
4009+ ; GFX9-NEXT: v_add3_u32 v0, s2, v0, -16
4010+ ; GFX9-NEXT: v_mov_b32_e32 v1, 15
4011+ ; GFX9-NEXT: scratch_store_dword v0, v1, off
4012+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
4013+ ; GFX9-NEXT: s_endpgm
4014+ ;
4015+ ; GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4016+ ; GFX10: ; %bb.0: ; %bb
4017+ ; GFX10-NEXT: s_add_u32 s0, s0, s5
4018+ ; GFX10-NEXT: s_addc_u32 s1, s1, 0
4019+ ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4020+ ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4021+ ; GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0
4022+ ; GFX10-NEXT: v_mov_b32_e32 v1, 15
4023+ ; GFX10-NEXT: v_add_nc_u32_e32 v0, s2, v0
4024+ ; GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16
4025+ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4026+ ; GFX10-NEXT: s_endpgm
4027+ ;
4028+ ; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4029+ ; GFX940: ; %bb.0: ; %bb
4030+ ; GFX940-NEXT: v_add_u32_e32 v0, s1, v0
4031+ ; GFX940-NEXT: v_add3_u32 v0, s0, v0, -16
4032+ ; GFX940-NEXT: v_mov_b32_e32 v1, 15
4033+ ; GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1
4034+ ; GFX940-NEXT: s_waitcnt vmcnt(0)
4035+ ; GFX940-NEXT: s_endpgm
4036+ ;
4037+ ; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4038+ ; GFX11: ; %bb.0: ; %bb
4039+ ; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4040+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4041+ ; GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0
4042+ ; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc
4043+ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
4044+ ; GFX11-NEXT: s_endpgm
4045+ ;
4046+ ; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4047+ ; GFX12: ; %bb.0: ; %bb
4048+ ; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4049+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4050+ ; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4051+ ; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4052+ ; GFX12-NEXT: s_wait_storecnt 0x0
4053+ ; GFX12-NEXT: s_endpgm
4054+ ;
4055+ ; UNALIGNED_GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4056+ ; UNALIGNED_GFX9: ; %bb.0: ; %bb
4057+ ; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
4058+ ; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, s3, v0
4059+ ; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
4060+ ; UNALIGNED_GFX9-NEXT: v_add3_u32 v0, s2, v0, -16
4061+ ; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 15
4062+ ; UNALIGNED_GFX9-NEXT: scratch_store_dword v0, v1, off
4063+ ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
4064+ ; UNALIGNED_GFX9-NEXT: s_endpgm
4065+ ;
4066+ ; UNALIGNED_GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4067+ ; UNALIGNED_GFX10: ; %bb.0: ; %bb
4068+ ; UNALIGNED_GFX10-NEXT: s_add_u32 s0, s0, s5
4069+ ; UNALIGNED_GFX10-NEXT: s_addc_u32 s1, s1, 0
4070+ ; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4071+ ; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4072+ ; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0
4073+ ; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
4074+ ; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s2, v0
4075+ ; UNALIGNED_GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16
4076+ ; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4077+ ; UNALIGNED_GFX10-NEXT: s_endpgm
4078+ ;
4079+ ; UNALIGNED_GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4080+ ; UNALIGNED_GFX940: ; %bb.0: ; %bb
4081+ ; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v0, s1, v0
4082+ ; UNALIGNED_GFX940-NEXT: v_add3_u32 v0, s0, v0, -16
4083+ ; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 15
4084+ ; UNALIGNED_GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1
4085+ ; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0)
4086+ ; UNALIGNED_GFX940-NEXT: s_endpgm
4087+ ;
4088+ ; UNALIGNED_GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4089+ ; UNALIGNED_GFX11: ; %bb.0: ; %bb
4090+ ; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4091+ ; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4092+ ; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0
4093+ ; UNALIGNED_GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc
4094+ ; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
4095+ ; UNALIGNED_GFX11-NEXT: s_endpgm
4096+ ;
4097+ ; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4098+ ; UNALIGNED_GFX12: ; %bb.0: ; %bb
4099+ ; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4100+ ; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4101+ ; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4102+ ; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4103+ ; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
4104+ ; UNALIGNED_GFX12-NEXT: s_endpgm
; The block label below is echoed by the "%bb" text in every check group above,
; so renaming it would require regenerating the checks.
4105+ bb:
; Sum the inreg index and the non-inreg index.
4106+ %add1 = add nsw i32 %sidx , %vidx
; Apply the constant -16 whose placement the check lines pin down.
4107+ %add2 = add nsw i32 %add1 , -16
; The indexed element type is i8, so %add2 acts as a byte offset from %sgpr_base.
4108+ %gep = getelementptr inbounds [16 x i8 ], ptr addrspace (5 ) %sgpr_base , i32 0 , i32 %add2
; Volatile 4-byte store of the constant 15; every check group expects exactly
; one scratch store of v1 (loaded with 15) for it.
4109+ store volatile i32 15 , ptr addrspace (5 ) %gep , align 4
4110+ ret void
4111+ }
4112+
40034113define amdgpu_gs void @sgpr_base_negative_offset (ptr addrspace (1 ) %out , ptr addrspace (5 ) inreg %scevgep ) {
40044114; GFX9-LABEL: sgpr_base_negative_offset:
40054115; GFX9: ; %bb.0: ; %entry
0 commit comments