@@ -3990,6 +3990,116 @@ bb:
39903990 ret void
39913991}
39923992
; Test: scratch store whose address is base pointer + (%sidx + %vidx) + (-16).
;
; The function takes an addrspace(5) base pointer and an i32 index that are both
; marked inreg, plus a plain i32 index. It adds the two indices, adds the
; negative constant -16, uses the sum as a byte index from the base, and does a
; volatile store of the i32 constant 15 to that address.
;
; What the expected output below shows per check prefix:
;   - GFX9 / GFX940: the -16 is added into the address register with
;     v_add3_u32, and the scratch store carries no immediate offset.
;   - GFX10 / GFX11 / GFX12: the two register terms are added with
;     v_add_nc_u32 and the -16 appears as `offset:-16` on the scratch store.
;   - Each UNALIGNED_* prefix expects the same sequence as its counterpart.
;
; NOTE(review): the CHECK-NEXT blocks look tool-generated; presumably they
; should be regenerated rather than hand-edited -- confirm against the file header.
3993+ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset (ptr addrspace (5 ) inreg %sgpr_base , i32 inreg %sidx , i32 %vidx ) {
3994+ ; GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
3995+ ; GFX9: ; %bb.0: ; %bb
3996+ ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
3997+ ; GFX9-NEXT: v_add_u32_e32 v0, s3, v0
3998+ ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
3999+ ; GFX9-NEXT: v_add3_u32 v0, s2, v0, -16
4000+ ; GFX9-NEXT: v_mov_b32_e32 v1, 15
4001+ ; GFX9-NEXT: scratch_store_dword v0, v1, off
4002+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
4003+ ; GFX9-NEXT: s_endpgm
4004+ ;
4005+ ; GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4006+ ; GFX10: ; %bb.0: ; %bb
4007+ ; GFX10-NEXT: s_add_u32 s0, s0, s5
4008+ ; GFX10-NEXT: s_addc_u32 s1, s1, 0
4009+ ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4010+ ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4011+ ; GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0
4012+ ; GFX10-NEXT: v_mov_b32_e32 v1, 15
4013+ ; GFX10-NEXT: v_add_nc_u32_e32 v0, s2, v0
4014+ ; GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16
4015+ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4016+ ; GFX10-NEXT: s_endpgm
4017+ ;
4018+ ; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4019+ ; GFX940: ; %bb.0: ; %bb
4020+ ; GFX940-NEXT: v_add_u32_e32 v0, s1, v0
4021+ ; GFX940-NEXT: v_add3_u32 v0, s0, v0, -16
4022+ ; GFX940-NEXT: v_mov_b32_e32 v1, 15
4023+ ; GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1
4024+ ; GFX940-NEXT: s_waitcnt vmcnt(0)
4025+ ; GFX940-NEXT: s_endpgm
4026+ ;
4027+ ; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4028+ ; GFX11: ; %bb.0: ; %bb
4029+ ; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4030+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4031+ ; GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0
4032+ ; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc
4033+ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
4034+ ; GFX11-NEXT: s_endpgm
4035+ ;
4036+ ; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4037+ ; GFX12: ; %bb.0: ; %bb
4038+ ; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4039+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4040+ ; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4041+ ; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4042+ ; GFX12-NEXT: s_wait_storecnt 0x0
4043+ ; GFX12-NEXT: s_endpgm
4044+ ;
4045+ ; UNALIGNED_GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4046+ ; UNALIGNED_GFX9: ; %bb.0: ; %bb
4047+ ; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
4048+ ; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, s3, v0
4049+ ; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
4050+ ; UNALIGNED_GFX9-NEXT: v_add3_u32 v0, s2, v0, -16
4051+ ; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 15
4052+ ; UNALIGNED_GFX9-NEXT: scratch_store_dword v0, v1, off
4053+ ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
4054+ ; UNALIGNED_GFX9-NEXT: s_endpgm
4055+ ;
4056+ ; UNALIGNED_GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4057+ ; UNALIGNED_GFX10: ; %bb.0: ; %bb
4058+ ; UNALIGNED_GFX10-NEXT: s_add_u32 s0, s0, s5
4059+ ; UNALIGNED_GFX10-NEXT: s_addc_u32 s1, s1, 0
4060+ ; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4061+ ; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4062+ ; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0
4063+ ; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
4064+ ; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s2, v0
4065+ ; UNALIGNED_GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16
4066+ ; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4067+ ; UNALIGNED_GFX10-NEXT: s_endpgm
4068+ ;
4069+ ; UNALIGNED_GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4070+ ; UNALIGNED_GFX940: ; %bb.0: ; %bb
4071+ ; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v0, s1, v0
4072+ ; UNALIGNED_GFX940-NEXT: v_add3_u32 v0, s0, v0, -16
4073+ ; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 15
4074+ ; UNALIGNED_GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1
4075+ ; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0)
4076+ ; UNALIGNED_GFX940-NEXT: s_endpgm
4077+ ;
4078+ ; UNALIGNED_GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4079+ ; UNALIGNED_GFX11: ; %bb.0: ; %bb
4080+ ; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4081+ ; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4082+ ; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0
4083+ ; UNALIGNED_GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc
4084+ ; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
4085+ ; UNALIGNED_GFX11-NEXT: s_endpgm
4086+ ;
4087+ ; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4088+ ; UNALIGNED_GFX12: ; %bb.0: ; %bb
4089+ ; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4090+ ; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4091+ ; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4092+ ; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4093+ ; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
4094+ ; UNALIGNED_GFX12-NEXT: s_endpgm
4095+ bb:
; Index = (%sidx + %vidx) + (-16); the negative constant is added last so it
; is the outermost term of the address expression.
4096+ %add1 = add nsw i32 %sidx , %vidx
4097+ %add2 = add nsw i32 %add1 , -16
; The index selects an i8 element, so %add2 is a byte offset from %sgpr_base.
4098+ %gep = getelementptr inbounds [16 x i8 ], ptr addrspace (5 ) %sgpr_base , i32 0 , i32 %add2
; The store is volatile, so it must appear in the output checked above.
4099+ store volatile i32 15 , ptr addrspace (5 ) %gep , align 4
4100+ ret void
4101+ }
4102+
39934103define amdgpu_gs void @sgpr_base_negative_offset (ptr addrspace (1 ) %out , ptr addrspace (5 ) inreg %scevgep ) {
39944104; GFX9-LABEL: sgpr_base_negative_offset:
39954105; GFX9: ; %bb.0: ; %entry
0 commit comments