@@ -4000,116 +4000,6 @@ bb:
40004000 ret void
40014001}
40024002
4003- define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset (ptr addrspace (5 ) inreg %sgpr_base , i32 inreg %sidx , i32 %vidx ) {
4004- ; GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4005- ; GFX9: ; %bb.0: ; %bb
4006- ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
4007- ; GFX9-NEXT: v_add_u32_e32 v0, s3, v0
4008- ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
4009- ; GFX9-NEXT: v_add3_u32 v0, s2, v0, -16
4010- ; GFX9-NEXT: v_mov_b32_e32 v1, 15
4011- ; GFX9-NEXT: scratch_store_dword v0, v1, off
4012- ; GFX9-NEXT: s_waitcnt vmcnt(0)
4013- ; GFX9-NEXT: s_endpgm
4014- ;
4015- ; GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4016- ; GFX10: ; %bb.0: ; %bb
4017- ; GFX10-NEXT: s_add_u32 s0, s0, s5
4018- ; GFX10-NEXT: s_addc_u32 s1, s1, 0
4019- ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4020- ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4021- ; GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0
4022- ; GFX10-NEXT: v_mov_b32_e32 v1, 15
4023- ; GFX10-NEXT: v_add_nc_u32_e32 v0, s2, v0
4024- ; GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16
4025- ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4026- ; GFX10-NEXT: s_endpgm
4027- ;
4028- ; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4029- ; GFX940: ; %bb.0: ; %bb
4030- ; GFX940-NEXT: v_add_u32_e32 v0, s1, v0
4031- ; GFX940-NEXT: v_add3_u32 v0, s0, v0, -16
4032- ; GFX940-NEXT: v_mov_b32_e32 v1, 15
4033- ; GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1
4034- ; GFX940-NEXT: s_waitcnt vmcnt(0)
4035- ; GFX940-NEXT: s_endpgm
4036- ;
4037- ; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4038- ; GFX11: ; %bb.0: ; %bb
4039- ; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4040- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4041- ; GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0
4042- ; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc
4043- ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
4044- ; GFX11-NEXT: s_endpgm
4045- ;
4046- ; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4047- ; GFX12: ; %bb.0: ; %bb
4048- ; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4049- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4050- ; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4051- ; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4052- ; GFX12-NEXT: s_wait_storecnt 0x0
4053- ; GFX12-NEXT: s_endpgm
4054- ;
4055- ; UNALIGNED_GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4056- ; UNALIGNED_GFX9: ; %bb.0: ; %bb
4057- ; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
4058- ; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, s3, v0
4059- ; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
4060- ; UNALIGNED_GFX9-NEXT: v_add3_u32 v0, s2, v0, -16
4061- ; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 15
4062- ; UNALIGNED_GFX9-NEXT: scratch_store_dword v0, v1, off
4063- ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
4064- ; UNALIGNED_GFX9-NEXT: s_endpgm
4065- ;
4066- ; UNALIGNED_GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4067- ; UNALIGNED_GFX10: ; %bb.0: ; %bb
4068- ; UNALIGNED_GFX10-NEXT: s_add_u32 s0, s0, s5
4069- ; UNALIGNED_GFX10-NEXT: s_addc_u32 s1, s1, 0
4070- ; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4071- ; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4072- ; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0
4073- ; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
4074- ; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s2, v0
4075- ; UNALIGNED_GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16
4076- ; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4077- ; UNALIGNED_GFX10-NEXT: s_endpgm
4078- ;
4079- ; UNALIGNED_GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4080- ; UNALIGNED_GFX940: ; %bb.0: ; %bb
4081- ; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v0, s1, v0
4082- ; UNALIGNED_GFX940-NEXT: v_add3_u32 v0, s0, v0, -16
4083- ; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 15
4084- ; UNALIGNED_GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1
4085- ; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0)
4086- ; UNALIGNED_GFX940-NEXT: s_endpgm
4087- ;
4088- ; UNALIGNED_GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4089- ; UNALIGNED_GFX11: ; %bb.0: ; %bb
4090- ; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4091- ; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4092- ; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0
4093- ; UNALIGNED_GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc
4094- ; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
4095- ; UNALIGNED_GFX11-NEXT: s_endpgm
4096- ;
4097- ; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4098- ; UNALIGNED_GFX12: ; %bb.0: ; %bb
4099- ; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4100- ; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4101- ; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4102- ; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4103- ; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
4104- ; UNALIGNED_GFX12-NEXT: s_endpgm
4105- bb:
4106- %add1 = add nsw i32 %sidx , %vidx
4107- %add2 = add nsw i32 %add1 , -16
4108- %gep = getelementptr inbounds [16 x i8 ], ptr addrspace (5 ) %sgpr_base , i32 0 , i32 %add2
4109- store volatile i32 15 , ptr addrspace (5 ) %gep , align 4
4110- ret void
4111- }
4112-
41134003define amdgpu_gs void @sgpr_base_negative_offset (ptr addrspace (1 ) %out , ptr addrspace (5 ) inreg %scevgep ) {
41144004; GFX9-LABEL: sgpr_base_negative_offset:
41154005; GFX9: ; %bb.0: ; %entry
0 commit comments