@@ -6846,6 +6846,75 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr
68466846 ret float %val
68476847}
68486848
; Check that a uniform offset produced by OR-ing the incoming SGPR with the
; immediate 0x80000000 stays on the scalar path: the OR is selected to
; S_OR_B32 and the load to the SGPR-offset form of S_BUFFER_LOAD_DWORD
; (SGPR_IMM on GFX12), rather than being moved to VALU/VGPRs.
define amdgpu_ps float @s_buffer_load_f32_offset_or_vgpr_imm(<4 x i32> inreg %rsrc, i32 inreg %offset.s) {
  ; GFX6-LABEL: name: s_buffer_load_f32_offset_or_vgpr_imm
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX6-NEXT: {{  $}}
  ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
  ; GFX6-NEXT:   [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
  ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_OR_B32_]], 0 :: (dereferenceable invariant load (s32))
  ; GFX6-NEXT:   $vgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX7-LABEL: name: s_buffer_load_f32_offset_or_vgpr_imm
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX7-NEXT: {{  $}}
  ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
  ; GFX7-NEXT:   [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
  ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_OR_B32_]], 0 :: (dereferenceable invariant load (s32))
  ; GFX7-NEXT:   $vgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX8-LABEL: name: s_buffer_load_f32_offset_or_vgpr_imm
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX8-NEXT: {{  $}}
  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
  ; GFX8-NEXT:   [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
  ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_OR_B32_]], 0 :: (dereferenceable invariant load (s32))
  ; GFX8-NEXT:   $vgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX12-LABEL: name: s_buffer_load_f32_offset_or_vgpr_imm
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
  ; GFX12-NEXT:   [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
  ; GFX12-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_OR_B32_]], 0, 0 :: (dereferenceable invariant load (s32))
  ; GFX12-NEXT:   $vgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR_IMM]]
  ; GFX12-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %offset = or i32 %offset.s, -2147483648
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
  ret float %val
}
6917+
; Intrinsic declarations referenced by the tests above. The third operand is
; the cache-policy immediate, hence `immarg`.
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg)
declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg)
0 commit comments