Skip to content

Commit 201ed02

Browse files
committed
[AMDGPU] Make S_WAIT_EVENT a scheduling boundary
1 parent 5245f2a commit 201ed02

File tree

2 files changed

+11
-4
lines changed

2 files changed

+11
-4
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4214,6 +4214,13 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
42144214
if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)
42154215
return true;
42164216

4217+
// The scheduler does not understand what kind of external events this
4218+
// instruction waits for, so it cannot do a good job scheduling it. Making it
4219+
// a boundary allows front ends to insert it at an appropriate place without
4220+
// the scheduler arbitrarily moving it.
4221+
if (MI.getOpcode() == AMDGPU::S_WAIT_EVENT)
4222+
return true;
4223+
42174224
// Target-independent instructions do not have an implicit-use of EXEC, even
42184225
// when they operate on VGPRs. Treating EXEC modifications as scheduling
42194226
// boundaries prevents incorrect movements of such instructions.

llvm/test/CodeGen/AMDGPU/sched-wait-event.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,18 @@ define amdgpu_ps void @test_wait_event(i32 inreg %arg, float %arg1, float %arg2,
2222
; GFX12-NEXT: s_mov_b32 exec_lo, s0
2323
; GFX12-NEXT: v_interp_p10_f32 v4, v2, v1, v2 wait_exp:1
2424
; GFX12-NEXT: v_interp_p10_f32 v1, v3, v1, v3 wait_exp:0
25-
; GFX12-NEXT: s_wait_event 0x2
26-
; GFX12-NEXT: v_mov_b32_e32 v8, 0
2725
; GFX12-NEXT: v_interp_p2_f32 v4, v2, v0, v4 wait_exp:7
2826
; GFX12-NEXT: v_interp_p2_f32 v0, v3, v0, v1 wait_exp:7
2927
; GFX12-NEXT: v_mul_f32_e32 v1, 0x44800000, v4
3028
; GFX12-NEXT: v_mul_f32_e32 v0, 0x44800000, v0
3129
; GFX12-NEXT: v_cvt_i32_f32_e32 v1, v1
3230
; GFX12-NEXT: v_cvt_i32_f32_e32 v0, v0
31+
; GFX12-NEXT: s_wait_event 0x2
3332
; GFX12-NEXT: image_load v[4:7], [v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D
3433
; GFX12-NEXT: s_wait_loadcnt 0x0
35-
; GFX12-NEXT: v_dual_mul_f32 v7, 0.5, v7 :: v_dual_mul_f32 v6, 0.5, v6
36-
; GFX12-NEXT: v_dual_mul_f32 v5, 0.5, v5 :: v_dual_mul_f32 v4, 0.5, v4
34+
; GFX12-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mul_f32 v7, 0.5, v7
35+
; GFX12-NEXT: v_dual_mul_f32 v6, 0.5, v6 :: v_dual_mul_f32 v5, 0.5, v5
36+
; GFX12-NEXT: v_mul_f32_e32 v4, 0.5, v4
3737
; GFX12-NEXT: image_store v[4:7], [v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D
3838
; GFX12-NEXT: s_wait_storecnt 0x0
3939
; GFX12-NEXT: export mrt0 v8, v8, v8, v8 done

0 commit comments

Comments
 (0)