Skip to content

Commit b6d4a5c

Browse files
committed
Simplify selection of load instructions.
1 parent 53c3b21 commit b6d4a5c

File tree

8 files changed

+881
-870
lines changed

8 files changed

+881
-870
lines changed

llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.cpp

Lines changed: 3 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -32,22 +32,6 @@ class BarrierLatency : public ScheduleDAGMutation {
3232
void apply(ScheduleDAGInstrs *DAG) override;
3333
};
3434

35-
static bool isMemLoad(const MachineInstr *MI) {
36-
auto isLoad = [](const MachineInstr *MI) {
37-
return (SIInstrInfo::isDS(*MI) || SIInstrInfo::isVMEM(*MI) ||
38-
SIInstrInfo::isSMRD(*MI)) &&
39-
MI->mayLoad();
40-
};
41-
42-
if (MI->isBundle()) {
43-
auto I = std::next(MI->getIterator());
44-
return I != MI->getParent()->instr_end() && I->isInsideBundle() &&
45-
isLoad(&*I);
46-
}
47-
48-
return isLoad(MI);
49-
}
50-
5135
void BarrierLatency::apply(ScheduleDAGInstrs *DAG) {
5236
const unsigned SyntheticLatency = 2000;
5337
for (SUnit &SU : DAG->SUnits) {
@@ -62,7 +46,9 @@ void BarrierLatency::apply(ScheduleDAGInstrs *DAG) {
6246
if (!PredDep.isBarrier())
6347
continue;
6448
SUnit *PredSU = PredDep.getSUnit();
65-
if (!isMemLoad(PredSU->getInstr()))
49+
MachineInstr *MI = PredSU->getInstr();
50+
// Only consider memory loads
51+
if (!MI->mayLoad() || MI->mayStore())
6652
continue;
6753
SDep ForwardD = PredDep;
6854
ForwardD.setSUnit(&SU);

llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll

Lines changed: 26 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -1528,9 +1528,9 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
15281528
; GFX942-NEXT: buffer_wbl2 sc1
15291529
; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[0:3], 0 offen sc0
15301530
; GFX942-NEXT: s_waitcnt vmcnt(0)
1531+
; GFX942-NEXT: buffer_inv sc1
15311532
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5
15321533
; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1533-
; GFX942-NEXT: buffer_inv sc1
15341534
; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5]
15351535
; GFX942-NEXT: s_cbranch_execnz .LBB12_1
15361536
; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1576,9 +1576,9 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
15761576
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[0,1]
15771577
; GFX90A-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[16:19], 0 offen glc
15781578
; GFX90A-NEXT: s_waitcnt vmcnt(0)
1579+
; GFX90A-NEXT: buffer_wbinvl1
15791580
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5
15801581
; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1581-
; GFX90A-NEXT: buffer_wbinvl1
15821582
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
15831583
; GFX90A-NEXT: s_cbranch_execnz .LBB12_1
15841584
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1603,9 +1603,9 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
16031603
; GFX908-NEXT: v_mov_b32_e32 v1, v5
16041604
; GFX908-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[16:19], 0 offen glc
16051605
; GFX908-NEXT: s_waitcnt vmcnt(0)
1606+
; GFX908-NEXT: buffer_wbinvl1
16061607
; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5
16071608
; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1608-
; GFX908-NEXT: buffer_wbinvl1
16091609
; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5]
16101610
; GFX908-NEXT: s_cbranch_execnz .LBB12_1
16111611
; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1630,9 +1630,9 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
16301630
; GFX8-NEXT: v_mov_b32_e32 v1, v5
16311631
; GFX8-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[16:19], 0 offen glc
16321632
; GFX8-NEXT: s_waitcnt vmcnt(0)
1633+
; GFX8-NEXT: buffer_wbinvl1
16331634
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5
16341635
; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1635-
; GFX8-NEXT: buffer_wbinvl1
16361636
; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5]
16371637
; GFX8-NEXT: s_cbranch_execnz .LBB12_1
16381638
; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1683,10 +1683,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
16831683
; GFX942-NEXT: buffer_wbl2 sc1
16841684
; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
16851685
; GFX942-NEXT: s_waitcnt vmcnt(0)
1686+
; GFX942-NEXT: buffer_inv sc1
16861687
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1
1687-
; GFX942-NEXT: v_mov_b32_e32 v1, v4
16881688
; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1689-
; GFX942-NEXT: buffer_inv sc1
1689+
; GFX942-NEXT: v_mov_b32_e32 v1, v4
16901690
; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5]
16911691
; GFX942-NEXT: s_cbranch_execnz .LBB13_1
16921692
; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1730,10 +1730,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
17301730
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[0,1]
17311731
; GFX90A-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[16:19], 0 offen glc
17321732
; GFX90A-NEXT: s_waitcnt vmcnt(0)
1733+
; GFX90A-NEXT: buffer_wbinvl1
17331734
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1
1734-
; GFX90A-NEXT: v_mov_b32_e32 v1, v4
17351735
; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1736-
; GFX90A-NEXT: buffer_wbinvl1
1736+
; GFX90A-NEXT: v_mov_b32_e32 v1, v4
17371737
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
17381738
; GFX90A-NEXT: s_cbranch_execnz .LBB13_1
17391739
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1756,10 +1756,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
17561756
; GFX908-NEXT: v_mov_b32_e32 v4, v0
17571757
; GFX908-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[16:19], 0 offen glc
17581758
; GFX908-NEXT: s_waitcnt vmcnt(0)
1759+
; GFX908-NEXT: buffer_wbinvl1
17591760
; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1
1760-
; GFX908-NEXT: v_mov_b32_e32 v1, v4
17611761
; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1762-
; GFX908-NEXT: buffer_wbinvl1
1762+
; GFX908-NEXT: v_mov_b32_e32 v1, v4
17631763
; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5]
17641764
; GFX908-NEXT: s_cbranch_execnz .LBB13_1
17651765
; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1782,10 +1782,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
17821782
; GFX8-NEXT: v_mov_b32_e32 v4, v0
17831783
; GFX8-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[16:19], 0 offen glc
17841784
; GFX8-NEXT: s_waitcnt vmcnt(0)
1785+
; GFX8-NEXT: buffer_wbinvl1
17851786
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1
1786-
; GFX8-NEXT: v_mov_b32_e32 v1, v4
17871787
; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1788-
; GFX8-NEXT: buffer_wbinvl1
1788+
; GFX8-NEXT: v_mov_b32_e32 v1, v4
17891789
; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5]
17901790
; GFX8-NEXT: s_cbranch_execnz .LBB13_1
17911791
; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1830,9 +1830,9 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
18301830
; GFX12-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
18311831
; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v6, s[0:3], null offen th:TH_ATOMIC_RETURN
18321832
; GFX12-NEXT: s_wait_loadcnt 0x0
1833+
; GFX12-NEXT: global_inv scope:SCOPE_DEV
18331834
; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[9:10]
18341835
; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4
1835-
; GFX12-NEXT: global_inv scope:SCOPE_DEV
18361836
; GFX12-NEXT: s_wait_alu 0xfffe
18371837
; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4
18381838
; GFX12-NEXT: s_cbranch_execnz .LBB14_1
@@ -1872,10 +1872,11 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
18721872
; GFX11-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
18731873
; GFX11-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v6, s[0:3], 0 offen glc
18741874
; GFX11-NEXT: s_waitcnt vmcnt(0)
1875-
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[9:10]
1876-
; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4
18771875
; GFX11-NEXT: buffer_gl1_inv
18781876
; GFX11-NEXT: buffer_gl0_inv
1877+
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[9:10]
1878+
; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4
1879+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
18791880
; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4
18801881
; GFX11-NEXT: s_cbranch_execnz .LBB14_1
18811882
; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1924,9 +1925,9 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
19241925
; GFX908-NEXT: v_mov_b32_e32 v3, v10
19251926
; GFX908-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v6, s[16:19], 0 offen glc
19261927
; GFX908-NEXT: s_waitcnt vmcnt(0)
1928+
; GFX908-NEXT: buffer_wbinvl1
19271929
; GFX908-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[9:10]
19281930
; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1929-
; GFX908-NEXT: buffer_wbinvl1
19301931
; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5]
19311932
; GFX908-NEXT: s_cbranch_execnz .LBB14_1
19321933
; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1955,9 +1956,9 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
19551956
; GFX8-NEXT: v_mov_b32_e32 v3, v10
19561957
; GFX8-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v6, s[16:19], 0 offen glc
19571958
; GFX8-NEXT: s_waitcnt vmcnt(0)
1959+
; GFX8-NEXT: buffer_wbinvl1
19581960
; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[9:10]
19591961
; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1960-
; GFX8-NEXT: buffer_wbinvl1
19611962
; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5]
19621963
; GFX8-NEXT: s_cbranch_execnz .LBB14_1
19631964
; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -1999,10 +2000,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
19992000
; GFX12-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v7, v0
20002001
; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[7:10], v6, s[0:3], null offen th:TH_ATOMIC_RETURN
20012002
; GFX12-NEXT: s_wait_loadcnt 0x0
2003+
; GFX12-NEXT: global_inv scope:SCOPE_DEV
20022004
; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[7:8], v[2:3]
20032005
; GFX12-NEXT: v_dual_mov_b32 v2, v7 :: v_dual_mov_b32 v3, v8
20042006
; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4
2005-
; GFX12-NEXT: global_inv scope:SCOPE_DEV
20062007
; GFX12-NEXT: s_wait_alu 0xfffe
20072008
; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4
20082009
; GFX12-NEXT: s_cbranch_execnz .LBB15_1
@@ -2039,11 +2040,12 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
20392040
; GFX11-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v7, v0
20402041
; GFX11-NEXT: buffer_atomic_cmpswap_b64 v[7:10], v6, s[0:3], 0 offen glc
20412042
; GFX11-NEXT: s_waitcnt vmcnt(0)
2043+
; GFX11-NEXT: buffer_gl1_inv
2044+
; GFX11-NEXT: buffer_gl0_inv
20422045
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[7:8], v[2:3]
20432046
; GFX11-NEXT: v_dual_mov_b32 v2, v7 :: v_dual_mov_b32 v3, v8
20442047
; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4
2045-
; GFX11-NEXT: buffer_gl1_inv
2046-
; GFX11-NEXT: buffer_gl0_inv
2048+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
20472049
; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4
20482050
; GFX11-NEXT: s_cbranch_execnz .LBB15_1
20492051
; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -2088,11 +2090,11 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
20882090
; GFX908-NEXT: v_mov_b32_e32 v7, v0
20892091
; GFX908-NEXT: buffer_atomic_cmpswap_x2 v[7:10], v6, s[16:19], 0 offen glc
20902092
; GFX908-NEXT: s_waitcnt vmcnt(0)
2093+
; GFX908-NEXT: buffer_wbinvl1
20912094
; GFX908-NEXT: v_cmp_eq_u64_e32 vcc, v[7:8], v[2:3]
20922095
; GFX908-NEXT: v_mov_b32_e32 v2, v7
2093-
; GFX908-NEXT: v_mov_b32_e32 v3, v8
20942096
; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
2095-
; GFX908-NEXT: buffer_wbinvl1
2097+
; GFX908-NEXT: v_mov_b32_e32 v3, v8
20962098
; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5]
20972099
; GFX908-NEXT: s_cbranch_execnz .LBB15_1
20982100
; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -2117,11 +2119,11 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
21172119
; GFX8-NEXT: v_mov_b32_e32 v7, v0
21182120
; GFX8-NEXT: buffer_atomic_cmpswap_x2 v[7:10], v6, s[16:19], 0 offen glc
21192121
; GFX8-NEXT: s_waitcnt vmcnt(0)
2122+
; GFX8-NEXT: buffer_wbinvl1
21202123
; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[7:8], v[2:3]
21212124
; GFX8-NEXT: v_mov_b32_e32 v2, v7
2122-
; GFX8-NEXT: v_mov_b32_e32 v3, v8
21232125
; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
2124-
; GFX8-NEXT: buffer_wbinvl1
2126+
; GFX8-NEXT: v_mov_b32_e32 v3, v8
21252127
; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5]
21262128
; GFX8-NEXT: s_cbranch_execnz .LBB15_1
21272129
; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end

0 commit comments

Comments
 (0)