Skip to content

Commit 7708bb6

Browse files
committed
[AMDGPU][SIInsertWaitCnts] test changes, use jayfoad patch
1 parent a1ee5b8 commit 7708bb6

File tree

10 files changed

+25
-74
lines changed

10 files changed

+25
-74
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1301,7 +1301,7 @@ bool WaitcntBrackets::canOptimizeXCntWithLoadCnt(const AMDGPU::Waitcnt &Wait) {
13011301
// stores. VMEM loads return in order, so if we only have loads XCnt is
13021302
// decremented to the same number as LOADCnt.
13031303
return Wait.LoadCnt != ~0u && hasPendingEvent(VMEM_GROUP) &&
1304-
!hasPendingEvent(STORE_CNT) && !hasPendingEvent(SMEM_GROUP);
1304+
!hasPendingEvent(STORE_CNT);
13051305
}
13061306

13071307
void WaitcntBrackets::simplifyXcnt(AMDGPU::Waitcnt &CheckWait,
@@ -1311,15 +1311,17 @@ void WaitcntBrackets::simplifyXcnt(AMDGPU::Waitcnt &CheckWait,
13111311
// be pending SMEM and VMEM events active at the same time.
13121312
// In such cases, only clear one active event at a time.
13131313
if (hasRedundantXCntWithKmCnt(CheckWait)) {
1314-
if (hasPendingEvent(VMEM_GROUP)) {
1315-
// Only clear the SMEM_GROUP event, but VMEM_GROUP could still require
1316-
// handling.
1317-
PendingEvents &= ~(1 << SMEM_GROUP);
1318-
} else {
1314+
if (!hasMixedPendingEvents(X_CNT)) {
13191315
applyWaitcnt(X_CNT, 0);
1316+
} else {
1317+
PendingEvents &= ~(1 << SMEM_GROUP);
13201318
}
13211319
} else if (canOptimizeXCntWithLoadCnt(CheckWait)) {
1322-
applyWaitcnt(X_CNT, std::min(CheckWait.XCnt, CheckWait.LoadCnt));
1320+
if (!hasMixedPendingEvents(X_CNT)) {
1321+
applyWaitcnt(X_CNT, std::min(CheckWait.XCnt, CheckWait.LoadCnt));
1322+
} else if (CheckWait.LoadCnt == 0) {
1323+
PendingEvents &= ~(1 << VMEM_GROUP);
1324+
}
13231325
}
13241326
simplifyWaitcnt(X_CNT, UpdateWait.XCnt);
13251327
}

llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1501,7 +1501,6 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
15011501
; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
15021502
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
15031503
; GFX1250-NEXT: s_wait_storecnt 0x0
1504-
; GFX1250-NEXT: s_wait_xcnt 0x0
15051504
; GFX1250-NEXT: s_wait_kmcnt 0x0
15061505
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
15071506
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1574,7 +1573,6 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
15741573
; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
15751574
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
15761575
; GFX1250-NEXT: s_wait_storecnt 0x0
1577-
; GFX1250-NEXT: s_wait_xcnt 0x0
15781576
; GFX1250-NEXT: s_wait_kmcnt 0x0
15791577
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
15801578
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1649,7 +1647,6 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
16491647
; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
16501648
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
16511649
; GFX1250-NEXT: s_wait_storecnt 0x0
1652-
; GFX1250-NEXT: s_wait_xcnt 0x0
16531650
; GFX1250-NEXT: s_wait_kmcnt 0x0
16541651
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
16551652
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1722,7 +1719,6 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
17221719
; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
17231720
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
17241721
; GFX1250-NEXT: s_wait_storecnt 0x0
1725-
; GFX1250-NEXT: s_wait_xcnt 0x0
17261722
; GFX1250-NEXT: s_wait_kmcnt 0x0
17271723
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
17281724
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1913,7 +1909,6 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs
19131909
; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
19141910
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
19151911
; GFX1250-NEXT: s_wait_storecnt 0x0
1916-
; GFX1250-NEXT: s_wait_xcnt 0x0
19171912
; GFX1250-NEXT: s_wait_kmcnt 0x0
19181913
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
19191914
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1959,7 +1954,6 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
19591954
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
19601955
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
19611956
; GFX1250-NEXT: s_wait_storecnt 0x0
1962-
; GFX1250-NEXT: s_wait_xcnt 0x0
19631957
; GFX1250-NEXT: s_wait_kmcnt 0x0
19641958
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
19651959
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -2002,7 +1996,6 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
20021996
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
20031997
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
20041998
; GFX1250-NEXT: s_wait_storecnt 0x0
2005-
; GFX1250-NEXT: s_wait_xcnt 0x0
20061999
; GFX1250-NEXT: s_wait_kmcnt 0x0
20072000
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
20082001
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -2047,7 +2040,6 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
20472040
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
20482041
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
20492042
; GFX1250-NEXT: s_wait_storecnt 0x0
2050-
; GFX1250-NEXT: s_wait_xcnt 0x0
20512043
; GFX1250-NEXT: s_wait_kmcnt 0x0
20522044
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
20532045
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -2210,7 +2202,6 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
22102202
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
22112203
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
22122204
; GFX1250-NEXT: s_wait_storecnt 0x0
2213-
; GFX1250-NEXT: s_wait_xcnt 0x0
22142205
; GFX1250-NEXT: s_wait_kmcnt 0x0
22152206
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
22162207
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0

llvm/test/CodeGen/AMDGPU/bf16.ll

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2520,6 +2520,7 @@ define void @test_load_store_f32_to_bf16(ptr addrspace(1) %in, ptr addrspace(1)
25202520
; GFX1250-NEXT: s_wait_kmcnt 0x0
25212521
; GFX1250-NEXT: global_load_b32 v0, v[0:1], off
25222522
; GFX1250-NEXT: s_wait_loadcnt 0x0
2523+
; GFX1250-NEXT: s_wait_xcnt 0x0
25232524
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
25242525
; GFX1250-NEXT: global_store_b16 v[2:3], v0, off
25252526
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -2783,6 +2784,7 @@ define void @test_load_store_bf16_to_f32(ptr addrspace(1) %in, ptr addrspace(1)
27832784
; GFX1250-NEXT: s_wait_kmcnt 0x0
27842785
; GFX1250-NEXT: global_load_u16 v0, v[0:1], off
27852786
; GFX1250-NEXT: s_wait_loadcnt 0x0
2787+
; GFX1250-NEXT: s_wait_xcnt 0x0
27862788
; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
27872789
; GFX1250-NEXT: global_store_b32 v[2:3], v0, off
27882790
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -2872,6 +2874,7 @@ define void @test_load_store_bf16_to_f64(ptr addrspace(1) %in, ptr addrspace(1)
28722874
; GFX1250-NEXT: s_wait_kmcnt 0x0
28732875
; GFX1250-NEXT: global_load_u16 v0, v[0:1], off
28742876
; GFX1250-NEXT: s_wait_loadcnt 0x0
2877+
; GFX1250-NEXT: s_wait_xcnt 0x0
28752878
; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
28762879
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
28772880
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
@@ -6850,6 +6853,7 @@ define <2 x float> @global_extload_v2bf16_to_v2f32(ptr addrspace(1) %ptr) {
68506853
; GFX1250-NEXT: s_wait_kmcnt 0x0
68516854
; GFX1250-NEXT: global_load_b32 v1, v[0:1], off
68526855
; GFX1250-NEXT: s_wait_loadcnt 0x0
6856+
; GFX1250-NEXT: s_wait_xcnt 0x0
68536857
; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v1
68546858
; GFX1250-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
68556859
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -6943,6 +6947,7 @@ define <3 x float> @global_extload_v3bf16_to_v3f32(ptr addrspace(1) %ptr) {
69436947
; GFX1250-NEXT: s_wait_kmcnt 0x0
69446948
; GFX1250-NEXT: global_load_b64 v[2:3], v[0:1], off
69456949
; GFX1250-NEXT: s_wait_loadcnt 0x0
6950+
; GFX1250-NEXT: s_wait_xcnt 0x0
69466951
; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v2
69476952
; GFX1250-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
69486953
; GFX1250-NEXT: v_lshlrev_b32_e32 v2, 16, v3
@@ -7033,6 +7038,7 @@ define <4 x float> @global_extload_v4bf16_to_v4f32(ptr addrspace(1) %ptr) {
70337038
; GFX1250-NEXT: s_wait_kmcnt 0x0
70347039
; GFX1250-NEXT: global_load_b64 v[2:3], v[0:1], off
70357040
; GFX1250-NEXT: s_wait_loadcnt 0x0
7041+
; GFX1250-NEXT: s_wait_xcnt 0x0
70367042
; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v2
70377043
; GFX1250-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
70387044
; GFX1250-NEXT: v_lshlrev_b32_e32 v2, 16, v3
@@ -7134,6 +7140,7 @@ define <5 x float> @global_extload_v5bf16_to_v5f32(ptr addrspace(1) %ptr) {
71347140
; GFX1250-NEXT: s_wait_kmcnt 0x0
71357141
; GFX1250-NEXT: global_load_b128 v[2:5], v[0:1], off
71367142
; GFX1250-NEXT: s_wait_loadcnt 0x0
7143+
; GFX1250-NEXT: s_wait_xcnt 0x0
71377144
; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v2
71387145
; GFX1250-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
71397146
; GFX1250-NEXT: v_lshlrev_b32_e32 v2, 16, v3
@@ -7251,6 +7258,7 @@ define <6 x float> @global_extload_v6bf16_to_v6f32(ptr addrspace(1) %ptr) {
72517258
; GFX1250-NEXT: s_wait_kmcnt 0x0
72527259
; GFX1250-NEXT: global_load_b96 v[4:6], v[0:1], off
72537260
; GFX1250-NEXT: s_wait_loadcnt 0x0
7261+
; GFX1250-NEXT: s_wait_xcnt 0x0
72547262
; GFX1250-NEXT: v_dual_lshlrev_b32 v0, 16, v4 :: v_dual_lshlrev_b32 v2, 16, v5
72557263
; GFX1250-NEXT: v_and_b32_e32 v1, 0xffff0000, v4
72567264
; GFX1250-NEXT: v_and_b32_e32 v3, 0xffff0000, v5
@@ -7367,6 +7375,7 @@ define <8 x float> @global_extload_v8bf16_to_v8f32(ptr addrspace(1) %ptr) {
73677375
; GFX1250-NEXT: s_wait_kmcnt 0x0
73687376
; GFX1250-NEXT: global_load_b128 v[4:7], v[0:1], off
73697377
; GFX1250-NEXT: s_wait_loadcnt 0x0
7378+
; GFX1250-NEXT: s_wait_xcnt 0x0
73707379
; GFX1250-NEXT: v_dual_lshlrev_b32 v0, 16, v4 :: v_dual_lshlrev_b32 v2, 16, v5
73717380
; GFX1250-NEXT: v_and_b32_e32 v1, 0xffff0000, v4
73727381
; GFX1250-NEXT: v_and_b32_e32 v3, 0xffff0000, v5
@@ -8001,6 +8010,7 @@ define <2 x double> @global_extload_v2bf16_to_v2f64(ptr addrspace(1) %ptr) {
80018010
; GFX1250-NEXT: s_wait_kmcnt 0x0
80028011
; GFX1250-NEXT: global_load_b32 v0, v[0:1], off
80038012
; GFX1250-NEXT: s_wait_loadcnt 0x0
8013+
; GFX1250-NEXT: s_wait_xcnt 0x0
80048014
; GFX1250-NEXT: v_lshlrev_b32_e32 v1, 16, v0
80058015
; GFX1250-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
80068016
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
@@ -8241,6 +8251,7 @@ define <4 x double> @global_extload_v4bf16_to_v4f64(ptr addrspace(1) %ptr) {
82418251
; GFX1250-NEXT: s_wait_kmcnt 0x0
82428252
; GFX1250-NEXT: global_load_b64 v[2:3], v[0:1], off
82438253
; GFX1250-NEXT: s_wait_loadcnt 0x0
8254+
; GFX1250-NEXT: s_wait_xcnt 0x0
82448255
; GFX1250-NEXT: v_dual_lshlrev_b32 v0, 16, v2 :: v_dual_lshlrev_b32 v4, 16, v3
82458256
; GFX1250-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
82468257
; GFX1250-NEXT: v_and_b32_e32 v6, 0xffff0000, v3
@@ -8377,6 +8388,7 @@ define <5 x double> @global_extload_v5bf16_to_v5f64(ptr addrspace(1) %ptr) {
83778388
; GFX1250-NEXT: s_wait_kmcnt 0x0
83788389
; GFX1250-NEXT: global_load_b128 v[2:5], v[0:1], off
83798390
; GFX1250-NEXT: s_wait_loadcnt 0x0
8391+
; GFX1250-NEXT: s_wait_xcnt 0x0
83808392
; GFX1250-NEXT: v_dual_lshlrev_b32 v0, 16, v2 :: v_dual_lshlrev_b32 v5, 16, v3
83818393
; GFX1250-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
83828394
; GFX1250-NEXT: v_and_b32_e32 v6, 0xffff0000, v3
@@ -8522,6 +8534,7 @@ define <6 x double> @global_extload_v6bf16_to_v6f64(ptr addrspace(1) %ptr) {
85228534
; GFX1250-NEXT: s_wait_kmcnt 0x0
85238535
; GFX1250-NEXT: global_load_b96 v[4:6], v[0:1], off
85248536
; GFX1250-NEXT: s_wait_loadcnt 0x0
8537+
; GFX1250-NEXT: s_wait_xcnt 0x0
85258538
; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v4
85268539
; GFX1250-NEXT: v_and_b32_e32 v2, 0xffff0000, v4
85278540
; GFX1250-NEXT: v_lshlrev_b32_e32 v4, 16, v5
@@ -8693,6 +8706,7 @@ define <8 x double> @global_extload_v8bf16_to_v8f64(ptr addrspace(1) %ptr) {
86938706
; GFX1250-NEXT: s_wait_kmcnt 0x0
86948707
; GFX1250-NEXT: global_load_b128 v[8:11], v[0:1], off
86958708
; GFX1250-NEXT: s_wait_loadcnt 0x0
8709+
; GFX1250-NEXT: s_wait_xcnt 0x0
86968710
; GFX1250-NEXT: v_dual_lshlrev_b32 v0, 16, v8 :: v_dual_lshlrev_b32 v4, 16, v9
86978711
; GFX1250-NEXT: v_and_b32_e32 v2, 0xffff0000, v8
86988712
; GFX1250-NEXT: v_and_b32_e32 v6, 0xffff0000, v9

llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx1250.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,6 @@ define amdgpu_kernel void @min_long_forward_vbranch(ptr addrspace(1) %arg) #0 {
152152
; GCN-NEXT: s_wait_kmcnt 0x0
153153
; GCN-NEXT: global_load_b32 v2, v0, s[0:1] scale_offset scope:SCOPE_SYS
154154
; GCN-NEXT: s_wait_loadcnt 0x0
155-
; GCN-NEXT: s_wait_xcnt 0x0
156155
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
157156
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
158157
; GCN-NEXT: v_add_nc_u64_e32 v[0:1], s[0:1], v[0:1]

llvm/test/CodeGen/AMDGPU/flat-load-saddr-to-vaddr.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ define amdgpu_kernel void @test_move_load_address_to_vgpr(ptr addrspace(1) nocap
2727
; GCN-NEXT: s_wait_dscnt 0x0
2828
; GCN-NEXT: flat_load_b32 v3, v[0:1] scope:SCOPE_SYS
2929
; GCN-NEXT: s_wait_loadcnt 0x0
30-
; GCN-NEXT: s_wait_xcnt 0x0
3130
; GCN-NEXT: v_add_nc_u64_e32 v[0:1], 4, v[0:1]
3231
; GCN-NEXT: v_add_co_u32 v2, s0, v2, 1
3332
; GCN-NEXT: s_and_b32 vcc_lo, exec_lo, s0

llvm/test/CodeGen/AMDGPU/fp-atomics-gfx942.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
3838
; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
3939
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
4040
; GFX1250-NEXT: s_wait_storecnt 0x0
41-
; GFX1250-NEXT: s_wait_xcnt 0x0
4241
; GFX1250-NEXT: s_wait_kmcnt 0x0
4342
; GFX1250-NEXT: flat_atomic_add_f32 v0, v1, s[0:1] scope:SCOPE_SYS
4443
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -80,7 +79,6 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 {
8079
; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
8180
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
8281
; GFX1250-NEXT: s_wait_storecnt 0x0
83-
; GFX1250-NEXT: s_wait_xcnt 0x0
8482
; GFX1250-NEXT: s_wait_kmcnt 0x0
8583
; GFX1250-NEXT: flat_atomic_add_f32 v0, v1, s[0:1] scope:SCOPE_SYS
8684
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0

llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1473,7 +1473,6 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
14731473
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
14741474
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
14751475
; GFX1250-NEXT: s_wait_storecnt 0x0
1476-
; GFX1250-NEXT: s_wait_xcnt 0x0
14771476
; GFX1250-NEXT: s_wait_kmcnt 0x0
14781477
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
14791478
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1516,7 +1515,6 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
15161515
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
15171516
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
15181517
; GFX1250-NEXT: s_wait_storecnt 0x0
1519-
; GFX1250-NEXT: s_wait_xcnt 0x0
15201518
; GFX1250-NEXT: s_wait_kmcnt 0x0
15211519
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
15221520
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1561,7 +1559,6 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
15611559
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
15621560
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
15631561
; GFX1250-NEXT: s_wait_storecnt 0x0
1564-
; GFX1250-NEXT: s_wait_xcnt 0x0
15651562
; GFX1250-NEXT: s_wait_kmcnt 0x0
15661563
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
15671564
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1604,7 +1601,6 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
16041601
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
16051602
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
16061603
; GFX1250-NEXT: s_wait_storecnt 0x0
1607-
; GFX1250-NEXT: s_wait_xcnt 0x0
16081604
; GFX1250-NEXT: s_wait_kmcnt 0x0
16091605
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
16101606
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1776,7 +1772,6 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs
17761772
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
17771773
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
17781774
; GFX1250-NEXT: s_wait_storecnt 0x0
1779-
; GFX1250-NEXT: s_wait_xcnt 0x0
17801775
; GFX1250-NEXT: s_wait_kmcnt 0x0
17811776
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
17821777
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1821,7 +1816,6 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
18211816
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
18221817
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
18231818
; GFX1250-NEXT: s_wait_storecnt 0x0
1824-
; GFX1250-NEXT: s_wait_xcnt 0x0
18251819
; GFX1250-NEXT: s_wait_kmcnt 0x0
18261820
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
18271821
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -1864,7 +1858,6 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
18641858
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
18651859
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
18661860
; GFX1250-NEXT: s_wait_storecnt 0x0
1867-
; GFX1250-NEXT: s_wait_xcnt 0x0
18681861
; GFX1250-NEXT: s_wait_kmcnt 0x0
18691862
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
18701863
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -1909,7 +1902,6 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
19091902
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
19101903
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
19111904
; GFX1250-NEXT: s_wait_storecnt 0x0
1912-
; GFX1250-NEXT: s_wait_xcnt 0x0
19131905
; GFX1250-NEXT: s_wait_kmcnt 0x0
19141906
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
19151907
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -2083,7 +2075,6 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
20832075
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
20842076
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
20852077
; GFX1250-NEXT: s_wait_storecnt 0x0
2086-
; GFX1250-NEXT: s_wait_xcnt 0x0
20872078
; GFX1250-NEXT: s_wait_kmcnt 0x0
20882079
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
20892080
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) {
6666
; GFX1250-NEXT: s_wait_kmcnt 0x0
6767
; GFX1250-NEXT: global_load_b64 v[0:1], v0, s[0:1] scale_offset scope:SCOPE_SYS
6868
; GFX1250-NEXT: s_wait_loadcnt 0x0
69-
; GFX1250-NEXT: s_wait_xcnt 0x0
7069
; GFX1250-NEXT: v_xor_b32_e32 v0, src_flat_scratch_base_hi, v1
7170
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
7271
; GFX1250-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x4000000, v0

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.tr.gfx1250.w32.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,7 @@ define { i32, <3 x i32> } @global_load_tr6_b96_vaddr_no_align2_requirement(ptr a
330330
; GFX1250-NEXT: s_wait_kmcnt 0x0
331331
; GFX1250-NEXT: global_load_tr6_b96 v[2:4], v[0:1], off offset:32
332332
; GFX1250-NEXT: s_wait_loadcnt 0x0
333+
; GFX1250-NEXT: s_wait_xcnt 0x0
333334
; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, v2
334335
; GFX1250-NEXT: v_dual_mov_b32 v2, v3 :: v_dual_mov_b32 v3, v4
335336
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -348,6 +349,7 @@ define { i32, <3 x i32> } @global_load_tr6_b96_saddr_no_align2_requirement(ptr a
348349
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
349350
; GFX1250-NEXT: global_load_tr6_b96 v[2:4], v0, s[0:1] offset:32
350351
; GFX1250-NEXT: s_wait_loadcnt 0x0
352+
; GFX1250-NEXT: s_wait_xcnt 0x0
351353
; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, v2
352354
; GFX1250-NEXT: v_dual_mov_b32 v2, v3 :: v_dual_mov_b32 v3, v4
353355
; GFX1250-NEXT: s_set_pc_i64 s[30:31]

llvm/test/CodeGen/AMDGPU/wait-xcnt.mir

Lines changed: 0 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1070,50 +1070,6 @@ body: |
10701070
...
10711071

10721072
# FIXME: Missing S_WAIT_XCNT before overwriting vgpr0.
1073-
---
1074-
name: mixed_pending_events
1075-
tracksRegLiveness: true
1076-
machineFunctionInfo:
1077-
isEntryFunction: true
1078-
body: |
1079-
; GCN-LABEL: name: mixed_pending_events
1080-
; GCN: bb.0:
1081-
; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
1082-
; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
1083-
; GCN-NEXT: {{ $}}
1084-
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
1085-
; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
1086-
; GCN-NEXT: {{ $}}
1087-
; GCN-NEXT: bb.1:
1088-
; GCN-NEXT: successors: %bb.2(0x80000000)
1089-
; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr2
1090-
; GCN-NEXT: {{ $}}
1091-
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
1092-
; GCN-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
1093-
; GCN-NEXT: {{ $}}
1094-
; GCN-NEXT: bb.2:
1095-
; GCN-NEXT: liveins: $sgpr2, $vgpr2
1096-
; GCN-NEXT: {{ $}}
1097-
; GCN-NEXT: S_WAIT_LOADCNT 1
1098-
; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr2, implicit $exec
1099-
; GCN-NEXT: S_WAIT_KMCNT 0
1100-
; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2
1101-
; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
1102-
bb.0:
1103-
liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
1104-
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
1105-
S_CBRANCH_SCC1 %bb.2, implicit $scc
1106-
bb.1:
1107-
liveins: $vgpr0_vgpr1, $sgpr2
1108-
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
1109-
$vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
1110-
bb.2:
1111-
liveins: $sgpr2, $vgpr2
1112-
$vgpr2 = V_MOV_B32_e32 $vgpr2, implicit $exec
1113-
$sgpr2 = S_MOV_B32 $sgpr2
1114-
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
1115-
...
1116-
11171073
---
11181074
name: mixed_pending_events
11191075
tracksRegLiveness: true

0 commit comments

Comments
 (0)