Skip to content

Commit 5299d5a

Browse files
committed
Merge amd-mi400 into amd-gfx13
2 parents 48f7d90 + 283d7dc commit 5299d5a

9 files changed

+2 -66 lines changed

llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2633,7 +2633,8 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
26332633
if (IsVolatile) {
26342634
Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);
26352635

2636-
if (Op == SIMemOp::STORE)
2636+
if (Op == SIMemOp::STORE && !ST.hasGFX1250Insts() &&
2637+
TII->getNamedOperand(*MI, OpName::cpol))
26372638
Changed |= insertWaitsBeforeSystemScopeStore(MI);
26382639

26392640
// Ensure operation has completed at system scope to cause all volatile

llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7444,7 +7444,6 @@ define i32 @v_multi_use_mul_chain_add_other_use_all(i32 %arg, i32 %arg1, i32 %ar
74447444
; GFX1250-SDAG-NEXT: v_mul_lo_u32 v1, v0, v1
74457445
; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v2
74467446
; GFX1250-SDAG-NEXT: v_mul_lo_u32 v3, v1, v0
7447-
; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
74487447
; GFX1250-SDAG-NEXT: global_store_b32 v[4:5], v2, off scope:SCOPE_SYS
74497448
; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
74507449
; GFX1250-SDAG-NEXT: global_store_b32 v[4:5], v1, off scope:SCOPE_SYS
@@ -7467,7 +7466,6 @@ define i32 @v_multi_use_mul_chain_add_other_use_all(i32 %arg, i32 %arg1, i32 %ar
74677466
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
74687467
; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v4
74697468
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v5, v1, v0
7470-
; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
74717469
; GFX1250-GISEL-NEXT: global_store_b32 v[2:3], v4, off scope:SCOPE_SYS
74727470
; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
74737471
; GFX1250-GISEL-NEXT: global_store_b32 v[2:3], v1, off scope:SCOPE_SYS
@@ -7723,7 +7721,6 @@ define i32 @v_multi_use_mul_chain_add_other_use_some(i32 %arg, i32 %arg1, i32 %a
77237721
; GFX1250-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
77247722
; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v2
77257723
; GFX1250-SDAG-NEXT: v_mul_lo_u32 v3, v0, v1
7726-
; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
77277724
; GFX1250-SDAG-NEXT: global_store_b32 v[4:5], v2, off scope:SCOPE_SYS
77287725
; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
77297726
; GFX1250-SDAG-NEXT: global_store_b32 v[4:5], v3, off scope:SCOPE_SYS
@@ -7744,7 +7741,6 @@ define i32 @v_multi_use_mul_chain_add_other_use_some(i32 %arg, i32 %arg1, i32 %a
77447741
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
77457742
; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v4
77467743
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v5, v0, v1
7747-
; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
77487744
; GFX1250-GISEL-NEXT: global_store_b32 v[2:3], v4, off scope:SCOPE_SYS
77497745
; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
77507746
; GFX1250-GISEL-NEXT: global_store_b32 v[2:3], v5, off scope:SCOPE_SYS

llvm/test/CodeGen/AMDGPU/mad-mix-hi-bf16.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -141,7 +141,6 @@ define <2 x bfloat> @v_mad_mixhi_bf16_bf16lo_bf16lo_bf16lo_undeflo_clamp_postcvt
141141
; GFX1250-NEXT: s_wait_kmcnt 0x0
142142
; GFX1250-NEXT: v_fma_mixlo_bf16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
143143
; GFX1250-NEXT: v_fma_mixhi_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
144-
; GFX1250-NEXT: s_wait_storecnt 0x0
145144
; GFX1250-NEXT: global_store_b16 v[0:1], v3, off scope:SCOPE_SYS
146145
; GFX1250-NEXT: s_wait_storecnt 0x0
147146
; GFX1250-NEXT: s_set_pc_i64 s[30:31]

llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -598,8 +598,6 @@ define amdgpu_kernel void @flat_nontemporal_store_0(
598598
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
599599
; GFX1250-NEXT: s_wait_kmcnt 0x0
600600
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
601-
; GFX1250-NEXT: s_wait_kmcnt 0x0
602-
; GFX1250-NEXT: s_wait_storecnt 0x0
603601
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
604602
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
605603
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -883,8 +881,6 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
883881
; GFX1250-NEXT: s_wait_xcnt 0x0
884882
; GFX1250-NEXT: s_mov_b32 s2, 0x3ff
885883
; GFX1250-NEXT: v_and_b32_e64 v0, v0, s2
886-
; GFX1250-NEXT: s_wait_kmcnt 0x0
887-
; GFX1250-NEXT: s_wait_storecnt 0x0
888884
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
889885
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scale_offset scope:SCOPE_SYS
890886
; GFX1250-NEXT: s_wait_storecnt 0x0

llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -538,9 +538,6 @@ define amdgpu_kernel void @global_volatile_store_0(
538538
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
539539
; GFX1250-NEXT: s_wait_kmcnt 0x0
540540
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
541-
; GFX1250-NEXT: s_wait_loadcnt 0x0
542-
; GFX1250-NEXT: s_wait_kmcnt 0x0
543-
; GFX1250-NEXT: s_wait_storecnt 0x0
544541
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
545542
; GFX1250-NEXT: s_wait_storecnt 0x0
546543
; GFX1250-NEXT: s_endpgm
@@ -748,9 +745,6 @@ define amdgpu_kernel void @global_volatile_store_1(
748745
; GFX1250-NEXT: v_and_b32_e64 v0, v0, s3
749746
; GFX1250-NEXT: s_wait_kmcnt 0x0
750747
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
751-
; GFX1250-NEXT: s_wait_loadcnt 0x0
752-
; GFX1250-NEXT: s_wait_kmcnt 0x0
753-
; GFX1250-NEXT: s_wait_storecnt 0x0
754748
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scale_offset scope:SCOPE_SYS
755749
; GFX1250-NEXT: s_wait_storecnt 0x0
756750
; GFX1250-NEXT: s_endpgm

llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll

Lines changed: 0 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -444,11 +444,6 @@ define amdgpu_kernel void @local_volatile_store_0(
444444
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
445445
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
446446
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
447-
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
448-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
449-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
450-
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
451-
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
452447
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
453448
; GFX12-WGP-NEXT: s_endpgm
454449
;
@@ -461,11 +456,6 @@ define amdgpu_kernel void @local_volatile_store_0(
461456
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
462457
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
463458
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
464-
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
465-
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
466-
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
467-
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
468-
; GFX12-CU-NEXT: s_wait_storecnt 0x0
469459
; GFX12-CU-NEXT: ds_store_b32 v0, v1
470460
; GFX12-CU-NEXT: s_endpgm
471461
;
@@ -478,9 +468,6 @@ define amdgpu_kernel void @local_volatile_store_0(
478468
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
479469
; GFX1250-NEXT: s_wait_kmcnt 0x0
480470
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
481-
; GFX1250-NEXT: s_wait_loadcnt 0x0
482-
; GFX1250-NEXT: s_wait_kmcnt 0x0
483-
; GFX1250-NEXT: s_wait_storecnt 0x0
484471
; GFX1250-NEXT: ds_store_b32 v0, v1
485472
; GFX1250-NEXT: s_endpgm
486473
ptr addrspace(1) %in, ptr addrspace(3) %out) {
@@ -606,11 +593,6 @@ define amdgpu_kernel void @local_volatile_store_1(
606593
; GFX12-WGP-NEXT: v_lshl_add_u32 v0, v0, s1, s2
607594
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
608595
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
609-
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
610-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
611-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
612-
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
613-
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
614596
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
615597
; GFX12-WGP-NEXT: s_endpgm
616598
;
@@ -627,11 +609,6 @@ define amdgpu_kernel void @local_volatile_store_1(
627609
; GFX12-CU-NEXT: v_lshl_add_u32 v0, v0, s1, s2
628610
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
629611
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
630-
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
631-
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
632-
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
633-
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
634-
; GFX12-CU-NEXT: s_wait_storecnt 0x0
635612
; GFX12-CU-NEXT: ds_store_b32 v0, v1
636613
; GFX12-CU-NEXT: s_endpgm
637614
;
@@ -648,9 +625,6 @@ define amdgpu_kernel void @local_volatile_store_1(
648625
; GFX1250-NEXT: v_lshl_add_u32 v0, v0, s1, s2
649626
; GFX1250-NEXT: s_wait_kmcnt 0x0
650627
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
651-
; GFX1250-NEXT: s_wait_loadcnt 0x0
652-
; GFX1250-NEXT: s_wait_kmcnt 0x0
653-
; GFX1250-NEXT: s_wait_storecnt 0x0
654628
; GFX1250-NEXT: ds_store_b32 v0, v1
655629
; GFX1250-NEXT: s_endpgm
656630
ptr addrspace(1) %in, ptr addrspace(3) %out) {

llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -525,9 +525,6 @@ define amdgpu_kernel void @private_volatile_store_0(
525525
; GFX1250-NEXT: s_load_b32 s1, s[2:3], 0x0
526526
; GFX1250-NEXT: s_wait_kmcnt 0x0
527527
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
528-
; GFX1250-NEXT: s_wait_loadcnt 0x0
529-
; GFX1250-NEXT: s_wait_kmcnt 0x0
530-
; GFX1250-NEXT: s_wait_storecnt 0x0
531528
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
532529
; GFX1250-NEXT: s_wait_storecnt 0x0
533530
; GFX1250-NEXT: s_endpgm
@@ -717,9 +714,6 @@ define amdgpu_kernel void @private_volatile_store_1(
717714
; GFX1250-NEXT: v_and_b32_e64 v1, v0, s2
718715
; GFX1250-NEXT: s_wait_kmcnt 0x0
719716
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
720-
; GFX1250-NEXT: s_wait_loadcnt 0x0
721-
; GFX1250-NEXT: s_wait_kmcnt 0x0
722-
; GFX1250-NEXT: s_wait_storecnt 0x0
723717
; GFX1250-NEXT: scratch_store_b32 v1, v0, s0 scale_offset scope:SCOPE_SYS
724718
; GFX1250-NEXT: s_wait_storecnt 0x0
725719
; GFX1250-NEXT: s_endpgm

llvm/test/CodeGen/AMDGPU/rank-specialization-lowered.ll

Lines changed: 0 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -15,11 +15,6 @@ define void @dummy_store() #5 {
1515
; CHECK-NEXT: s_wait_kmcnt 0x0
1616
; CHECK-NEXT: s_mov_b32 s0, 0
1717
; CHECK-NEXT: v_mov_b32_e32 v0, 1
18-
; CHECK-NEXT: s_wait_loadcnt 0x0
19-
; CHECK-NEXT: s_wait_samplecnt 0x0
20-
; CHECK-NEXT: s_wait_rtscnt 0x0
21-
; CHECK-NEXT: s_wait_kmcnt 0x0
22-
; CHECK-NEXT: s_wait_storecnt 0x0
2318
; CHECK-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
2419
; CHECK-NEXT: s_wait_storecnt 0x0
2520
; CHECK-NEXT: s_set_pc_i64 s[30:31]
@@ -39,11 +34,6 @@ define void @dummy_rank1a() #5 {
3934
; CHECK-NEXT: s_wait_kmcnt 0x0
4035
; CHECK-NEXT: s_mov_b32 s0, 0
4136
; CHECK-NEXT: v_mov_b32_e32 v0, 1
42-
; CHECK-NEXT: s_wait_loadcnt 0x0
43-
; CHECK-NEXT: s_wait_samplecnt 0x0
44-
; CHECK-NEXT: s_wait_rtscnt 0x0
45-
; CHECK-NEXT: s_wait_kmcnt 0x0
46-
; CHECK-NEXT: s_wait_storecnt 0x0
4737
; CHECK-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
4838
; CHECK-NEXT: s_wait_storecnt 0x0
4939
; CHECK-NEXT: s_set_pc_i64 s[30:31]
@@ -63,11 +53,6 @@ define void @dummy_rank1b() #5 {
6353
; CHECK-NEXT: s_wait_kmcnt 0x0
6454
; CHECK-NEXT: s_mov_b32 s0, 0
6555
; CHECK-NEXT: v_mov_b32_e32 v0, 1
66-
; CHECK-NEXT: s_wait_loadcnt 0x0
67-
; CHECK-NEXT: s_wait_samplecnt 0x0
68-
; CHECK-NEXT: s_wait_rtscnt 0x0
69-
; CHECK-NEXT: s_wait_kmcnt 0x0
70-
; CHECK-NEXT: s_wait_storecnt 0x0
7156
; CHECK-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
7257
; CHECK-NEXT: s_wait_storecnt 0x0
7358
; CHECK-NEXT: s_set_pc_i64 s[30:31]

llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.mir

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -49,9 +49,6 @@ body: |
4949
; GFX1250-LABEL: name: generic_store_volatile
5050
; GFX1250: liveins: $vgpr0, $vgpr1, $vgpr2
5151
; GFX1250-NEXT: {{ $}}
52-
; GFX1250-NEXT: S_WAIT_LOADCNT_soft 0
53-
; GFX1250-NEXT: S_WAIT_KMCNT_soft 0
54-
; GFX1250-NEXT: S_WAIT_STORECNT_soft 0
5552
; GFX1250-NEXT: GLOBAL_STORE_DWORD killed renamable $vgpr2_vgpr3, killed renamable $vgpr0, 0, 24, implicit $exec :: (volatile store (s32), addrspace 1)
5653
; GFX1250-NEXT: S_WAIT_STORECNT_soft 0
5754
; GFX1250-NEXT: S_ENDPGM 0

0 commit comments

Comments
 (0)