Skip to content

Commit c15a50a

Browse files
authored
[AMDGPU] More flatGVS gfx1250 patterns (#149410)
1 parent 6ff4718 commit c15a50a

File tree

5 files changed

+133
-298
lines changed

5 files changed

+133
-298
lines changed

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1724,6 +1724,7 @@ let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predi
17241724

17251725
defm : FlatLoadPats <FLAT_LOAD_DWORD, atomic_load_nonext_32_flat, i32>;
17261726
defm : FlatLoadPats <FLAT_LOAD_DWORDX2, atomic_load_nonext_64_flat, i64>;
1727+
defm : FlatLoadPats <FLAT_LOAD_DWORDX2, atomic_load_nonext_64_flat, v2i32>;
17271728

17281729
defm : FlatStorePats <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
17291730
defm : FlatStorePats <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
@@ -1735,7 +1736,7 @@ defm : FlatStorePats <FLAT_STORE_DWORD, store_flat, vt>;
17351736

17361737
foreach vt = VReg_64.RegTypes in {
17371738
defm : FlatStorePats <FLAT_STORE_DWORDX2, store_flat, vt>;
1738-
def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
1739+
defm : FlatLoadPats <FLAT_LOAD_DWORDX2, load_flat, vt>;
17391740
}
17401741

17411742
defm : FlatStorePats <FLAT_STORE_DWORDX3, store_flat, v3i32>;
@@ -1747,6 +1748,7 @@ defm : FlatStorePats <FLAT_STORE_DWORDX4, store_flat, vt>;
17471748

17481749
defm : FlatStorePats <FLAT_STORE_DWORD, atomic_store_32_flat, i32>;
17491750
defm : FlatStorePats <FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>;
1751+
defm : FlatStorePats <FLAT_STORE_DWORDX2, atomic_store_64_flat, v2i32>;
17501752
defm : FlatStorePats <FLAT_STORE_BYTE, atomic_store_8_flat, i32>;
17511753
defm : FlatStorePats <FLAT_STORE_SHORT, atomic_store_16_flat, i32>;
17521754

@@ -1792,6 +1794,9 @@ defm : FlatAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_"#as, f64>;
17921794

17931795
} // end foreach as
17941796

1797+
defm : FlatStorePats <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
1798+
defm : FlatStorePats <FLAT_STORE_SHORT, store_flat, i16>;
1799+
17951800
let SubtargetPredicate = isGFX12Plus in {
17961801
defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32 >;
17971802

@@ -1806,19 +1811,19 @@ defm : FlatStorePats <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
18061811

18071812
let OtherPredicates = [D16PreservesUnusedBits] in {
18081813
// TODO: Handle atomic loads
1809-
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
1810-
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
1811-
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
1812-
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>;
1813-
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>;
1814-
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>;
1814+
defm : FlatLoadPats_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
1815+
defm : FlatLoadPats_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
1816+
defm : FlatLoadPats_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
1817+
defm : FlatLoadPats_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>;
1818+
defm : FlatLoadPats_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>;
1819+
defm : FlatLoadPats_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>;
18151820

1816-
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
1817-
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
1818-
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>;
1819-
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>;
1820-
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
1821-
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
1821+
defm : FlatLoadPats_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
1822+
defm : FlatLoadPats_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
1823+
defm : FlatLoadPats_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>;
1824+
defm : FlatLoadPats_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>;
1825+
defm : FlatLoadPats_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
1826+
defm : FlatLoadPats_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
18221827
}
18231828

18241829
} // End OtherPredicates = [HasFlatAddressSpace]
@@ -1890,6 +1895,7 @@ defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX4, store_global, vt>;
18901895
// appropriate waits.
18911896
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, atomic_load_nonext_32_global, i32>;
18921897
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, atomic_load_nonext_64_global, i64>;
1898+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, atomic_load_nonext_64_global, v2i32>;
18931899

18941900
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i32>;
18951901
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>;
@@ -1929,6 +1935,7 @@ defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i32>;
19291935
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i32>;
19301936
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, atomic_store_32_global, i32>;
19311937
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, atomic_store_64_global, i64>;
1938+
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, atomic_store_64_global, v2i32>;
19321939

19331940
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", "atomic_load_add_global", i32>;
19341941
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", "atomic_load_sub_global", i32>;

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -223,37 +223,37 @@ body: |
223223
; GFX7-LABEL: name: load_atomic_flat_v2s32_seq_cst
224224
; GFX7: liveins: $vgpr0_vgpr1
225225
; GFX7-NEXT: {{ $}}
226-
; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
227-
; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
228-
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
226+
; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
227+
; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>))
228+
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
229229
;
230230
; GFX9-LABEL: name: load_atomic_flat_v2s32_seq_cst
231231
; GFX9: liveins: $vgpr0_vgpr1
232232
; GFX9-NEXT: {{ $}}
233-
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
234-
; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
235-
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
233+
; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
234+
; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>))
235+
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
236236
;
237237
; GFX10-LABEL: name: load_atomic_flat_v2s32_seq_cst
238238
; GFX10: liveins: $vgpr0_vgpr1
239239
; GFX10-NEXT: {{ $}}
240-
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
241-
; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
242-
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
240+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
241+
; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>))
242+
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
243243
;
244244
; GFX11-LABEL: name: load_atomic_flat_v2s32_seq_cst
245245
; GFX11: liveins: $vgpr0_vgpr1
246246
; GFX11-NEXT: {{ $}}
247-
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
248-
; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
249-
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
247+
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
248+
; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>))
249+
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
250250
;
251251
; GFX12-LABEL: name: load_atomic_flat_v2s32_seq_cst
252252
; GFX12: liveins: $vgpr0_vgpr1
253253
; GFX12-NEXT: {{ $}}
254-
; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
255-
; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
256-
; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
254+
; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
255+
; GFX12-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>))
256+
; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
257257
%0:vgpr(p0) = COPY $vgpr0_vgpr1
258258
%1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 0)
259259
$vgpr0_vgpr1 = COPY %1

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -252,30 +252,30 @@ body: |
252252
; GFX7-LABEL: name: load_atomic_global_v2s32_seq_cst
253253
; GFX7: liveins: $vgpr0_vgpr1
254254
; GFX7-NEXT: {{ $}}
255-
; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
256-
; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1)
257-
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
255+
; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
256+
; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>), addrspace 1)
257+
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
258258
;
259259
; GFX7-FLAT-LABEL: name: load_atomic_global_v2s32_seq_cst
260260
; GFX7-FLAT: liveins: $vgpr0_vgpr1
261261
; GFX7-FLAT-NEXT: {{ $}}
262-
; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
263-
; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1)
264-
; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
262+
; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
263+
; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>), addrspace 1)
264+
; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
265265
;
266266
; GFX9-LABEL: name: load_atomic_global_v2s32_seq_cst
267267
; GFX9: liveins: $vgpr0_vgpr1
268268
; GFX9-NEXT: {{ $}}
269-
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
270-
; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1)
271-
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
269+
; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
270+
; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (<2 x s32>), addrspace 1)
271+
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
272272
;
273273
; GFX10-LABEL: name: load_atomic_global_v2s32_seq_cst
274274
; GFX10: liveins: $vgpr0_vgpr1
275275
; GFX10-NEXT: {{ $}}
276-
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
277-
; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1)
278-
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
276+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
277+
; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (<2 x s32>), addrspace 1)
278+
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
279279
%0:vgpr(p1) = COPY $vgpr0_vgpr1
280280
%1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 1)
281281
$vgpr0_vgpr1 = COPY %1

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ body: |
2222
; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2323
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2
2424
; GFX7-NEXT: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s32))
25+
;
2526
; GFX9-LABEL: name: atomic_store_flat_s32_seq_cst
2627
; GFX9: liveins: $vgpr0, $vgpr1_vgpr2
2728
; GFX9-NEXT: {{ $}}
@@ -51,6 +52,7 @@ body: |
5152
; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
5253
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2
5354
; GFX7-NEXT: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p0) :: (store seq_cst (<2 x s16>))
55+
;
5456
; GFX9-LABEL: name: atomic_store_flat_v2s16_seq_cst
5557
; GFX9: liveins: $vgpr0, $vgpr1_vgpr2
5658
; GFX9-NEXT: {{ $}}
@@ -80,6 +82,7 @@ body: |
8082
; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
8183
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2
8284
; GFX7-NEXT: G_STORE [[COPY]](p3), [[COPY1]](p0) :: (store seq_cst (p3))
85+
;
8386
; GFX9-LABEL: name: atomic_store_flat_p3_seq_cst
8487
; GFX9: liveins: $vgpr0, $vgpr1_vgpr2
8588
; GFX9-NEXT: {{ $}}
@@ -109,6 +112,7 @@ body: |
109112
; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
110113
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2
111114
; GFX7-NEXT: G_STORE [[COPY]](p5), [[COPY1]](p0) :: (store seq_cst (p5))
115+
;
112116
; GFX9-LABEL: name: atomic_store_flat_p5_seq_cst
113117
; GFX9: liveins: $vgpr0, $vgpr1_vgpr2
114118
; GFX9-NEXT: {{ $}}
@@ -138,6 +142,7 @@ body: |
138142
; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p6) = COPY $vgpr0
139143
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2
140144
; GFX7-NEXT: G_STORE [[COPY]](p6), [[COPY1]](p0) :: (store seq_cst (p6))
145+
;
141146
; GFX9-LABEL: name: atomic_store_flat_p6_seq_cst
142147
; GFX9: liveins: $vgpr0, $vgpr1_vgpr2
143148
; GFX9-NEXT: {{ $}}
@@ -167,6 +172,7 @@ body: |
167172
; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
168173
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
169174
; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s64))
175+
;
170176
; GFX9-LABEL: name: atomic_store_flat_s64_seq_cst
171177
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
172178
; GFX9-NEXT: {{ $}}
@@ -193,15 +199,16 @@ body: |
193199
; GFX7-LABEL: name: atomic_store_flat_v2s32_seq_cst
194200
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
195201
; GFX7-NEXT: {{ $}}
196-
; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
197-
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
198-
; GFX7-NEXT: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p0) :: (store seq_cst (<2 x s32>))
202+
; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
203+
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
204+
; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (<2 x s32>))
205+
;
199206
; GFX9-LABEL: name: atomic_store_flat_v2s32_seq_cst
200207
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
201208
; GFX9-NEXT: {{ $}}
202-
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
203-
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
204-
; GFX9-NEXT: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p0) :: (store seq_cst (<2 x s32>))
209+
; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
210+
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
211+
; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (<2 x s32>))
205212
%0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
206213
%1:vgpr(p0) = COPY $vgpr2_vgpr3
207214
G_STORE %0, %1 :: (store seq_cst (<2 x s32>), align 8, addrspace 0)
@@ -225,6 +232,7 @@ body: |
225232
; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
226233
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
227234
; GFX7-NEXT: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p0) :: (store seq_cst (<4 x s16>))
235+
;
228236
; GFX9-LABEL: name: atomic_store_flat_v4s16_seq_cst
229237
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
230238
; GFX9-NEXT: {{ $}}
@@ -254,6 +262,7 @@ body: |
254262
; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
255263
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
256264
; GFX7-NEXT: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store seq_cst (p0))
265+
;
257266
; GFX9-LABEL: name: atomic_store_flat_p0_seq_cst
258267
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
259268
; GFX9-NEXT: {{ $}}
@@ -282,6 +291,7 @@ body: |
282291
; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
283292
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
284293
; GFX7-NEXT: G_STORE [[COPY]](p1), [[COPY1]](p0) :: (store seq_cst (p1))
294+
;
285295
; GFX9-LABEL: name: atomic_store_flat_p1_seq_cst
286296
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
287297
; GFX9-NEXT: {{ $}}

0 commit comments

Comments
 (0)