@@ -1,19 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
 
 define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %data) {
-  ; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
-  ; GFX940: bb.1 (%ir-block.0):
-  ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX940-NEXT: {{ $}}
-  ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
-  ; GFX940-NEXT: S_ENDPGM 0
+  ; GFX942-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
+  ; GFX942: bb.1 (%ir-block.0):
+  ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX942-NEXT: {{ $}}
+  ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX942-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
+  ; GFX942-NEXT: S_ENDPGM 0
   ;
   ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
   ; GFX11: bb.1 (%ir-block.0):
@@ -30,17 +30,17 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %da
 }
 
 define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data) {
-  ; GFX940-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic
-  ; GFX940: bb.1 (%ir-block.0):
-  ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX940-NEXT: {{ $}}
-  ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
-  ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
-  ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX942-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic
+  ; GFX942: bb.1 (%ir-block.0):
+  ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX942-NEXT: {{ $}}
+  ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX942-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
+  ; GFX942-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
+  ; GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
   ;
   ; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic
   ; GFX11: bb.1 (%ir-block.0):
@@ -58,16 +58,16 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data
 }
 
 define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %data) {
-  ; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw
-  ; GFX940: bb.1 (%ir-block.0):
-  ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX940-NEXT: {{ $}}
-  ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
-  ; GFX940-NEXT: S_ENDPGM 0
+  ; GFX942-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw
+  ; GFX942: bb.1 (%ir-block.0):
+  ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX942-NEXT: {{ $}}
+  ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX942-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
+  ; GFX942-NEXT: S_ENDPGM 0
   ;
   ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw
   ; GFX11: bb.1 (%ir-block.0):
@@ -84,17 +84,17 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %da
 }
 
 define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(ptr %ptr, float %data) {
-  ; GFX940-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw
-  ; GFX940: bb.1 (%ir-block.0):
-  ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX940-NEXT: {{ $}}
-  ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
-  ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
-  ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX942-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw
+  ; GFX942: bb.1 (%ir-block.0):
+  ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX942-NEXT: {{ $}}
+  ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX942-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
+  ; GFX942-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
+  ; GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
   ;
   ; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw
   ; GFX11: bb.1 (%ir-block.0):
0 commit comments