11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2- ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
3- ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
2+ ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,SDAG %s
3+ ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,SDAG %s
4+ ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GISEL %s
5+ ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GISEL %s
46
57declare hidden void @external_void_func_i8_inreg (i8 inreg ) #0
68declare hidden void @external_void_func_i16_inreg (i32 inreg ) #0
@@ -12,11 +14,9 @@ declare hidden void @external_void_func_v4i32_inreg(<4 x i32> inreg) #0
1214declare hidden void @external_void_func_v8i32_inreg (<8 x i32 > inreg ) #0
1315declare hidden void @external_void_func_v16i32_inreg (<16 x i32 > inreg ) #0
1416declare hidden void @external_void_func_f16_inreg (half inreg ) #0
15- declare hidden void @external_void_func_bf16_inreg (bfloat inreg ) #0
1617declare hidden void @external_void_func_f32_inreg (float inreg ) #0
1718declare hidden void @external_void_func_f64_inreg (double inreg ) #0
1819declare hidden void @external_void_func_v2f16_inreg (<2 x half > inreg ) #0
19- declare hidden void @external_void_func_v2bf16_inreg (<2 x bfloat> inreg ) #0
2020declare hidden void @external_void_func_v3f16_inreg (<3 x half > inreg ) #0
2121declare hidden void @external_void_func_v4f16_inreg (<4 x half > inreg ) #0
2222
@@ -585,66 +585,6 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 {
585585 ret void
586586}
587587
588- define void @test_call_external_void_func_bf16_inreg (bfloat inreg %arg ) #0 {
589- ; GFX9-LABEL: test_call_external_void_func_bf16_inreg:
590- ; GFX9: ; %bb.0:
591- ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
592- ; GFX9-NEXT: s_mov_b32 s17, s33
593- ; GFX9-NEXT: s_mov_b32 s33, s32
594- ; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
595- ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
596- ; GFX9-NEXT: s_mov_b64 exec, s[18:19]
597- ; GFX9-NEXT: v_writelane_b32 v40, s17, 2
598- ; GFX9-NEXT: s_addk_i32 s32, 0x400
599- ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
600- ; GFX9-NEXT: s_getpc_b64 s[18:19]
601- ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4
602- ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12
603- ; GFX9-NEXT: s_mov_b32 s0, s16
604- ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
605- ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
606- ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
607- ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
608- ; GFX9-NEXT: s_mov_b32 s32, s33
609- ; GFX9-NEXT: v_readlane_b32 s4, v40, 2
610- ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
611- ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
612- ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
613- ; GFX9-NEXT: s_mov_b32 s33, s4
614- ; GFX9-NEXT: s_waitcnt vmcnt(0)
615- ; GFX9-NEXT: s_setpc_b64 s[30:31]
616- ;
617- ; GFX11-LABEL: test_call_external_void_func_bf16_inreg:
618- ; GFX11: ; %bb.0:
619- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
620- ; GFX11-NEXT: s_mov_b32 s1, s33
621- ; GFX11-NEXT: s_mov_b32 s33, s32
622- ; GFX11-NEXT: s_or_saveexec_b32 s2, -1
623- ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
624- ; GFX11-NEXT: s_mov_b32 exec_lo, s2
625- ; GFX11-NEXT: v_writelane_b32 v40, s1, 2
626- ; GFX11-NEXT: s_add_i32 s32, s32, 16
627- ; GFX11-NEXT: s_getpc_b64 s[2:3]
628- ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_bf16_inreg@rel32@lo+4
629- ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_bf16_inreg@rel32@hi+12
630- ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
631- ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
632- ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
633- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
634- ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
635- ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
636- ; GFX11-NEXT: s_mov_b32 s32, s33
637- ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
638- ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
639- ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
640- ; GFX11-NEXT: s_mov_b32 exec_lo, s1
641- ; GFX11-NEXT: s_mov_b32 s33, s0
642- ; GFX11-NEXT: s_waitcnt vmcnt(0)
643- ; GFX11-NEXT: s_setpc_b64 s[30:31]
644- call void @external_void_func_bf16_inreg (bfloat inreg %arg )
645- ret void
646- }
647-
648588define void @test_call_external_void_func_f32_inreg (float inreg %arg ) #0 {
649589; GFX9-LABEL: test_call_external_void_func_f32_inreg:
650590; GFX9: ; %bb.0:
@@ -826,67 +766,6 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0
826766 ret void
827767}
828768
829-
830- define void @test_call_external_void_func_v2bf16_inreg (<2 x bfloat> inreg %arg ) #0 {
831- ; GFX9-LABEL: test_call_external_void_func_v2bf16_inreg:
832- ; GFX9: ; %bb.0:
833- ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
834- ; GFX9-NEXT: s_mov_b32 s17, s33
835- ; GFX9-NEXT: s_mov_b32 s33, s32
836- ; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
837- ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
838- ; GFX9-NEXT: s_mov_b64 exec, s[18:19]
839- ; GFX9-NEXT: v_writelane_b32 v40, s17, 2
840- ; GFX9-NEXT: s_addk_i32 s32, 0x400
841- ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
842- ; GFX9-NEXT: s_getpc_b64 s[18:19]
843- ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4
844- ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12
845- ; GFX9-NEXT: s_mov_b32 s0, s16
846- ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
847- ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
848- ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
849- ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
850- ; GFX9-NEXT: s_mov_b32 s32, s33
851- ; GFX9-NEXT: v_readlane_b32 s4, v40, 2
852- ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
853- ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
854- ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
855- ; GFX9-NEXT: s_mov_b32 s33, s4
856- ; GFX9-NEXT: s_waitcnt vmcnt(0)
857- ; GFX9-NEXT: s_setpc_b64 s[30:31]
858- ;
859- ; GFX11-LABEL: test_call_external_void_func_v2bf16_inreg:
860- ; GFX11: ; %bb.0:
861- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
862- ; GFX11-NEXT: s_mov_b32 s1, s33
863- ; GFX11-NEXT: s_mov_b32 s33, s32
864- ; GFX11-NEXT: s_or_saveexec_b32 s2, -1
865- ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
866- ; GFX11-NEXT: s_mov_b32 exec_lo, s2
867- ; GFX11-NEXT: v_writelane_b32 v40, s1, 2
868- ; GFX11-NEXT: s_add_i32 s32, s32, 16
869- ; GFX11-NEXT: s_getpc_b64 s[2:3]
870- ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2bf16_inreg@rel32@lo+4
871- ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2bf16_inreg@rel32@hi+12
872- ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
873- ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
874- ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
875- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
876- ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
877- ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
878- ; GFX11-NEXT: s_mov_b32 s32, s33
879- ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
880- ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
881- ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
882- ; GFX11-NEXT: s_mov_b32 exec_lo, s1
883- ; GFX11-NEXT: s_mov_b32 s33, s0
884- ; GFX11-NEXT: s_waitcnt vmcnt(0)
885- ; GFX11-NEXT: s_setpc_b64 s[30:31]
886- call void @external_void_func_v2bf16_inreg (<2 x bfloat> inreg %arg )
887- ret void
888- }
889-
890769define void @test_call_external_void_func_v3f16_inreg (<3 x half > inreg %arg ) #0 {
891770; GFX9-LABEL: test_call_external_void_func_v3f16_inreg:
892771; GFX9: ; %bb.0:
@@ -1529,3 +1408,6 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inre
15291408attributes #0 = { nounwind }
15301409attributes #1 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" }
15311410
1411+ ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1412+ ; GISEL: {{.*}}
1413+ ; SDAG: {{.*}}
0 commit comments