@@ -431,130 +431,6 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 {
431431 ret void
432432}
433433
434- <<<<<<< HEAD
435- ||||||| parent of e811d0543c02 (Use nounwind to avoid touching unrelated tests)
436- define void @test_call_external_void_func_bf16_inreg (bfloat inreg %arg ) #0 {
437- ; GFX9-LABEL: test_call_external_void_func_bf16_inreg:
438- ; GFX9: ; %bb.0:
439- ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
440- ; GFX9-NEXT: s_mov_b32 s17, s33
441- ; GFX9-NEXT: s_mov_b32 s33, s32
442- ; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
443- ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
444- ; GFX9-NEXT: s_mov_b64 exec, s[18:19]
445- ; GFX9-NEXT: v_writelane_b32 v40, s17, 2
446- ; GFX9-NEXT: s_addk_i32 s32, 0x400
447- ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
448- ; GFX9-NEXT: s_getpc_b64 s[18:19]
449- ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4
450- ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12
451- ; GFX9-NEXT: s_mov_b32 s0, s16
452- ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
453- ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
454- ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
455- ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
456- ; GFX9-NEXT: s_mov_b32 s32, s33
457- ; GFX9-NEXT: v_readlane_b32 s4, v40, 2
458- ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
459- ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
460- ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
461- ; GFX9-NEXT: s_mov_b32 s33, s4
462- ; GFX9-NEXT: s_waitcnt vmcnt(0)
463- ; GFX9-NEXT: s_setpc_b64 s[30:31]
464- ;
465- ; GFX11-LABEL: test_call_external_void_func_bf16_inreg:
466- ; GFX11: ; %bb.0:
467- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
468- ; GFX11-NEXT: s_mov_b32 s1, s33
469- ; GFX11-NEXT: s_mov_b32 s33, s32
470- ; GFX11-NEXT: s_or_saveexec_b32 s2, -1
471- ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
472- ; GFX11-NEXT: s_mov_b32 exec_lo, s2
473- ; GFX11-NEXT: v_writelane_b32 v40, s1, 2
474- ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
475- ; GFX11-NEXT: s_add_i32 s32, s32, 16
476- ; GFX11-NEXT: s_getpc_b64 s[2:3]
477- ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_bf16_inreg@rel32@lo+4
478- ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_bf16_inreg@rel32@hi+12
479- ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
480- ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
481- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
482- ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
483- ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
484- ; GFX11-NEXT: s_mov_b32 s32, s33
485- ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
486- ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
487- ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
488- ; GFX11-NEXT: s_mov_b32 exec_lo, s1
489- ; GFX11-NEXT: s_mov_b32 s33, s0
490- ; GFX11-NEXT: s_waitcnt vmcnt(0)
491- ; GFX11-NEXT: s_setpc_b64 s[30:31]
492- call void @external_void_func_bf16_inreg (bfloat inreg %arg )
493- ret void
494- }
495-
496- =======
497- define void @test_call_external_void_func_bf16_inreg (bfloat inreg %arg ) #0 {
498- ; GFX9-LABEL: test_call_external_void_func_bf16_inreg:
499- ; GFX9: ; %bb.0:
500- ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
501- ; GFX9-NEXT: s_mov_b32 s17, s33
502- ; GFX9-NEXT: s_mov_b32 s33, s32
503- ; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
504- ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
505- ; GFX9-NEXT: s_mov_b64 exec, s[18:19]
506- ; GFX9-NEXT: v_writelane_b32 v40, s17, 2
507- ; GFX9-NEXT: s_addk_i32 s32, 0x400
508- ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
509- ; GFX9-NEXT: s_getpc_b64 s[18:19]
510- ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4
511- ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12
512- ; GFX9-NEXT: s_mov_b32 s0, s16
513- ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
514- ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
515- ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
516- ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
517- ; GFX9-NEXT: s_mov_b32 s32, s33
518- ; GFX9-NEXT: v_readlane_b32 s4, v40, 2
519- ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
520- ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
521- ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
522- ; GFX9-NEXT: s_mov_b32 s33, s4
523- ; GFX9-NEXT: s_waitcnt vmcnt(0)
524- ; GFX9-NEXT: s_setpc_b64 s[30:31]
525- ;
526- ; GFX11-LABEL: test_call_external_void_func_bf16_inreg:
527- ; GFX11: ; %bb.0:
528- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
529- ; GFX11-NEXT: s_mov_b32 s1, s33
530- ; GFX11-NEXT: s_mov_b32 s33, s32
531- ; GFX11-NEXT: s_or_saveexec_b32 s2, -1
532- ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
533- ; GFX11-NEXT: s_mov_b32 exec_lo, s2
534- ; GFX11-NEXT: v_writelane_b32 v40, s1, 2
535- ; GFX11-NEXT: s_add_i32 s32, s32, 16
536- ; GFX11-NEXT: s_getpc_b64 s[2:3]
537- ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_bf16_inreg@rel32@lo+4
538- ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_bf16_inreg@rel32@hi+12
539- ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
540- ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
541- ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
542- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
543- ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
544- ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
545- ; GFX11-NEXT: s_mov_b32 s32, s33
546- ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
547- ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
548- ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
549- ; GFX11-NEXT: s_mov_b32 exec_lo, s1
550- ; GFX11-NEXT: s_mov_b32 s33, s0
551- ; GFX11-NEXT: s_waitcnt vmcnt(0)
552- ; GFX11-NEXT: s_setpc_b64 s[30:31]
553- call void @external_void_func_bf16_inreg (bfloat inreg %arg )
554- ret void
555- }
556-
557- >>>>>>> e811d0543c02 (Use nounwind to avoid touching unrelated tests)
558434define void @test_call_external_void_func_f32_inreg (float inreg %arg ) #0 {
559435; GFX9-LABEL: test_call_external_void_func_f32_inreg:
560436; GFX9: ; %bb.0:
@@ -707,132 +583,6 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0
707583 ret void
708584}
709585
710- <<<<<<< HEAD
711- ||||||| parent of e811d0543c02 (Use nounwind to avoid touching unrelated tests)
712-
713- define void @test_call_external_void_func_v2bf16_inreg (<2 x bfloat> inreg %arg ) #0 {
714- ; GFX9-LABEL: test_call_external_void_func_v2bf16_inreg:
715- ; GFX9: ; %bb.0:
716- ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
717- ; GFX9-NEXT: s_mov_b32 s17, s33
718- ; GFX9-NEXT: s_mov_b32 s33, s32
719- ; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
720- ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
721- ; GFX9-NEXT: s_mov_b64 exec, s[18:19]
722- ; GFX9-NEXT: v_writelane_b32 v40, s17, 2
723- ; GFX9-NEXT: s_addk_i32 s32, 0x400
724- ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
725- ; GFX9-NEXT: s_getpc_b64 s[18:19]
726- ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4
727- ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12
728- ; GFX9-NEXT: s_mov_b32 s0, s16
729- ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
730- ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
731- ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
732- ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
733- ; GFX9-NEXT: s_mov_b32 s32, s33
734- ; GFX9-NEXT: v_readlane_b32 s4, v40, 2
735- ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
736- ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
737- ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
738- ; GFX9-NEXT: s_mov_b32 s33, s4
739- ; GFX9-NEXT: s_waitcnt vmcnt(0)
740- ; GFX9-NEXT: s_setpc_b64 s[30:31]
741- ;
742- ; GFX11-LABEL: test_call_external_void_func_v2bf16_inreg:
743- ; GFX11: ; %bb.0:
744- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
745- ; GFX11-NEXT: s_mov_b32 s1, s33
746- ; GFX11-NEXT: s_mov_b32 s33, s32
747- ; GFX11-NEXT: s_or_saveexec_b32 s2, -1
748- ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
749- ; GFX11-NEXT: s_mov_b32 exec_lo, s2
750- ; GFX11-NEXT: v_writelane_b32 v40, s1, 2
751- ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
752- ; GFX11-NEXT: s_add_i32 s32, s32, 16
753- ; GFX11-NEXT: s_getpc_b64 s[2:3]
754- ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2bf16_inreg@rel32@lo+4
755- ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2bf16_inreg@rel32@hi+12
756- ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
757- ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
758- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
759- ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
760- ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
761- ; GFX11-NEXT: s_mov_b32 s32, s33
762- ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
763- ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
764- ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
765- ; GFX11-NEXT: s_mov_b32 exec_lo, s1
766- ; GFX11-NEXT: s_mov_b32 s33, s0
767- ; GFX11-NEXT: s_waitcnt vmcnt(0)
768- ; GFX11-NEXT: s_setpc_b64 s[30:31]
769- call void @external_void_func_v2bf16_inreg (<2 x bfloat> inreg %arg )
770- ret void
771- }
772-
773- =======
774-
775- define void @test_call_external_void_func_v2bf16_inreg (<2 x bfloat> inreg %arg ) #0 {
776- ; GFX9-LABEL: test_call_external_void_func_v2bf16_inreg:
777- ; GFX9: ; %bb.0:
778- ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
779- ; GFX9-NEXT: s_mov_b32 s17, s33
780- ; GFX9-NEXT: s_mov_b32 s33, s32
781- ; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
782- ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
783- ; GFX9-NEXT: s_mov_b64 exec, s[18:19]
784- ; GFX9-NEXT: v_writelane_b32 v40, s17, 2
785- ; GFX9-NEXT: s_addk_i32 s32, 0x400
786- ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
787- ; GFX9-NEXT: s_getpc_b64 s[18:19]
788- ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4
789- ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12
790- ; GFX9-NEXT: s_mov_b32 s0, s16
791- ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
792- ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
793- ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
794- ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
795- ; GFX9-NEXT: s_mov_b32 s32, s33
796- ; GFX9-NEXT: v_readlane_b32 s4, v40, 2
797- ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
798- ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
799- ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
800- ; GFX9-NEXT: s_mov_b32 s33, s4
801- ; GFX9-NEXT: s_waitcnt vmcnt(0)
802- ; GFX9-NEXT: s_setpc_b64 s[30:31]
803- ;
804- ; GFX11-LABEL: test_call_external_void_func_v2bf16_inreg:
805- ; GFX11: ; %bb.0:
806- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
807- ; GFX11-NEXT: s_mov_b32 s1, s33
808- ; GFX11-NEXT: s_mov_b32 s33, s32
809- ; GFX11-NEXT: s_or_saveexec_b32 s2, -1
810- ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
811- ; GFX11-NEXT: s_mov_b32 exec_lo, s2
812- ; GFX11-NEXT: v_writelane_b32 v40, s1, 2
813- ; GFX11-NEXT: s_add_i32 s32, s32, 16
814- ; GFX11-NEXT: s_getpc_b64 s[2:3]
815- ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2bf16_inreg@rel32@lo+4
816- ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2bf16_inreg@rel32@hi+12
817- ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
818- ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
819- ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
820- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
821- ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
822- ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
823- ; GFX11-NEXT: s_mov_b32 s32, s33
824- ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
825- ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
826- ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
827- ; GFX11-NEXT: s_mov_b32 exec_lo, s1
828- ; GFX11-NEXT: s_mov_b32 s33, s0
829- ; GFX11-NEXT: s_waitcnt vmcnt(0)
830- ; GFX11-NEXT: s_setpc_b64 s[30:31]
831- call void @external_void_func_v2bf16_inreg (<2 x bfloat> inreg %arg )
832- ret void
833- }
834-
835- >>>>>>> e811d0543c02 (Use nounwind to avoid touching unrelated tests)
836586define void @test_call_external_void_func_v3f16_inreg (<3 x half > inreg %arg ) #0 {
837587; GFX11-LABEL: test_call_external_void_func_v3f16_inreg:
838588; GFX11: ; %bb.0:
0 commit comments