@@ -642,27 +642,26 @@ define i32 @s_in_multiuse_A(i32 inreg %x, i32 inreg %y, i32 inreg %z, i32 inreg
642
642
; GCN-NEXT: s_mov_b32 exec_lo, s16
643
643
; GCN-NEXT: v_writelane_b32 v40, s2, 4
644
644
; GCN-NEXT: s_add_i32 s32, s32, 16
645
+ ; GCN-NEXT: v_writelane_b32 v40, s34, 0
646
+ ; GCN-NEXT: v_writelane_b32 v40, s35, 1
647
+ ; GCN-NEXT: v_writelane_b32 v40, s30, 2
648
+ ; GCN-NEXT: v_writelane_b32 v40, s31, 3
645
649
; GCN-NEXT: s_getpc_b64 s[16:17]
646
650
; GCN-NEXT: s_add_u32 s16, s16, use32@gotpcrel32@lo+4
647
651
; GCN-NEXT: s_addc_u32 s17, s17, use32@gotpcrel32@hi+12
648
652
; GCN-NEXT: s_xor_b32 s0, s0, s1
649
653
; GCN-NEXT: s_load_b64 s[16:17], s[16:17], 0x0
650
- ; GCN-NEXT: v_writelane_b32 v40, s30, 0
651
- ; GCN-NEXT: v_writelane_b32 v40, s31, 1
652
- ; GCN-NEXT: v_writelane_b32 v40, s34, 2
653
- ; GCN-NEXT: s_mov_b32 s34, s1
654
- ; GCN-NEXT: v_writelane_b32 v40, s35, 3
655
654
; GCN-NEXT: s_and_b32 s35, s0, s3
656
- ; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
655
+ ; GCN-NEXT: s_mov_b32 s34, s1
657
656
; GCN-NEXT: v_mov_b32_e32 v0, s35
658
657
; GCN-NEXT: s_waitcnt lgkmcnt(0)
659
658
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
660
659
; GCN-NEXT: s_xor_b32 s0, s35, s34
661
- ; GCN-NEXT: v_readlane_b32 s35 , v40, 3
660
+ ; GCN-NEXT: v_readlane_b32 s30 , v40, 2
662
661
; GCN-NEXT: v_mov_b32_e32 v0, s0
663
- ; GCN-NEXT: v_readlane_b32 s34 , v40, 2
664
- ; GCN-NEXT: v_readlane_b32 s31 , v40, 1
665
- ; GCN-NEXT: v_readlane_b32 s30 , v40, 0
662
+ ; GCN-NEXT: v_readlane_b32 s31 , v40, 3
663
+ ; GCN-NEXT: v_readlane_b32 s35 , v40, 1
664
+ ; GCN-NEXT: v_readlane_b32 s34 , v40, 0
666
665
; GCN-NEXT: s_mov_b32 s32, s33
667
666
; GCN-NEXT: v_readlane_b32 s0, v40, 4
668
667
; GCN-NEXT: s_or_saveexec_b32 s1, -1
@@ -687,29 +686,28 @@ define i32 @s_in_multiuse_B(i32 inreg %x, i32 inreg %y, i32 inreg %z, i32 inreg
687
686
; GCN-NEXT: s_or_saveexec_b32 s16, -1
688
687
; GCN-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
689
688
; GCN-NEXT: s_mov_b32 exec_lo, s16
689
+ ; GCN-NEXT: v_writelane_b32 v40, s2, 4
690
690
; GCN-NEXT: s_add_i32 s32, s32, 16
691
+ ; GCN-NEXT: v_writelane_b32 v40, s34, 0
692
+ ; GCN-NEXT: v_writelane_b32 v40, s35, 1
693
+ ; GCN-NEXT: v_writelane_b32 v40, s30, 2
694
+ ; GCN-NEXT: v_writelane_b32 v40, s31, 3
691
695
; GCN-NEXT: s_getpc_b64 s[16:17]
692
696
; GCN-NEXT: s_add_u32 s16, s16, use32@gotpcrel32@lo+4
693
697
; GCN-NEXT: s_addc_u32 s17, s17, use32@gotpcrel32@hi+12
694
- ; GCN-NEXT: v_writelane_b32 v40, s2, 4
695
- ; GCN-NEXT: s_load_b64 s[16:17], s[16:17], 0x0
696
698
; GCN-NEXT: s_xor_b32 s0, s0, s1
697
- ; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
699
+ ; GCN-NEXT: s_load_b64 s[16:17], s[16:17], 0x0
698
700
; GCN-NEXT: v_mov_b32_e32 v0, s0
699
- ; GCN-NEXT: v_writelane_b32 v40, s30, 0
700
- ; GCN-NEXT: v_writelane_b32 v40, s31, 1
701
- ; GCN-NEXT: v_writelane_b32 v40, s34, 2
702
701
; GCN-NEXT: s_mov_b32 s34, s1
703
- ; GCN-NEXT: v_writelane_b32 v40, s35, 3
704
702
; GCN-NEXT: s_and_b32 s35, s0, s3
705
703
; GCN-NEXT: s_waitcnt lgkmcnt(0)
706
704
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
707
705
; GCN-NEXT: s_xor_b32 s0, s35, s34
708
- ; GCN-NEXT: v_readlane_b32 s35 , v40, 3
706
+ ; GCN-NEXT: v_readlane_b32 s30 , v40, 2
709
707
; GCN-NEXT: v_mov_b32_e32 v0, s0
710
- ; GCN-NEXT: v_readlane_b32 s34 , v40, 2
711
- ; GCN-NEXT: v_readlane_b32 s31 , v40, 1
712
- ; GCN-NEXT: v_readlane_b32 s30 , v40, 0
708
+ ; GCN-NEXT: v_readlane_b32 s31 , v40, 3
709
+ ; GCN-NEXT: v_readlane_b32 s35 , v40, 1
710
+ ; GCN-NEXT: v_readlane_b32 s34 , v40, 0
713
711
; GCN-NEXT: s_mov_b32 s32, s33
714
712
; GCN-NEXT: v_readlane_b32 s0, v40, 4
715
713
; GCN-NEXT: s_or_saveexec_b32 s1, -1
0 commit comments