@@ -642,27 +642,26 @@ define i32 @s_in_multiuse_A(i32 inreg %x, i32 inreg %y, i32 inreg %z, i32 inreg
642642; GCN-NEXT: s_mov_b32 exec_lo, s16
643643; GCN-NEXT: v_writelane_b32 v40, s2, 4
644644; GCN-NEXT: s_add_i32 s32, s32, 16
645+ ; GCN-NEXT: v_writelane_b32 v40, s34, 0
646+ ; GCN-NEXT: v_writelane_b32 v40, s35, 1
647+ ; GCN-NEXT: v_writelane_b32 v40, s30, 2
648+ ; GCN-NEXT: v_writelane_b32 v40, s31, 3
645649; GCN-NEXT: s_getpc_b64 s[16:17]
646650; GCN-NEXT: s_add_u32 s16, s16, use32@gotpcrel32@lo+4
647651; GCN-NEXT: s_addc_u32 s17, s17, use32@gotpcrel32@hi+12
648652; GCN-NEXT: s_xor_b32 s0, s0, s1
649653; GCN-NEXT: s_load_b64 s[16:17], s[16:17], 0x0
650- ; GCN-NEXT: v_writelane_b32 v40, s30, 0
651- ; GCN-NEXT: v_writelane_b32 v40, s31, 1
652- ; GCN-NEXT: v_writelane_b32 v40, s34, 2
653- ; GCN-NEXT: s_mov_b32 s34, s1
654- ; GCN-NEXT: v_writelane_b32 v40, s35, 3
655654; GCN-NEXT: s_and_b32 s35, s0, s3
656- ; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
655+ ; GCN-NEXT: s_mov_b32 s34, s1
657656; GCN-NEXT: v_mov_b32_e32 v0, s35
658657; GCN-NEXT: s_waitcnt lgkmcnt(0)
659658; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
660659; GCN-NEXT: s_xor_b32 s0, s35, s34
661- ; GCN-NEXT: v_readlane_b32 s35 , v40, 3
660+ ; GCN-NEXT: v_readlane_b32 s30 , v40, 2
662661; GCN-NEXT: v_mov_b32_e32 v0, s0
663- ; GCN-NEXT: v_readlane_b32 s34 , v40, 2
664- ; GCN-NEXT: v_readlane_b32 s31 , v40, 1
665- ; GCN-NEXT: v_readlane_b32 s30 , v40, 0
662+ ; GCN-NEXT: v_readlane_b32 s31 , v40, 3
663+ ; GCN-NEXT: v_readlane_b32 s35 , v40, 1
664+ ; GCN-NEXT: v_readlane_b32 s34 , v40, 0
666665; GCN-NEXT: s_mov_b32 s32, s33
667666; GCN-NEXT: v_readlane_b32 s0, v40, 4
668667; GCN-NEXT: s_or_saveexec_b32 s1, -1
@@ -687,29 +686,28 @@ define i32 @s_in_multiuse_B(i32 inreg %x, i32 inreg %y, i32 inreg %z, i32 inreg
687686; GCN-NEXT: s_or_saveexec_b32 s16, -1
688687; GCN-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
689688; GCN-NEXT: s_mov_b32 exec_lo, s16
689+ ; GCN-NEXT: v_writelane_b32 v40, s2, 4
690690; GCN-NEXT: s_add_i32 s32, s32, 16
691+ ; GCN-NEXT: v_writelane_b32 v40, s34, 0
692+ ; GCN-NEXT: v_writelane_b32 v40, s35, 1
693+ ; GCN-NEXT: v_writelane_b32 v40, s30, 2
694+ ; GCN-NEXT: v_writelane_b32 v40, s31, 3
691695; GCN-NEXT: s_getpc_b64 s[16:17]
692696; GCN-NEXT: s_add_u32 s16, s16, use32@gotpcrel32@lo+4
693697; GCN-NEXT: s_addc_u32 s17, s17, use32@gotpcrel32@hi+12
694- ; GCN-NEXT: v_writelane_b32 v40, s2, 4
695- ; GCN-NEXT: s_load_b64 s[16:17], s[16:17], 0x0
696698; GCN-NEXT: s_xor_b32 s0, s0, s1
697- ; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
699+ ; GCN-NEXT: s_load_b64 s[16:17], s[16:17], 0x0
698700; GCN-NEXT: v_mov_b32_e32 v0, s0
699- ; GCN-NEXT: v_writelane_b32 v40, s30, 0
700- ; GCN-NEXT: v_writelane_b32 v40, s31, 1
701- ; GCN-NEXT: v_writelane_b32 v40, s34, 2
702701; GCN-NEXT: s_mov_b32 s34, s1
703- ; GCN-NEXT: v_writelane_b32 v40, s35, 3
704702; GCN-NEXT: s_and_b32 s35, s0, s3
705703; GCN-NEXT: s_waitcnt lgkmcnt(0)
706704; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
707705; GCN-NEXT: s_xor_b32 s0, s35, s34
708- ; GCN-NEXT: v_readlane_b32 s35 , v40, 3
706+ ; GCN-NEXT: v_readlane_b32 s30 , v40, 2
709707; GCN-NEXT: v_mov_b32_e32 v0, s0
710- ; GCN-NEXT: v_readlane_b32 s34 , v40, 2
711- ; GCN-NEXT: v_readlane_b32 s31 , v40, 1
712- ; GCN-NEXT: v_readlane_b32 s30 , v40, 0
708+ ; GCN-NEXT: v_readlane_b32 s31 , v40, 3
709+ ; GCN-NEXT: v_readlane_b32 s35 , v40, 1
710+ ; GCN-NEXT: v_readlane_b32 s34 , v40, 0
713711; GCN-NEXT: s_mov_b32 s32, s33
714712; GCN-NEXT: v_readlane_b32 s0, v40, 4
715713; GCN-NEXT: s_or_saveexec_b32 s1, -1
0 commit comments