@@ -2549,17 +2549,17 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
25492549;
25502550; GFX1164_ITERATIVE-LABEL: add_i64_varying:
25512551; GFX1164_ITERATIVE: ; %bb.0: ; %entry
2552- ; GFX1164_ITERATIVE-NEXT: v_and_b32_e32 v2, 0x3ff, v0
2553- ; GFX1164_ITERATIVE-NEXT: v_mov_b32_e32 v3, 0
2552+ ; GFX1164_ITERATIVE-NEXT: v_mov_b32_e32 v2, 0
2553+ ; GFX1164_ITERATIVE-NEXT: v_and_b32_e32 v3, 0x3ff, v0
25542554; GFX1164_ITERATIVE-NEXT: s_mov_b64 s[0:1], exec
25552555; GFX1164_ITERATIVE-NEXT: s_mov_b64 s[6:7], 0
25562556; GFX1164_ITERATIVE-NEXT: ; implicit-def: $vgpr0_vgpr1
25572557; GFX1164_ITERATIVE-NEXT: .LBB5_1: ; %ComputeLoop
25582558; GFX1164_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
25592559; GFX1164_ITERATIVE-NEXT: s_ctz_i32_b64 s2, s[0:1]
2560- ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2561- ; GFX1164_ITERATIVE-NEXT: v_readlane_b32 s3, v2 , s2
2562- ; GFX1164_ITERATIVE-NEXT: v_readlane_b32 s8, v3 , s2
2560+ ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1( SALU_CYCLE_1)
2561+ ; GFX1164_ITERATIVE-NEXT: v_readlane_b32 s3, v3 , s2
2562+ ; GFX1164_ITERATIVE-NEXT: v_readlane_b32 s8, v2 , s2
25632563; GFX1164_ITERATIVE-NEXT: v_writelane_b32 v1, s7, s2
25642564; GFX1164_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s2
25652565; GFX1164_ITERATIVE-NEXT: s_add_u32 s6, s6, s3
@@ -2606,16 +2606,16 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
26062606;
26072607; GFX1132_ITERATIVE-LABEL: add_i64_varying:
26082608; GFX1132_ITERATIVE: ; %bb.0: ; %entry
2609- ; GFX1132_ITERATIVE-NEXT: v_dual_mov_b32 v3 , 0 :: v_dual_and_b32 v2 , 0x3ff, v0
2609+ ; GFX1132_ITERATIVE-NEXT: v_dual_mov_b32 v2 , 0 :: v_dual_and_b32 v3 , 0x3ff, v0
26102610; GFX1132_ITERATIVE-NEXT: s_mov_b32 s0, exec_lo
26112611; GFX1132_ITERATIVE-NEXT: s_mov_b64 s[6:7], 0
26122612; GFX1132_ITERATIVE-NEXT: ; implicit-def: $vgpr0_vgpr1
26132613; GFX1132_ITERATIVE-NEXT: .LBB5_1: ; %ComputeLoop
26142614; GFX1132_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
26152615; GFX1132_ITERATIVE-NEXT: s_ctz_i32_b32 s1, s0
26162616; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2617- ; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s2, v2 , s1
2618- ; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s3, v3 , s1
2617+ ; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s2, v3 , s1
2618+ ; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s3, v2 , s1
26192619; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v1, s7, s1
26202620; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s1
26212621; GFX1132_ITERATIVE-NEXT: s_add_u32 s6, s6, s2
@@ -2659,17 +2659,17 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
26592659;
26602660; GFX1264_ITERATIVE-LABEL: add_i64_varying:
26612661; GFX1264_ITERATIVE: ; %bb.0: ; %entry
2662- ; GFX1264_ITERATIVE-NEXT: v_and_b32_e32 v2, 0x3ff, v0
2663- ; GFX1264_ITERATIVE-NEXT: v_mov_b32_e32 v3, 0
2662+ ; GFX1264_ITERATIVE-NEXT: v_mov_b32_e32 v2, 0
2663+ ; GFX1264_ITERATIVE-NEXT: v_and_b32_e32 v3, 0x3ff, v0
26642664; GFX1264_ITERATIVE-NEXT: s_mov_b64 s[0:1], exec
26652665; GFX1264_ITERATIVE-NEXT: s_mov_b64 s[6:7], 0
26662666; GFX1264_ITERATIVE-NEXT: ; implicit-def: $vgpr0_vgpr1
26672667; GFX1264_ITERATIVE-NEXT: .LBB5_1: ; %ComputeLoop
26682668; GFX1264_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
26692669; GFX1264_ITERATIVE-NEXT: s_ctz_i32_b64 s8, s[0:1]
26702670; GFX1264_ITERATIVE-NEXT: s_wait_alu 0xfffe
2671- ; GFX1264_ITERATIVE-NEXT: v_readlane_b32 s3, v3 , s8
2672- ; GFX1264_ITERATIVE-NEXT: v_readlane_b32 s2, v2 , s8
2671+ ; GFX1264_ITERATIVE-NEXT: v_readlane_b32 s3, v2 , s8
2672+ ; GFX1264_ITERATIVE-NEXT: v_readlane_b32 s2, v3 , s8
26732673; GFX1264_ITERATIVE-NEXT: v_writelane_b32 v1, s7, s8
26742674; GFX1264_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s8
26752675; GFX1264_ITERATIVE-NEXT: s_lshl_b64 s[8:9], 1, s8
@@ -2714,7 +2714,7 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
27142714;
27152715; GFX1232_ITERATIVE-LABEL: add_i64_varying:
27162716; GFX1232_ITERATIVE: ; %bb.0: ; %entry
2717- ; GFX1232_ITERATIVE-NEXT: v_dual_mov_b32 v3 , 0 :: v_dual_and_b32 v2 , 0x3ff, v0
2717+ ; GFX1232_ITERATIVE-NEXT: v_dual_mov_b32 v2 , 0 :: v_dual_and_b32 v3 , 0x3ff, v0
27182718; GFX1232_ITERATIVE-NEXT: s_mov_b32 s0, exec_lo
27192719; GFX1232_ITERATIVE-NEXT: s_mov_b64 s[6:7], 0
27202720; GFX1232_ITERATIVE-NEXT: ; implicit-def: $vgpr0_vgpr1
@@ -2723,8 +2723,8 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
27232723; GFX1232_ITERATIVE-NEXT: s_wait_alu 0xfffe
27242724; GFX1232_ITERATIVE-NEXT: s_ctz_i32_b32 s1, s0
27252725; GFX1232_ITERATIVE-NEXT: s_wait_alu 0xfffe
2726- ; GFX1232_ITERATIVE-NEXT: v_readlane_b32 s3, v3 , s1
2727- ; GFX1232_ITERATIVE-NEXT: v_readlane_b32 s2, v2 , s1
2726+ ; GFX1232_ITERATIVE-NEXT: v_readlane_b32 s3, v2 , s1
2727+ ; GFX1232_ITERATIVE-NEXT: v_readlane_b32 s2, v3 , s1
27282728; GFX1232_ITERATIVE-NEXT: v_writelane_b32 v1, s7, s1
27292729; GFX1232_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s1
27302730; GFX1232_ITERATIVE-NEXT: s_lshl_b32 s1, 1, s1
@@ -6930,15 +6930,15 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
69306930;
69316931; GFX1164_ITERATIVE-LABEL: sub_i64_varying:
69326932; GFX1164_ITERATIVE: ; %bb.0: ; %entry
6933- ; GFX1164_ITERATIVE-NEXT: v_and_b32_e32 v0, 0x3ff, v0
69346933; GFX1164_ITERATIVE-NEXT: v_mov_b32_e32 v1, 0
6934+ ; GFX1164_ITERATIVE-NEXT: v_and_b32_e32 v0, 0x3ff, v0
69356935; GFX1164_ITERATIVE-NEXT: s_mov_b64 s[0:1], exec
69366936; GFX1164_ITERATIVE-NEXT: s_mov_b64 s[8:9], 0
69376937; GFX1164_ITERATIVE-NEXT: ; implicit-def: $vgpr4_vgpr5
69386938; GFX1164_ITERATIVE-NEXT: .LBB11_1: ; %ComputeLoop
69396939; GFX1164_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
69406940; GFX1164_ITERATIVE-NEXT: s_ctz_i32_b64 s2, s[0:1]
6941- ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
6941+ ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1( SALU_CYCLE_1)
69426942; GFX1164_ITERATIVE-NEXT: v_readlane_b32 s3, v0, s2
69436943; GFX1164_ITERATIVE-NEXT: v_readlane_b32 s6, v1, s2
69446944; GFX1164_ITERATIVE-NEXT: v_writelane_b32 v5, s9, s2
@@ -7087,17 +7087,17 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
70877087;
70887088; GFX1264_ITERATIVE-LABEL: sub_i64_varying:
70897089; GFX1264_ITERATIVE: ; %bb.0: ; %entry
7090- ; GFX1264_ITERATIVE-NEXT: v_and_b32_e32 v2, 0x3ff, v0
7091- ; GFX1264_ITERATIVE-NEXT: v_mov_b32_e32 v3, 0
7090+ ; GFX1264_ITERATIVE-NEXT: v_mov_b32_e32 v2, 0
7091+ ; GFX1264_ITERATIVE-NEXT: v_and_b32_e32 v3, 0x3ff, v0
70927092; GFX1264_ITERATIVE-NEXT: s_mov_b64 s[0:1], exec
70937093; GFX1264_ITERATIVE-NEXT: s_mov_b64 s[6:7], 0
70947094; GFX1264_ITERATIVE-NEXT: ; implicit-def: $vgpr0_vgpr1
70957095; GFX1264_ITERATIVE-NEXT: .LBB11_1: ; %ComputeLoop
70967096; GFX1264_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
70977097; GFX1264_ITERATIVE-NEXT: s_ctz_i32_b64 s8, s[0:1]
70987098; GFX1264_ITERATIVE-NEXT: s_wait_alu 0xfffe
7099- ; GFX1264_ITERATIVE-NEXT: v_readlane_b32 s3, v3 , s8
7100- ; GFX1264_ITERATIVE-NEXT: v_readlane_b32 s2, v2 , s8
7099+ ; GFX1264_ITERATIVE-NEXT: v_readlane_b32 s3, v2 , s8
7100+ ; GFX1264_ITERATIVE-NEXT: v_readlane_b32 s2, v3 , s8
71017101; GFX1264_ITERATIVE-NEXT: v_writelane_b32 v1, s7, s8
71027102; GFX1264_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s8
71037103; GFX1264_ITERATIVE-NEXT: s_lshl_b64 s[8:9], 1, s8
@@ -7142,7 +7142,7 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
71427142;
71437143; GFX1232_ITERATIVE-LABEL: sub_i64_varying:
71447144; GFX1232_ITERATIVE: ; %bb.0: ; %entry
7145- ; GFX1232_ITERATIVE-NEXT: v_dual_mov_b32 v3 , 0 :: v_dual_and_b32 v2 , 0x3ff, v0
7145+ ; GFX1232_ITERATIVE-NEXT: v_dual_mov_b32 v2 , 0 :: v_dual_and_b32 v3 , 0x3ff, v0
71467146; GFX1232_ITERATIVE-NEXT: s_mov_b32 s0, exec_lo
71477147; GFX1232_ITERATIVE-NEXT: s_mov_b64 s[6:7], 0
71487148; GFX1232_ITERATIVE-NEXT: ; implicit-def: $vgpr0_vgpr1
@@ -7151,8 +7151,8 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
71517151; GFX1232_ITERATIVE-NEXT: s_wait_alu 0xfffe
71527152; GFX1232_ITERATIVE-NEXT: s_ctz_i32_b32 s1, s0
71537153; GFX1232_ITERATIVE-NEXT: s_wait_alu 0xfffe
7154- ; GFX1232_ITERATIVE-NEXT: v_readlane_b32 s3, v3 , s1
7155- ; GFX1232_ITERATIVE-NEXT: v_readlane_b32 s2, v2 , s1
7154+ ; GFX1232_ITERATIVE-NEXT: v_readlane_b32 s3, v2 , s1
7155+ ; GFX1232_ITERATIVE-NEXT: v_readlane_b32 s2, v3 , s1
71567156; GFX1232_ITERATIVE-NEXT: v_writelane_b32 v1, s7, s1
71577157; GFX1232_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s1
71587158; GFX1232_ITERATIVE-NEXT: s_lshl_b32 s1, 1, s1
0 commit comments