@@ -223,7 +223,6 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
223223; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
224224; GFX1164-NEXT: v_readfirstlane_b32 s2, v1
225225; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
226- ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
227226; GFX1164-NEXT: v_mad_u32_u24 v0, v0, 5, s2
228227; GFX1164-NEXT: s_mov_b32 s2, -1
229228; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -257,7 +256,6 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
257256; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
258257; GFX1132-NEXT: v_readfirstlane_b32 s2, v1
259258; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
260- ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
261259; GFX1132-NEXT: v_mad_u32_u24 v0, v0, 5, s2
262260; GFX1132-NEXT: s_mov_b32 s2, -1
263261; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -293,7 +291,6 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
293291; GFX1264-NEXT: s_wait_kmcnt 0x0
294292; GFX1264-NEXT: v_readfirstlane_b32 s2, v1
295293; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
296- ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1)
297294; GFX1264-NEXT: v_mad_u32_u24 v0, v0, 5, s2
298295; GFX1264-NEXT: s_mov_b32 s2, -1
299296; GFX1264-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -326,7 +323,6 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
326323; GFX1232-NEXT: s_wait_kmcnt 0x0
327324; GFX1232-NEXT: v_readfirstlane_b32 s2, v1
328325; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
329- ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
330326; GFX1232-NEXT: v_mad_u32_u24 v0, v0, 5, s2
331327; GFX1232-NEXT: s_mov_b32 s2, -1
332328; GFX1232-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -937,7 +933,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
937933; GFX1164_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0)
938934; GFX1164_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
939935; GFX1164_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
940- ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
941936; GFX1164_ITERATIVE-NEXT: v_add_nc_u32_e32 v0, s2, v0
942937; GFX1164_ITERATIVE-NEXT: s_mov_b32 s2, -1
943938; GFX1164_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -985,7 +980,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
985980; GFX1132_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0)
986981; GFX1132_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
987982; GFX1132_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
988- ; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
989983; GFX1132_ITERATIVE-NEXT: v_add_nc_u32_e32 v0, s2, v0
990984; GFX1132_ITERATIVE-NEXT: s_mov_b32 s2, -1
991985; GFX1132_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -1034,7 +1028,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
10341028; GFX1264_ITERATIVE-NEXT: s_wait_kmcnt 0x0
10351029; GFX1264_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
10361030; GFX1264_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
1037- ; GFX1264_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
10381031; GFX1264_ITERATIVE-NEXT: v_add_nc_u32_e32 v0, s2, v0
10391032; GFX1264_ITERATIVE-NEXT: s_mov_b32 s2, -1
10401033; GFX1264_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -1082,7 +1075,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
10821075; GFX1232_ITERATIVE-NEXT: s_wait_kmcnt 0x0
10831076; GFX1232_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
10841077; GFX1232_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
1085- ; GFX1232_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
10861078; GFX1232_ITERATIVE-NEXT: v_add_nc_u32_e32 v0, s2, v0
10871079; GFX1232_ITERATIVE-NEXT: s_mov_b32 s2, -1
10881080; GFX1232_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -2159,12 +2151,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
21592151; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
21602152; GFX1164-NEXT: v_readfirstlane_b32 s3, v1
21612153; GFX1164-NEXT: v_readfirstlane_b32 s2, v0
2162- ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2 ) | instid1(VALU_DEP_1)
2154+ ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3 ) | instid1(VALU_DEP_1)
21632155; GFX1164-NEXT: v_mad_u64_u32 v[0:1], null, s4, v2, s[2:3]
21642156; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
21652157; GFX1164-NEXT: s_mov_b32 s2, -1
21662158; GFX1164-NEXT: v_mad_u64_u32 v[3:4], null, s5, v2, v[1:2]
2167- ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
21682159; GFX1164-NEXT: v_mov_b32_e32 v1, v3
21692160; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
21702161; GFX1164-NEXT: s_endpgm
@@ -2203,12 +2194,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
22032194; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
22042195; GFX1132-NEXT: v_readfirstlane_b32 s3, v1
22052196; GFX1132-NEXT: v_readfirstlane_b32 s2, v0
2206- ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2 ) | instid1(VALU_DEP_1)
2197+ ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3 ) | instid1(VALU_DEP_1)
22072198; GFX1132-NEXT: v_mad_u64_u32 v[0:1], null, s4, v2, s[2:3]
22082199; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
22092200; GFX1132-NEXT: s_mov_b32 s2, -1
22102201; GFX1132-NEXT: v_mad_u64_u32 v[3:4], null, s5, v2, v[1:2]
2211- ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
22122202; GFX1132-NEXT: v_mov_b32_e32 v1, v3
22132203; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
22142204; GFX1132-NEXT: s_endpgm
@@ -2246,7 +2236,7 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
22462236; GFX1264-NEXT: s_wait_kmcnt 0x0
22472237; GFX1264-NEXT: v_readfirstlane_b32 s3, v1
22482238; GFX1264-NEXT: v_readfirstlane_b32 s2, v0
2249- ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
2239+ ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1)
22502240; GFX1264-NEXT: v_mad_co_u64_u32 v[0:1], null, s4, v2, s[2:3]
22512241; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
22522242; GFX1264-NEXT: s_mov_b32 s2, -1
@@ -2286,7 +2276,7 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
22862276; GFX1232-NEXT: s_wait_kmcnt 0x0
22872277; GFX1232-NEXT: v_readfirstlane_b32 s3, v1
22882278; GFX1232-NEXT: v_readfirstlane_b32 s2, v0
2289- ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
2279+ ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
22902280; GFX1232-NEXT: v_mad_co_u64_u32 v[0:1], null, s4, v2, s[2:3]
22912281; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
22922282; GFX1232-NEXT: s_mov_b32 s2, -1
@@ -4084,7 +4074,6 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
40844074; GFX1164-NEXT: v_mul_lo_u32 v0, s8, v0
40854075; GFX1164-NEXT: v_readfirstlane_b32 s2, v1
40864076; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
4087- ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
40884077; GFX1164-NEXT: v_sub_nc_u32_e32 v0, s2, v0
40894078; GFX1164-NEXT: s_mov_b32 s2, -1
40904079; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -4121,7 +4110,6 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
41214110; GFX1132-NEXT: v_mul_lo_u32 v0, s4, v0
41224111; GFX1132-NEXT: v_readfirstlane_b32 s2, v1
41234112; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
4124- ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
41254113; GFX1132-NEXT: v_sub_nc_u32_e32 v0, s2, v0
41264114; GFX1132-NEXT: s_mov_b32 s2, -1
41274115; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -4160,7 +4148,6 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
41604148; GFX1264-NEXT: v_mul_lo_u32 v0, s8, v0
41614149; GFX1264-NEXT: v_readfirstlane_b32 s2, v1
41624150; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
4163- ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1)
41644151; GFX1264-NEXT: v_sub_nc_u32_e32 v0, s2, v0
41654152; GFX1264-NEXT: s_mov_b32 s2, -1
41664153; GFX1264-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -4198,7 +4185,6 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
41984185; GFX1232-NEXT: v_mul_lo_u32 v0, s4, v0
41994186; GFX1232-NEXT: v_readfirstlane_b32 s2, v1
42004187; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
4201- ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
42024188; GFX1232-NEXT: v_sub_nc_u32_e32 v0, s2, v0
42034189; GFX1232-NEXT: s_mov_b32 s2, -1
42044190; GFX1232-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -4482,7 +4468,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
44824468; GFX1164_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0)
44834469; GFX1164_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
44844470; GFX1164_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
4485- ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
44864471; GFX1164_ITERATIVE-NEXT: v_sub_nc_u32_e32 v0, s2, v0
44874472; GFX1164_ITERATIVE-NEXT: s_mov_b32 s2, -1
44884473; GFX1164_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -4530,7 +4515,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
45304515; GFX1132_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0)
45314516; GFX1132_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
45324517; GFX1132_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
4533- ; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
45344518; GFX1132_ITERATIVE-NEXT: v_sub_nc_u32_e32 v0, s2, v0
45354519; GFX1132_ITERATIVE-NEXT: s_mov_b32 s2, -1
45364520; GFX1132_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -4579,7 +4563,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
45794563; GFX1264_ITERATIVE-NEXT: s_wait_kmcnt 0x0
45804564; GFX1264_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
45814565; GFX1264_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
4582- ; GFX1264_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
45834566; GFX1264_ITERATIVE-NEXT: v_sub_nc_u32_e32 v0, s2, v0
45844567; GFX1264_ITERATIVE-NEXT: s_mov_b32 s2, -1
45854568; GFX1264_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -4627,7 +4610,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
46274610; GFX1232_ITERATIVE-NEXT: s_wait_kmcnt 0x0
46284611; GFX1232_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
46294612; GFX1232_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
4630- ; GFX1232_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
46314613; GFX1232_ITERATIVE-NEXT: v_sub_nc_u32_e32 v0, s2, v0
46324614; GFX1232_ITERATIVE-NEXT: s_mov_b32 s2, -1
46334615; GFX1232_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -5734,7 +5716,6 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
57345716; GFX1164-NEXT: v_mad_u64_u32 v[5:6], null, s5, v2, v[4:5]
57355717; GFX1164-NEXT: v_sub_co_u32 v0, vcc, s2, v3
57365718; GFX1164-NEXT: s_mov_b32 s2, -1
5737- ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_2)
57385719; GFX1164-NEXT: v_sub_co_ci_u32_e64 v1, null, s3, v5, vcc
57395720; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
57405721; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
@@ -5779,7 +5760,6 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
57795760; GFX1132-NEXT: v_mad_u64_u32 v[5:6], null, s5, v2, v[4:5]
57805761; GFX1132-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v3
57815762; GFX1132-NEXT: s_mov_b32 s2, -1
5782- ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2)
57835763; GFX1132-NEXT: v_sub_co_ci_u32_e64 v1, null, s3, v5, vcc_lo
57845764; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
57855765; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
@@ -5823,7 +5803,6 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
58235803; GFX1264-NEXT: v_mad_co_u64_u32 v[4:5], null, s5, v2, v[4:5]
58245804; GFX1264-NEXT: v_sub_co_u32 v0, vcc, s2, v3
58255805; GFX1264-NEXT: s_mov_b32 s2, -1
5826- ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_2)
58275806; GFX1264-NEXT: v_sub_co_ci_u32_e64 v1, null, s3, v4, vcc
58285807; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
58295808; GFX1264-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null
@@ -5866,7 +5845,6 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
58665845; GFX1232-NEXT: v_mad_co_u64_u32 v[4:5], null, s5, v2, v[4:5]
58675846; GFX1232-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v3
58685847; GFX1232-NEXT: s_mov_b32 s2, -1
5869- ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_2)
58705848; GFX1232-NEXT: v_sub_co_ci_u32_e64 v1, null, s3, v4, vcc_lo
58715849; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
58725850; GFX1232-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null
0 commit comments