@@ -965,11 +965,11 @@ define amdgpu_ps void @ps_mesa_inreg_v5i32(<5 x i32> inreg %arg0) {
965965; 
966966; GFX11-LABEL: ps_mesa_inreg_v5i32: 
967967; GFX11:       ; %bb.0: 
968- ; GFX11-NEXT:    s_add_i32 s3, s3, 4 
969- ; GFX11-NEXT:    s_add_i32 s2, s2, 3 
970968; GFX11-NEXT:    s_add_i32 s1, s1, 2 
971969; GFX11-NEXT:    s_add_i32 s4, s4, 5 
972970; GFX11-NEXT:    s_add_i32 s0, s0, 1 
971+ ; GFX11-NEXT:    s_add_i32 s3, s3, 4 
972+ ; GFX11-NEXT:    s_add_i32 s2, s2, 3 
973973; GFX11-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v1, s1 
974974; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3 
975975; GFX11-NEXT:    v_mov_b32_e32 v2, s2 
@@ -980,12 +980,11 @@ define amdgpu_ps void @ps_mesa_inreg_v5i32(<5 x i32> inreg %arg0) {
980980; 
981981; GFX1250-LABEL: ps_mesa_inreg_v5i32: 
982982; GFX1250:       ; %bb.0: 
983- ; GFX1250-NEXT:    s_add_co_i32 s3, s3, 4 
984- ; GFX1250-NEXT:    s_add_co_i32 s2, s2, 3 
985983; GFX1250-NEXT:    s_add_co_i32 s1, s1, 2 
986984; GFX1250-NEXT:    s_add_co_i32 s4, s4, 5 
987985; GFX1250-NEXT:    s_add_co_i32 s0, s0, 1 
988- ; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) 
986+ ; GFX1250-NEXT:    s_add_co_i32 s3, s3, 4 
987+ ; GFX1250-NEXT:    s_add_co_i32 s2, s2, 3 
989988; GFX1250-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v0, s0 
990989; GFX1250-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2 
991990; GFX1250-NEXT:    v_mov_b32_e32 v3, s3 
@@ -1014,36 +1013,36 @@ define amdgpu_ps void @ps_mesa_inreg_v5f32(<5 x float> inreg %arg0) {
10141013; 
10151014; VI-LABEL: ps_mesa_inreg_v5f32: 
10161015; VI:       ; %bb.0: 
1017- ; VI-NEXT:    v_add_f32_e64 v3, s3, -1.0 
1018- ; VI-NEXT:    v_add_f32_e64 v2, s2, 4.0 
10191016; VI-NEXT:    v_add_f32_e64 v1, s1, 2.0 
10201017; VI-NEXT:    v_add_f32_e64 v0, s0, 1.0 
10211018; VI-NEXT:    v_add_f32_e64 v4, s4, 0.5 
1019+ ; VI-NEXT:    v_add_f32_e64 v3, s3, -1.0 
1020+ ; VI-NEXT:    v_add_f32_e64 v2, s2, 4.0 
10221021; VI-NEXT:    flat_store_dword v[0:1], v4 
10231022; VI-NEXT:    flat_store_dwordx4 v[0:1], v[0:3] 
10241023; VI-NEXT:    s_endpgm 
10251024; 
10261025; GFX11-LABEL: ps_mesa_inreg_v5f32: 
10271026; GFX11:       ; %bb.0: 
1028- ; GFX11-NEXT:    v_add_f32_e64 v3, s3, -1.0 
1029- ; GFX11-NEXT:    v_add_f32_e64 v2, s2, 4.0 
10301027; GFX11-NEXT:    v_add_f32_e64 v1, s1, 2.0 
10311028; GFX11-NEXT:    v_add_f32_e64 v4, s4, 0.5 
10321029; GFX11-NEXT:    v_add_f32_e64 v0, s0, 1.0 
1030+ ; GFX11-NEXT:    v_add_f32_e64 v3, s3, -1.0 
1031+ ; GFX11-NEXT:    v_add_f32_e64 v2, s2, 4.0 
10331032; GFX11-NEXT:    s_clause 0x1 
10341033; GFX11-NEXT:    global_store_b32 v[0:1], v4, off 
10351034; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off 
10361035; GFX11-NEXT:    s_endpgm 
10371036; 
10381037; GFX1250-LABEL: ps_mesa_inreg_v5f32: 
10391038; GFX1250:       ; %bb.0: 
1040- ; GFX1250-NEXT:    s_add_f32 s3, s3, -1.0 
10411039; GFX1250-NEXT:    s_add_f32 s4, s4, 0.5 
10421040; GFX1250-NEXT:    s_add_f32 s0, s0, 1.0 
10431041; GFX1250-NEXT:    s_add_f32 s1, s1, 2.0 
1042+ ; GFX1250-NEXT:    s_add_f32 s3, s3, -1.0 
10441043; GFX1250-NEXT:    s_add_f32 s2, s2, 4.0 
1045- ; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_2) 
10461044; GFX1250-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v0, s0 
1045+ ; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_2) 
10471046; GFX1250-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2 
10481047; GFX1250-NEXT:    v_mov_b32_e32 v3, s3 
10491048; GFX1250-NEXT:    s_clause 0x1 
@@ -1148,32 +1147,32 @@ define amdgpu_ps void @ps_mesa_v5i32(<5 x i32> %arg0) {
11481147; 
11491148; VI-LABEL: ps_mesa_v5i32: 
11501149; VI:       ; %bb.0: 
1151- ; VI-NEXT:    v_add_u32_e32 v3, vcc, 4, v3 
1152- ; VI-NEXT:    v_add_u32_e32 v2, vcc, 3, v2 
11531150; VI-NEXT:    v_add_u32_e32 v1, vcc, 2, v1 
11541151; VI-NEXT:    v_add_u32_e32 v0, vcc, 1, v0 
11551152; VI-NEXT:    v_add_u32_e32 v4, vcc, 5, v4 
1153+ ; VI-NEXT:    v_add_u32_e32 v3, vcc, 4, v3 
1154+ ; VI-NEXT:    v_add_u32_e32 v2, vcc, 3, v2 
11561155; VI-NEXT:    flat_store_dword v[0:1], v4 
11571156; VI-NEXT:    flat_store_dwordx4 v[0:1], v[0:3] 
11581157; VI-NEXT:    s_endpgm 
11591158; 
11601159; GFX11-LABEL: ps_mesa_v5i32: 
11611160; GFX11:       ; %bb.0: 
1162- ; GFX11-NEXT:    v_add_nc_u32_e32 v3, 4, v3 
1163- ; GFX11-NEXT:    v_add_nc_u32_e32 v2, 3, v2 
11641161; GFX11-NEXT:    v_add_nc_u32_e32 v1, 2, v1 
11651162; GFX11-NEXT:    v_add_nc_u32_e32 v4, 5, v4 
11661163; GFX11-NEXT:    v_add_nc_u32_e32 v0, 1, v0 
1164+ ; GFX11-NEXT:    v_add_nc_u32_e32 v3, 4, v3 
1165+ ; GFX11-NEXT:    v_add_nc_u32_e32 v2, 3, v2 
11671166; GFX11-NEXT:    s_clause 0x1 
11681167; GFX11-NEXT:    global_store_b32 v[0:1], v4, off 
11691168; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off 
11701169; GFX11-NEXT:    s_endpgm 
11711170; 
11721171; GFX1250-LABEL: ps_mesa_v5i32: 
11731172; GFX1250:       ; %bb.0: 
1174- ; GFX1250-NEXT:    v_dual_add_nc_u32 v3, 4, v3 :: v_dual_add_nc_u32 v2, 3, v2 
11751173; GFX1250-NEXT:    v_dual_add_nc_u32 v1, 2, v1 :: v_dual_add_nc_u32 v4, 5, v4 
1176- ; GFX1250-NEXT:    v_add_nc_u32_e32 v0, 1, v0 
1174+ ; GFX1250-NEXT:    v_dual_add_nc_u32 v0, 1, v0 :: v_dual_add_nc_u32 v3, 4, v3 
1175+ ; GFX1250-NEXT:    v_add_nc_u32_e32 v2, 3, v2 
11771176; GFX1250-NEXT:    s_clause 0x1 
11781177; GFX1250-NEXT:    global_store_b32 v[0:1], v4, off 
11791178; GFX1250-NEXT:    global_store_b128 v[0:1], v[0:3], off 
@@ -1199,30 +1198,30 @@ define amdgpu_ps void @ps_mesa_v5f32(<5 x float> %arg0) {
11991198; 
12001199; VI-LABEL: ps_mesa_v5f32: 
12011200; VI:       ; %bb.0: 
1202- ; VI-NEXT:    v_add_f32_e32 v3, -1.0, v3 
1203- ; VI-NEXT:    v_add_f32_e32 v2, 4.0, v2 
12041201; VI-NEXT:    v_add_f32_e32 v1, 2.0, v1 
12051202; VI-NEXT:    v_add_f32_e32 v0, 1.0, v0 
12061203; VI-NEXT:    v_add_f32_e32 v4, 0.5, v4 
1204+ ; VI-NEXT:    v_add_f32_e32 v3, -1.0, v3 
1205+ ; VI-NEXT:    v_add_f32_e32 v2, 4.0, v2 
12071206; VI-NEXT:    flat_store_dword v[0:1], v4 
12081207; VI-NEXT:    flat_store_dwordx4 v[0:1], v[0:3] 
12091208; VI-NEXT:    s_endpgm 
12101209; 
12111210; GFX11-LABEL: ps_mesa_v5f32: 
12121211; GFX11:       ; %bb.0: 
1213- ; GFX11-NEXT:    v_dual_add_f32 v3, -1.0, v3 :: v_dual_add_f32 v2, 4.0, v2 
12141212; GFX11-NEXT:    v_dual_add_f32 v1, 2.0, v1 :: v_dual_add_f32 v4, 0.5, v4 
1215- ; GFX11-NEXT:    v_add_f32_e32 v0, 1.0, v0 
1213+ ; GFX11-NEXT:    v_dual_add_f32 v0, 1.0, v0 :: v_dual_add_f32 v3, -1.0, v3 
1214+ ; GFX11-NEXT:    v_add_f32_e32 v2, 4.0, v2 
12161215; GFX11-NEXT:    s_clause 0x1 
12171216; GFX11-NEXT:    global_store_b32 v[0:1], v4, off 
12181217; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off 
12191218; GFX11-NEXT:    s_endpgm 
12201219; 
12211220; GFX1250-LABEL: ps_mesa_v5f32: 
12221221; GFX1250:       ; %bb.0: 
1223- ; GFX1250-NEXT:    v_dual_add_f32 v3, -1.0, v3 :: v_dual_add_f32 v2, 4.0, v2 
12241222; GFX1250-NEXT:    v_dual_add_f32 v1, 2.0, v1 :: v_dual_add_f32 v4, 0.5, v4 
1225- ; GFX1250-NEXT:    v_add_f32_e32 v0, 1.0, v0 
1223+ ; GFX1250-NEXT:    v_dual_add_f32 v0, 1.0, v0 :: v_dual_add_f32 v3, -1.0, v3 
1224+ ; GFX1250-NEXT:    v_add_f32_e32 v2, 4.0, v2 
12261225; GFX1250-NEXT:    s_clause 0x1 
12271226; GFX1250-NEXT:    global_store_b32 v[0:1], v4, off 
12281227; GFX1250-NEXT:    global_store_b128 v[0:1], v[0:3], off 
0 commit comments