@@ -3181,7 +3181,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
31813181; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
31823182; GFX11-NEXT: s_and_b32 s33, s33, 0xfffffe00
31833183; GFX11-NEXT: s_or_saveexec_b32 s0, -1
3184- ; GFX11-NEXT: scratch_store_b32 off, v60 , s33 offset:1600 ; 4-byte Folded Spill
3184+ ; GFX11-NEXT: scratch_store_b32 off, v63 , s33 offset:1584 ; 4-byte Folded Spill
31853185; GFX11-NEXT: s_mov_b32 exec_lo, s0
31863186; GFX11-NEXT: s_mov_b32 s0, 0
31873187; GFX11-NEXT: v_mov_b32_e32 v4, 0
@@ -3191,19 +3191,22 @@ define amdgpu_gfx void @call_72xi32() #1 {
31913191; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
31923192; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
31933193; GFX11-NEXT: s_addk_i32 s32, 0xa00
3194- ; GFX11-NEXT: s_clause 0xb
3195- ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:44
3196- ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:40
3197- ; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:36
3198- ; GFX11-NEXT: scratch_store_b32 off, v43, s33 offset:32
3199- ; GFX11-NEXT: scratch_store_b32 off, v44, s33 offset:28
3200- ; GFX11-NEXT: scratch_store_b32 off, v45, s33 offset:24
3201- ; GFX11-NEXT: scratch_store_b32 off, v46, s33 offset:20
3202- ; GFX11-NEXT: scratch_store_b32 off, v47, s33 offset:16
3203- ; GFX11-NEXT: scratch_store_b32 off, v56, s33 offset:12
3204- ; GFX11-NEXT: scratch_store_b32 off, v57, s33 offset:8
3205- ; GFX11-NEXT: scratch_store_b32 off, v58, s33 offset:4
3206- ; GFX11-NEXT: scratch_store_b32 off, v59, s33
3194+ ; GFX11-NEXT: s_clause 0xe
3195+ ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:56
3196+ ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:52
3197+ ; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:48
3198+ ; GFX11-NEXT: scratch_store_b32 off, v43, s33 offset:44
3199+ ; GFX11-NEXT: scratch_store_b32 off, v44, s33 offset:40
3200+ ; GFX11-NEXT: scratch_store_b32 off, v45, s33 offset:36
3201+ ; GFX11-NEXT: scratch_store_b32 off, v46, s33 offset:32
3202+ ; GFX11-NEXT: scratch_store_b32 off, v47, s33 offset:28
3203+ ; GFX11-NEXT: scratch_store_b32 off, v56, s33 offset:24
3204+ ; GFX11-NEXT: scratch_store_b32 off, v57, s33 offset:20
3205+ ; GFX11-NEXT: scratch_store_b32 off, v58, s33 offset:16
3206+ ; GFX11-NEXT: scratch_store_b32 off, v59, s33 offset:12
3207+ ; GFX11-NEXT: scratch_store_b32 off, v60, s33 offset:8
3208+ ; GFX11-NEXT: scratch_store_b32 off, v61, s33 offset:4
3209+ ; GFX11-NEXT: scratch_store_b32 off, v62, s33
32073210; GFX11-NEXT: s_add_i32 s0, s32, 0xa0
32083211; GFX11-NEXT: s_add_i32 s1, s32, 0x90
32093212; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
@@ -3224,7 +3227,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
32243227; GFX11-NEXT: s_add_i32 s0, s32, 32
32253228; GFX11-NEXT: s_add_i32 s1, s32, 16
32263229; GFX11-NEXT: s_add_i32 s2, s33, 0x200
3227- ; GFX11-NEXT: v_writelane_b32 v60 , s30, 0
3230+ ; GFX11-NEXT: v_writelane_b32 v63 , s30, 0
32283231; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0
32293232; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1
32303233; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, 0
@@ -3245,7 +3248,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
32453248; GFX11-NEXT: v_dual_mov_b32 v31, 0 :: v_dual_mov_b32 v30, 0
32463249; GFX11-NEXT: s_mov_b32 s1, return_72xi32@abs32@hi
32473250; GFX11-NEXT: s_mov_b32 s0, return_72xi32@abs32@lo
3248- ; GFX11-NEXT: v_writelane_b32 v60 , s31, 1
3251+ ; GFX11-NEXT: v_writelane_b32 v63 , s31, 1
32493252; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
32503253; GFX11-NEXT: s_clause 0x1
32513254; GFX11-NEXT: scratch_load_b128 v[45:48], off, s33 offset:624
@@ -3267,7 +3270,8 @@ define amdgpu_gfx void @call_72xi32() #1 {
32673270; GFX11-NEXT: s_waitcnt vmcnt(2)
32683271; GFX11-NEXT: v_dual_mov_b32 v14, v1 :: v_dual_mov_b32 v1, v4
32693272; GFX11-NEXT: s_waitcnt vmcnt(0)
3270- ; GFX11-NEXT: scratch_store_b128 off, v[16:19], s33 offset:1584 ; 16-byte Folded Spill
3273+ ; GFX11-NEXT: v_dual_mov_b32 v62, v19 :: v_dual_mov_b32 v61, v18
3274+ ; GFX11-NEXT: v_mov_b32_e32 v60, v17
32713275; GFX11-NEXT: s_clause 0x3
32723276; GFX11-NEXT: scratch_load_b128 v[16:19], off, s33 offset:528
32733277; GFX11-NEXT: scratch_load_b128 v[20:23], off, s33 offset:544
@@ -3285,17 +3289,18 @@ define amdgpu_gfx void @call_72xi32() #1 {
32853289; GFX11-NEXT: s_waitcnt vmcnt(0)
32863290; GFX11-NEXT: scratch_store_b128 off, v[28:31], s33 offset:1536 ; 16-byte Folded Spill
32873291; GFX11-NEXT: scratch_store_b128 off, v[32:35], s32
3288- ; GFX11-NEXT: v_dual_mov_b32 v31, v47 :: v_dual_mov_b32 v32, v36
3292+ ; GFX11-NEXT: v_mov_b32_e32 v32, v36
32893293; GFX11-NEXT: v_dual_mov_b32 v33, v48 :: v_dual_mov_b32 v34, v49
3294+ ; GFX11-NEXT: v_mov_b32_e32 v49, v52
32903295; GFX11-NEXT: v_dual_mov_b32 v35, v50 :: v_dual_mov_b32 v48, v51
3291- ; GFX11-NEXT: v_dual_mov_b32 v49, v52 :: v_dual_mov_b32 v50, v53
3292- ; GFX11-NEXT: v_dual_mov_b32 v51, v54 :: v_dual_mov_b32 v36, v55
3293- ; GFX11-NEXT: v_dual_mov_b32 v53, v41 :: v_dual_mov_b32 v52, v40
3294- ; GFX11-NEXT: v_dual_mov_b32 v54, v42 :: v_dual_mov_b32 v41, v56
3295- ; GFX11-NEXT: v_dual_mov_b32 v55, v43 :: v_dual_mov_b32 v40, v44
3296- ; GFX11-NEXT: v_dual_mov_b32 v42, v57 :: v_dual_mov_b32 v57, v12
3296+ ; GFX11-NEXT: v_dual_mov_b32 v50, v53 :: v_dual_mov_b32 v51, v54
3297+ ; GFX11-NEXT: v_mov_b32_e32 v36, v55
3298+ ; GFX11-NEXT: v_dual_mov_b32 v52, v40 :: v_dual_mov_b32 v53, v41
3299+ ; GFX11-NEXT: v_dual_mov_b32 v54, v42 :: v_dual_mov_b32 v55, v43
3300+ ; GFX11-NEXT: v_mov_b32_e32 v40, v44
3301+ ; GFX11-NEXT: v_dual_mov_b32 v41, v56 :: v_dual_mov_b32 v42, v57
32973302; GFX11-NEXT: v_dual_mov_b32 v43, v58 :: v_dual_mov_b32 v56, v59
3298- ; GFX11-NEXT: v_mov_b32_e32 v58, v13
3303+ ; GFX11-NEXT: v_dual_mov_b32 v57, v12 :: v_dual_mov_b32 v58, v13
32993304; GFX11-NEXT: v_dual_mov_b32 v12, v15 :: v_dual_mov_b32 v13, v0
33003305; GFX11-NEXT: v_dual_mov_b32 v15, v2 :: v_dual_mov_b32 v0, v3
33013306; GFX11-NEXT: v_dual_mov_b32 v2, v5 :: v_dual_mov_b32 v3, v6
@@ -3310,57 +3315,58 @@ define amdgpu_gfx void @call_72xi32() #1 {
33103315; GFX11-NEXT: scratch_store_b128 off, v[0:3], s2
33113316; GFX11-NEXT: v_mov_b32_e32 v0, 24
33123317; GFX11-NEXT: s_add_i32 s2, s32, 0x70
3313- ; GFX11-NEXT: v_mov_b32_e32 v6, v17
3318+ ; GFX11-NEXT: v_mov_b32_e32 v2, v60
33143319; GFX11-NEXT: scratch_store_b128 off, v[12:15], s2
3315- ; GFX11-NEXT: v_mov_b32_e32 v13, v24
3320+ ; GFX11-NEXT: v_mov_b32_e32 v15, v26
33163321; GFX11-NEXT: s_add_i32 s2, s32, 0x6c
3317- ; GFX11-NEXT: v_mov_b32_e32 v7, v18
3322+ ; GFX11-NEXT: v_dual_mov_b32 v4, v62 :: v_dual_mov_b32 v13, v24
33183323; GFX11-NEXT: scratch_store_b32 off, v0, s2
33193324; GFX11-NEXT: s_add_i32 s2, s32, 0x60
3320- ; GFX11-NEXT: v_dual_mov_b32 v8, v19 :: v_dual_mov_b32 v15, v26
3325+ ; GFX11-NEXT: v_dual_mov_b32 v6, v17 :: v_dual_mov_b32 v31, v47
33213326; GFX11-NEXT: scratch_store_b96 off, v[56:58], s2
33223327; GFX11-NEXT: s_add_i32 s2, s32, 0x50
3323- ; GFX11-NEXT: v_dual_mov_b32 v12, v23 :: v_dual_mov_b32 v29, v45
3328+ ; GFX11-NEXT: v_mov_b32_e32 v7, v18
33243329; GFX11-NEXT: scratch_store_b128 off, v[40:43], s2
33253330; GFX11-NEXT: s_add_i32 s2, s32, 64
3326- ; GFX11-NEXT: v_mov_b32_e32 v14, v25
3331+ ; GFX11-NEXT: v_dual_mov_b32 v8, v19 :: v_dual_mov_b32 v29, v45
33273332; GFX11-NEXT: scratch_store_b128 off, v[52:55], s2
33283333; GFX11-NEXT: s_add_i32 s2, s32, 48
3329- ; GFX11-NEXT: v_mov_b32_e32 v16, v27
3334+ ; GFX11-NEXT: v_mov_b32_e32 v12, v23
33303335; GFX11-NEXT: scratch_store_b128 off, v[36:39], s2
33313336; GFX11-NEXT: s_add_i32 s2, s32, 32
3332- ; GFX11-NEXT: v_mov_b32_e32 v30, v46
3337+ ; GFX11-NEXT: v_mov_b32_e32 v14, v25
33333338; GFX11-NEXT: scratch_store_b128 off, v[48:51], s2
33343339; GFX11-NEXT: s_add_i32 s2, s32, 16
3340+ ; GFX11-NEXT: v_mov_b32_e32 v16, v27
33353341; GFX11-NEXT: scratch_store_b128 off, v[32:35], s2
3336- ; GFX11-NEXT: scratch_load_b128 v[1:4], off, s33 offset:1584 ; 16-byte Folded Reload
3337- ; GFX11-NEXT: s_waitcnt vmcnt(0)
3338- ; GFX11-NEXT: v_mov_b32_e32 v1, 42
33393342; GFX11-NEXT: s_clause 0x2
33403343; GFX11-NEXT: scratch_load_b128 v[17:20], off, s33 offset:1568
33413344; GFX11-NEXT: scratch_load_b128 v[21:24], off, s33 offset:1552
33423345; GFX11-NEXT: scratch_load_b128 v[25:28], off, s33 offset:1536
33433346; GFX11-NEXT: s_add_i32 s2, s33, 0x400
3344- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3345- ; GFX11-NEXT: v_mov_b32_e32 v0, s2
3347+ ; GFX11-NEXT: v_dual_mov_b32 v3, v61 :: v_dual_mov_b32 v30, v46
3348+ ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 42
33463349; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
3347- ; GFX11-NEXT: s_clause 0xb
3348- ; GFX11-NEXT: scratch_load_b32 v59, off, s33
3349- ; GFX11-NEXT: scratch_load_b32 v58, off, s33 offset:4
3350- ; GFX11-NEXT: scratch_load_b32 v57, off, s33 offset:8
3351- ; GFX11-NEXT: scratch_load_b32 v56, off, s33 offset:12
3352- ; GFX11-NEXT: scratch_load_b32 v47, off, s33 offset:16
3353- ; GFX11-NEXT: scratch_load_b32 v46, off, s33 offset:20
3354- ; GFX11-NEXT: scratch_load_b32 v45, off, s33 offset:24
3355- ; GFX11-NEXT: scratch_load_b32 v44, off, s33 offset:28
3356- ; GFX11-NEXT: scratch_load_b32 v43, off, s33 offset:32
3357- ; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:36
3358- ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:40
3359- ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:44
3360- ; GFX11-NEXT: v_readlane_b32 s31, v60, 1
3361- ; GFX11-NEXT: v_readlane_b32 s30, v60, 0
3350+ ; GFX11-NEXT: s_clause 0xe
3351+ ; GFX11-NEXT: scratch_load_b32 v62, off, s33
3352+ ; GFX11-NEXT: scratch_load_b32 v61, off, s33 offset:4
3353+ ; GFX11-NEXT: scratch_load_b32 v60, off, s33 offset:8
3354+ ; GFX11-NEXT: scratch_load_b32 v59, off, s33 offset:12
3355+ ; GFX11-NEXT: scratch_load_b32 v58, off, s33 offset:16
3356+ ; GFX11-NEXT: scratch_load_b32 v57, off, s33 offset:20
3357+ ; GFX11-NEXT: scratch_load_b32 v56, off, s33 offset:24
3358+ ; GFX11-NEXT: scratch_load_b32 v47, off, s33 offset:28
3359+ ; GFX11-NEXT: scratch_load_b32 v46, off, s33 offset:32
3360+ ; GFX11-NEXT: scratch_load_b32 v45, off, s33 offset:36
3361+ ; GFX11-NEXT: scratch_load_b32 v44, off, s33 offset:40
3362+ ; GFX11-NEXT: scratch_load_b32 v43, off, s33 offset:44
3363+ ; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:48
3364+ ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:52
3365+ ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:56
3366+ ; GFX11-NEXT: v_readlane_b32 s31, v63, 1
3367+ ; GFX11-NEXT: v_readlane_b32 s30, v63, 0
33623368; GFX11-NEXT: s_or_saveexec_b32 s0, -1
3363- ; GFX11-NEXT: scratch_load_b32 v60 , off, s33 offset:1600 ; 4-byte Folded Reload
3369+ ; GFX11-NEXT: scratch_load_b32 v63 , off, s33 offset:1584 ; 4-byte Folded Reload
33643370; GFX11-NEXT: s_mov_b32 exec_lo, s0
33653371; GFX11-NEXT: s_addk_i32 s32, 0xf600
33663372; GFX11-NEXT: s_mov_b32 s33, s34
0 commit comments