@@ -513,16 +513,16 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
513513; GFX908-LABEL: introduced_copy_to_sgpr:
514514; GFX908: ; %bb.0: ; %bb
515515; GFX908-NEXT: global_load_ushort v16, v[0:1], off glc
516- ; GFX908-NEXT: s_load_dword s0, s[8:9], 0x18
517516; GFX908-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0
518517; GFX908-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x10
519- ; GFX908-NEXT: s_mov_b32 s8, 0
520- ; GFX908-NEXT: s_mov_b32 s13, s8
521- ; GFX908-NEXT: v_mov_b32_e32 v19, 0
518+ ; GFX908-NEXT: s_load_dword s0, s[8:9], 0x18
519+ ; GFX908-NEXT: s_mov_b32 s12, 0
520+ ; GFX908-NEXT: s_mov_b32 s9, s12
522521; GFX908-NEXT: s_waitcnt lgkmcnt(0)
523522; GFX908-NEXT: v_cvt_f32_u32_e32 v0, s7
524523; GFX908-NEXT: s_sub_i32 s1, 0, s7
525524; GFX908-NEXT: v_cvt_f32_f16_e32 v17, s0
525+ ; GFX908-NEXT: v_mov_b32_e32 v19, 0
526526; GFX908-NEXT: v_rcp_iflag_f32_e32 v2, v0
527527; GFX908-NEXT: v_mov_b32_e32 v0, 0
528528; GFX908-NEXT: v_mov_b32_e32 v1, 0
@@ -542,14 +542,14 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
542542; GFX908-NEXT: s_cselect_b32 s2, s6, s2
543543; GFX908-NEXT: s_add_i32 s3, s1, 1
544544; GFX908-NEXT: s_cmp_ge_u32 s2, s7
545- ; GFX908-NEXT: s_cselect_b32 s12 , s3, s1
545+ ; GFX908-NEXT: s_cselect_b32 s8 , s3, s1
546546; GFX908-NEXT: s_lshr_b32 s2, s0, 16
547547; GFX908-NEXT: v_cvt_f32_f16_e32 v18, s2
548548; GFX908-NEXT: s_lshl_b64 s[6:7], s[4:5], 5
549549; GFX908-NEXT: s_lshl_b64 s[14:15], s[10:11], 5
550550; GFX908-NEXT: s_and_b64 s[0:1], exec, s[0:1]
551551; GFX908-NEXT: s_or_b32 s14, s14, 28
552- ; GFX908-NEXT: s_lshl_b64 s[16:17], s[12:13 ], 5
552+ ; GFX908-NEXT: s_lshl_b64 s[16:17], s[8:9 ], 5
553553; GFX908-NEXT: s_waitcnt vmcnt(0)
554554; GFX908-NEXT: v_readfirstlane_b32 s2, v16
555555; GFX908-NEXT: s_and_b32 s2, 0xffff, s2
@@ -573,15 +573,15 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
573573; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
574574; GFX908-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
575575; GFX908-NEXT: v_cmp_gt_i64_e64 s[2:3], s[10:11], -1
576- ; GFX908-NEXT: s_mov_b32 s9, s8
576+ ; GFX908-NEXT: s_mov_b32 s13, s12
577577; GFX908-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[2:3]
578- ; GFX908-NEXT: v_mov_b32_e32 v4, s8
578+ ; GFX908-NEXT: v_mov_b32_e32 v4, s12
579579; GFX908-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, v6
580- ; GFX908-NEXT: v_mov_b32_e32 v6, s8
581- ; GFX908-NEXT: v_mov_b32_e32 v8, s8
582- ; GFX908-NEXT: v_mov_b32_e32 v5, s9
583- ; GFX908-NEXT: v_mov_b32_e32 v7, s9
584- ; GFX908-NEXT: v_mov_b32_e32 v9, s9
580+ ; GFX908-NEXT: v_mov_b32_e32 v6, s12
581+ ; GFX908-NEXT: v_mov_b32_e32 v8, s12
582+ ; GFX908-NEXT: v_mov_b32_e32 v5, s13
583+ ; GFX908-NEXT: v_mov_b32_e32 v7, s13
584+ ; GFX908-NEXT: v_mov_b32_e32 v9, s13
585585; GFX908-NEXT: v_cmp_lt_i64_e64 s[18:19], s[10:11], 0
586586; GFX908-NEXT: v_mov_b32_e32 v11, v5
587587; GFX908-NEXT: s_mov_b64 s[20:21], s[14:15]
@@ -667,7 +667,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
667667; GFX908-NEXT: s_cbranch_vccz .LBB3_1
668668; GFX908-NEXT: ; %bb.11: ; %bb12
669669; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
670- ; GFX908-NEXT: s_add_u32 s10, s10, s12
670+ ; GFX908-NEXT: s_add_u32 s10, s10, s8
671671; GFX908-NEXT: s_addc_u32 s11, s11, 0
672672; GFX908-NEXT: s_add_u32 s14, s14, s16
673673; GFX908-NEXT: s_addc_u32 s15, s15, s17
@@ -679,15 +679,15 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
679679; GFX90A-LABEL: introduced_copy_to_sgpr:
680680; GFX90A: ; %bb.0: ; %bb
681681; GFX90A-NEXT: global_load_ushort v18, v[0:1], off glc
682- ; GFX90A-NEXT: s_load_dword s0, s[8:9], 0x18
683682; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0
684683; GFX90A-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x10
685- ; GFX90A-NEXT: s_mov_b32 s8, 0
686- ; GFX90A-NEXT: s_mov_b32 s13, s8
687- ; GFX90A-NEXT: v_mov_b32_e32 v19, 0
684+ ; GFX90A-NEXT: s_load_dword s0, s[8:9], 0x18
685+ ; GFX90A-NEXT: s_mov_b32 s12, 0
686+ ; GFX90A-NEXT: s_mov_b32 s9, s12
688687; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
689688; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s7
690689; GFX90A-NEXT: s_sub_i32 s1, 0, s7
690+ ; GFX90A-NEXT: v_mov_b32_e32 v19, 0
691691; GFX90A-NEXT: v_rcp_iflag_f32_e32 v2, v0
692692; GFX90A-NEXT: v_pk_mov_b32 v[0:1], 0, 0
693693; GFX90A-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
@@ -707,14 +707,14 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
707707; GFX90A-NEXT: s_cselect_b32 s2, s6, s2
708708; GFX90A-NEXT: s_add_i32 s3, s1, 1
709709; GFX90A-NEXT: s_cmp_ge_u32 s2, s7
710- ; GFX90A-NEXT: s_cselect_b32 s12 , s3, s1
710+ ; GFX90A-NEXT: s_cselect_b32 s8 , s3, s1
711711; GFX90A-NEXT: s_lshr_b32 s2, s0, 16
712712; GFX90A-NEXT: v_cvt_f32_f16_e32 v3, s2
713713; GFX90A-NEXT: s_lshl_b64 s[6:7], s[4:5], 5
714714; GFX90A-NEXT: s_lshl_b64 s[14:15], s[10:11], 5
715715; GFX90A-NEXT: s_and_b64 s[0:1], exec, s[0:1]
716716; GFX90A-NEXT: s_or_b32 s14, s14, 28
717- ; GFX90A-NEXT: s_lshl_b64 s[16:17], s[12:13 ], 5
717+ ; GFX90A-NEXT: s_lshl_b64 s[16:17], s[8:9 ], 5
718718; GFX90A-NEXT: s_waitcnt vmcnt(0)
719719; GFX90A-NEXT: v_readfirstlane_b32 s2, v18
720720; GFX90A-NEXT: s_and_b32 s2, 0xffff, s2
@@ -738,12 +738,12 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
738738; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
739739; GFX90A-NEXT: global_load_dwordx2 v[4:5], v[0:1], off
740740; GFX90A-NEXT: v_cmp_gt_i64_e64 s[2:3], s[10:11], -1
741- ; GFX90A-NEXT: s_mov_b32 s9, s8
741+ ; GFX90A-NEXT: s_mov_b32 s13, s12
742742; GFX90A-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[2:3]
743- ; GFX90A-NEXT: v_pk_mov_b32 v[6:7], s[8:9 ], s[8:9 ] op_sel:[0,1]
743+ ; GFX90A-NEXT: v_pk_mov_b32 v[6:7], s[12:13 ], s[12:13 ] op_sel:[0,1]
744744; GFX90A-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, v8
745- ; GFX90A-NEXT: v_pk_mov_b32 v[8:9], s[8:9 ], s[8:9 ] op_sel:[0,1]
746- ; GFX90A-NEXT: v_pk_mov_b32 v[10:11], s[8:9 ], s[8:9 ] op_sel:[0,1]
745+ ; GFX90A-NEXT: v_pk_mov_b32 v[8:9], s[12:13 ], s[12:13 ] op_sel:[0,1]
746+ ; GFX90A-NEXT: v_pk_mov_b32 v[10:11], s[12:13 ], s[12:13 ] op_sel:[0,1]
747747; GFX90A-NEXT: v_cmp_lt_i64_e64 s[18:19], s[10:11], 0
748748; GFX90A-NEXT: s_mov_b64 s[20:21], s[14:15]
749749; GFX90A-NEXT: v_pk_mov_b32 v[12:13], v[6:7], v[6:7] op_sel:[0,1]
@@ -821,7 +821,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
821821; GFX90A-NEXT: s_cbranch_vccz .LBB3_1
822822; GFX90A-NEXT: ; %bb.11: ; %bb12
823823; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
824- ; GFX90A-NEXT: s_add_u32 s10, s10, s12
824+ ; GFX90A-NEXT: s_add_u32 s10, s10, s8
825825; GFX90A-NEXT: s_addc_u32 s11, s11, 0
826826; GFX90A-NEXT: s_add_u32 s14, s14, s16
827827; GFX90A-NEXT: s_addc_u32 s15, s15, s17
0 commit comments