@@ -541,10 +541,9 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
541541; GFX908-NEXT: s_lshr_b32 s2, s0, 16
542542; GFX908-NEXT: v_cvt_f32_f16_e32 v19, s2
543543; GFX908-NEXT: s_lshl_b64 s[6:7], s[4:5], 5
544- ; GFX908-NEXT: s_lshl_b64 s[14:15], s[10:11], 5
545544; GFX908-NEXT: v_mov_b32_e32 v0, 0
545+ ; GFX908-NEXT: s_lshl_b64 s[14:15], s[10:11], 5
546546; GFX908-NEXT: s_and_b64 s[0:1], exec, s[0:1]
547- ; GFX908-NEXT: s_or_b32 s14, s14, 28
548547; GFX908-NEXT: s_lshl_b64 s[16:17], s[8:9], 5
549548; GFX908-NEXT: v_mov_b32_e32 v1, 0
550549; GFX908-NEXT: s_waitcnt vmcnt(0)
@@ -610,13 +609,13 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
610609; GFX908-NEXT: ; => This Inner Loop Header: Depth=2
611610; GFX908-NEXT: s_add_u32 s22, s20, s9
612611; GFX908-NEXT: s_addc_u32 s23, s21, s13
613- ; GFX908-NEXT: global_load_dword v21, v17, s[22:23] offset:-12 glc
612+ ; GFX908-NEXT: global_load_dword v21, v17, s[22:23] offset:16 glc
614613; GFX908-NEXT: s_waitcnt vmcnt(0)
615- ; GFX908-NEXT: global_load_dword v20, v17, s[22:23] offset:-8 glc
614+ ; GFX908-NEXT: global_load_dword v20, v17, s[22:23] offset:20 glc
616615; GFX908-NEXT: s_waitcnt vmcnt(0)
617- ; GFX908-NEXT: global_load_dword v12, v17, s[22:23] offset:-4 glc
616+ ; GFX908-NEXT: global_load_dword v12, v17, s[22:23] offset:24 glc
618617; GFX908-NEXT: s_waitcnt vmcnt(0)
619- ; GFX908-NEXT: global_load_dword v12, v17, s[22:23] glc
618+ ; GFX908-NEXT: global_load_dword v12, v17, s[22:23] offset:28 glc
620619; GFX908-NEXT: s_waitcnt vmcnt(0)
621620; GFX908-NEXT: ds_read_b64 v[12:13], v17
622621; GFX908-NEXT: ds_read_b64 v[14:15], v0
@@ -710,7 +709,6 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
710709; GFX90A-NEXT: s_lshl_b64 s[6:7], s[4:5], 5
711710; GFX90A-NEXT: s_lshl_b64 s[14:15], s[10:11], 5
712711; GFX90A-NEXT: s_and_b64 s[0:1], exec, s[0:1]
713- ; GFX90A-NEXT: s_or_b32 s14, s14, 28
714712; GFX90A-NEXT: s_lshl_b64 s[16:17], s[8:9], 5
715713; GFX90A-NEXT: s_waitcnt vmcnt(0)
716714; GFX90A-NEXT: v_readfirstlane_b32 s2, v18
@@ -771,13 +769,13 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
771769; GFX90A-NEXT: ; => This Inner Loop Header: Depth=2
772770; GFX90A-NEXT: s_add_u32 s22, s20, s9
773771; GFX90A-NEXT: s_addc_u32 s23, s21, s13
774- ; GFX90A-NEXT: global_load_dword v21, v19, s[22:23] offset:-12 glc
772+ ; GFX90A-NEXT: global_load_dword v21, v19, s[22:23] offset:16 glc
775773; GFX90A-NEXT: s_waitcnt vmcnt(0)
776- ; GFX90A-NEXT: global_load_dword v20, v19, s[22:23] offset:-8 glc
774+ ; GFX90A-NEXT: global_load_dword v20, v19, s[22:23] offset:20 glc
777775; GFX90A-NEXT: s_waitcnt vmcnt(0)
778- ; GFX90A-NEXT: global_load_dword v14, v19, s[22:23] offset:-4 glc
776+ ; GFX90A-NEXT: global_load_dword v14, v19, s[22:23] offset:24 glc
779777; GFX90A-NEXT: s_waitcnt vmcnt(0)
780- ; GFX90A-NEXT: global_load_dword v14, v19, s[22:23] glc
778+ ; GFX90A-NEXT: global_load_dword v14, v19, s[22:23] offset:28 glc
781779; GFX90A-NEXT: s_waitcnt vmcnt(0)
782780; GFX90A-NEXT: ds_read_b64 v[14:15], v19
783781; GFX90A-NEXT: ds_read_b64 v[16:17], v0
0 commit comments