@@ -32,9 +32,11 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
3232; GFX906-NEXT: v_writelane_b32 v2, s24, 5
3333; GFX906-NEXT: s_mov_b64 s[26:27], s[10:11]
3434; GFX906-NEXT: v_writelane_b32 v2, s26, 6
35+ ; GFX906-NEXT: v_writelane_b32 v41, s34, 2
3536; GFX906-NEXT: v_writelane_b32 v2, s27, 7
37+ ; GFX906-NEXT: v_writelane_b32 v41, s35, 3
3638; GFX906-NEXT: v_writelane_b32 v2, s8, 8
37- ; GFX906-NEXT: v_writelane_b32 v41, s16, 2
39+ ; GFX906-NEXT: v_writelane_b32 v41, s16, 4
3840; GFX906-NEXT: v_writelane_b32 v2, s9, 9
3941; GFX906-NEXT: v_writelane_b32 v41, s30, 0
4042; GFX906-NEXT: v_writelane_b32 v2, s4, 10
@@ -338,7 +340,9 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
338340; GFX906-NEXT: v_readlane_b32 s31, v41, 1
339341; GFX906-NEXT: v_readlane_b32 s30, v41, 0
340342; GFX906-NEXT: ; kill: killed $vgpr40
341- ; GFX906-NEXT: v_readlane_b32 s4, v41, 2
343+ ; GFX906-NEXT: v_readlane_b32 s34, v41, 2
344+ ; GFX906-NEXT: v_readlane_b32 s35, v41, 3
345+ ; GFX906-NEXT: v_readlane_b32 s4, v41, 4
342346; GFX906-NEXT: s_waitcnt vmcnt(0)
343347; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[30:33] offset:112
344348; GFX906-NEXT: s_waitcnt vmcnt(0)
@@ -379,23 +383,27 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
379383; GFX908-NEXT: s_mov_b64 exec, -1
380384; GFX908-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill
381385; GFX908-NEXT: s_mov_b64 exec, s[18:19]
382- ; GFX908-NEXT: v_mov_b32_e32 v3, s16
386+ ; GFX908-NEXT: v_mov_b32_e32 v3, s34
383387; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill
388+ ; GFX908-NEXT: v_mov_b32_e32 v3, s35
389+ ; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill
390+ ; GFX908-NEXT: v_mov_b32_e32 v3, s16
391+ ; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill
384392; GFX908-NEXT: s_addk_i32 s32, 0x2c00
385393; GFX908-NEXT: s_mov_b64 s[16:17], exec
386394; GFX908-NEXT: s_mov_b64 exec, 1
387- ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:164
395+ ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:172
388396; GFX908-NEXT: v_writelane_b32 v2, s30, 0
389397; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
390- ; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:164
398+ ; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:172
391399; GFX908-NEXT: s_waitcnt vmcnt(0)
392400; GFX908-NEXT: s_mov_b64 exec, s[16:17]
393401; GFX908-NEXT: s_mov_b64 s[16:17], exec
394402; GFX908-NEXT: s_mov_b64 exec, 1
395- ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:164
403+ ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:172
396404; GFX908-NEXT: v_writelane_b32 v2, s31, 0
397405; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
398- ; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:164
406+ ; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:172
399407; GFX908-NEXT: s_waitcnt vmcnt(0)
400408; GFX908-NEXT: s_mov_b64 exec, s[16:17]
401409; GFX908-NEXT: ; implicit-def: $vgpr2
@@ -729,25 +737,31 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
729737; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
730738; GFX908-NEXT: s_waitcnt vmcnt(0)
731739; GFX908-NEXT: s_mov_b64 exec, 1
732- ; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:164
740+ ; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:172
733741; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
734742; GFX908-NEXT: s_waitcnt vmcnt(0)
735743; GFX908-NEXT: v_readlane_b32 s31, v0, 0
736- ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:164
744+ ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:172
737745; GFX908-NEXT: s_waitcnt vmcnt(0)
738746; GFX908-NEXT: s_mov_b64 exec, s[4:5]
739747; GFX908-NEXT: s_mov_b64 s[4:5], exec
740748; GFX908-NEXT: s_mov_b64 exec, 1
741- ; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:164
749+ ; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:172
742750; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 ; 4-byte Folded Reload
743751; GFX908-NEXT: s_waitcnt vmcnt(0)
744752; GFX908-NEXT: v_readlane_b32 s30, v0, 0
745- ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:164
753+ ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:172
746754; GFX908-NEXT: s_waitcnt vmcnt(0)
747755; GFX908-NEXT: s_mov_b64 exec, s[4:5]
748756; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload
749757; GFX908-NEXT: ; kill: killed $vgpr40
750758; GFX908-NEXT: s_waitcnt vmcnt(0)
759+ ; GFX908-NEXT: v_readfirstlane_b32 s34, v0
760+ ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload
761+ ; GFX908-NEXT: s_waitcnt vmcnt(0)
762+ ; GFX908-NEXT: v_readfirstlane_b32 s35, v0
763+ ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload
764+ ; GFX908-NEXT: s_waitcnt vmcnt(0)
751765; GFX908-NEXT: v_readfirstlane_b32 s4, v0
752766; GFX908-NEXT: s_xor_saveexec_b64 s[6:7], -1
753767; GFX908-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload
0 commit comments