@@ -301,12 +301,11 @@ define amdgpu_kernel void @double8_extelt(ptr addrspace(1) %out, i32 %sel) {
301301; GCN-NEXT: s_mov_b32 s10, s0
302302; GCN-NEXT: s_mov_b32 s12, s0
303303; GCN-NEXT: s_mov_b32 s14, s0
304- ; GCN-NEXT: s_waitcnt lgkmcnt(0)
305- ; GCN-NEXT: s_lshl_b32 s18, s18, 1
306304; GCN-NEXT: v_mov_b32_e32 v0, s0
307305; GCN-NEXT: v_mov_b32_e32 v1, s1
308306; GCN-NEXT: v_mov_b32_e32 v15, s15
309- ; GCN-NEXT: s_mov_b32 m0, s18
307+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
308+ ; GCN-NEXT: s_lshl_b32 m0, s18, 1
310309; GCN-NEXT: v_mov_b32_e32 v2, s2
311310; GCN-NEXT: v_mov_b32_e32 v3, s3
312311; GCN-NEXT: v_mov_b32_e32 v4, s4
@@ -352,11 +351,10 @@ define amdgpu_kernel void @double7_extelt(ptr addrspace(1) %out, i32 %sel) {
352351; GCN-NEXT: s_mov_b32 s10, s0
353352; GCN-NEXT: s_mov_b32 s12, s0
354353; GCN-NEXT: s_waitcnt lgkmcnt(0)
355- ; GCN-NEXT: s_lshl_b32 s16, s16, 1
356354; GCN-NEXT: v_mov_b32_e32 v0, s0
357355; GCN-NEXT: v_mov_b32_e32 v1, s1
358356; GCN-NEXT: v_mov_b32_e32 v15, s15
359- ; GCN-NEXT: s_mov_b32 m0, s16
357+ ; GCN-NEXT: s_lshl_b32 m0, s16, 1
360358; GCN-NEXT: v_mov_b32_e32 v2, s2
361359; GCN-NEXT: v_mov_b32_e32 v3, s3
362360; GCN-NEXT: v_mov_b32_e32 v4, s4
@@ -451,12 +449,11 @@ define amdgpu_kernel void @double15_extelt(ptr addrspace(1) %out, i32 %sel) {
451449; GCN-NEXT: s_mov_b32 s60, s36
452450; GCN-NEXT: s_mov_b32 s62, s36
453451; GCN-NEXT: s_mov_b32 s64, s36
454- ; GCN-NEXT: s_waitcnt lgkmcnt(0)
455- ; GCN-NEXT: s_lshl_b32 s2, s2, 1
456452; GCN-NEXT: v_mov_b32_e32 v0, s36
457453; GCN-NEXT: v_mov_b32_e32 v1, s37
458454; GCN-NEXT: v_mov_b32_e32 v31, s67
459- ; GCN-NEXT: s_mov_b32 m0, s2
455+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
456+ ; GCN-NEXT: s_lshl_b32 m0, s2, 1
460457; GCN-NEXT: v_mov_b32_e32 v2, s38
461458; GCN-NEXT: v_mov_b32_e32 v3, s39
462459; GCN-NEXT: v_mov_b32_e32 v4, s40
@@ -535,12 +532,11 @@ define amdgpu_kernel void @double16_extelt(ptr addrspace(1) %out, i32 %sel) {
535532; GCN-NEXT: s_mov_b32 s62, s36
536533; GCN-NEXT: s_mov_b32 s64, s36
537534; GCN-NEXT: s_mov_b32 s66, s36
538- ; GCN-NEXT: s_waitcnt lgkmcnt(0)
539- ; GCN-NEXT: s_lshl_b32 s2, s2, 1
540535; GCN-NEXT: v_mov_b32_e32 v0, s36
541536; GCN-NEXT: v_mov_b32_e32 v1, s37
542537; GCN-NEXT: v_mov_b32_e32 v31, s67
543- ; GCN-NEXT: s_mov_b32 m0, s2
538+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
539+ ; GCN-NEXT: s_lshl_b32 m0, s2, 1
544540; GCN-NEXT: v_mov_b32_e32 v2, s38
545541; GCN-NEXT: v_mov_b32_e32 v3, s39
546542; GCN-NEXT: v_mov_b32_e32 v4, s40
0 commit comments