@@ -301,12 +301,11 @@ define amdgpu_kernel void @double8_extelt(ptr addrspace(1) %out, i32 %sel) {
301
301
; GCN-NEXT: s_mov_b32 s10, s0
302
302
; GCN-NEXT: s_mov_b32 s12, s0
303
303
; GCN-NEXT: s_mov_b32 s14, s0
304
- ; GCN-NEXT: s_waitcnt lgkmcnt(0)
305
- ; GCN-NEXT: s_lshl_b32 s18, s18, 1
306
304
; GCN-NEXT: v_mov_b32_e32 v0, s0
307
305
; GCN-NEXT: v_mov_b32_e32 v1, s1
308
306
; GCN-NEXT: v_mov_b32_e32 v15, s15
309
- ; GCN-NEXT: s_mov_b32 m0, s18
307
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
308
+ ; GCN-NEXT: s_lshl_b32 m0, s18, 1
310
309
; GCN-NEXT: v_mov_b32_e32 v2, s2
311
310
; GCN-NEXT: v_mov_b32_e32 v3, s3
312
311
; GCN-NEXT: v_mov_b32_e32 v4, s4
@@ -352,11 +351,10 @@ define amdgpu_kernel void @double7_extelt(ptr addrspace(1) %out, i32 %sel) {
352
351
; GCN-NEXT: s_mov_b32 s10, s0
353
352
; GCN-NEXT: s_mov_b32 s12, s0
354
353
; GCN-NEXT: s_waitcnt lgkmcnt(0)
355
- ; GCN-NEXT: s_lshl_b32 s16, s16, 1
356
354
; GCN-NEXT: v_mov_b32_e32 v0, s0
357
355
; GCN-NEXT: v_mov_b32_e32 v1, s1
358
356
; GCN-NEXT: v_mov_b32_e32 v15, s15
359
- ; GCN-NEXT: s_mov_b32 m0, s16
357
+ ; GCN-NEXT: s_lshl_b32 m0, s16, 1
360
358
; GCN-NEXT: v_mov_b32_e32 v2, s2
361
359
; GCN-NEXT: v_mov_b32_e32 v3, s3
362
360
; GCN-NEXT: v_mov_b32_e32 v4, s4
@@ -451,12 +449,11 @@ define amdgpu_kernel void @double15_extelt(ptr addrspace(1) %out, i32 %sel) {
451
449
; GCN-NEXT: s_mov_b32 s60, s36
452
450
; GCN-NEXT: s_mov_b32 s62, s36
453
451
; GCN-NEXT: s_mov_b32 s64, s36
454
- ; GCN-NEXT: s_waitcnt lgkmcnt(0)
455
- ; GCN-NEXT: s_lshl_b32 s2, s2, 1
456
452
; GCN-NEXT: v_mov_b32_e32 v0, s36
457
453
; GCN-NEXT: v_mov_b32_e32 v1, s37
458
454
; GCN-NEXT: v_mov_b32_e32 v31, s67
459
- ; GCN-NEXT: s_mov_b32 m0, s2
455
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
456
+ ; GCN-NEXT: s_lshl_b32 m0, s2, 1
460
457
; GCN-NEXT: v_mov_b32_e32 v2, s38
461
458
; GCN-NEXT: v_mov_b32_e32 v3, s39
462
459
; GCN-NEXT: v_mov_b32_e32 v4, s40
@@ -535,12 +532,11 @@ define amdgpu_kernel void @double16_extelt(ptr addrspace(1) %out, i32 %sel) {
535
532
; GCN-NEXT: s_mov_b32 s62, s36
536
533
; GCN-NEXT: s_mov_b32 s64, s36
537
534
; GCN-NEXT: s_mov_b32 s66, s36
538
- ; GCN-NEXT: s_waitcnt lgkmcnt(0)
539
- ; GCN-NEXT: s_lshl_b32 s2, s2, 1
540
535
; GCN-NEXT: v_mov_b32_e32 v0, s36
541
536
; GCN-NEXT: v_mov_b32_e32 v1, s37
542
537
; GCN-NEXT: v_mov_b32_e32 v31, s67
543
- ; GCN-NEXT: s_mov_b32 m0, s2
538
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
539
+ ; GCN-NEXT: s_lshl_b32 m0, s2, 1
544
540
; GCN-NEXT: v_mov_b32_e32 v2, s38
545
541
; GCN-NEXT: v_mov_b32_e32 v3, s39
546
542
; GCN-NEXT: v_mov_b32_e32 v4, s40
0 commit comments