@@ -289,16 +289,16 @@ entry:
289
289
define amdgpu_kernel void @half4_inselt (ptr addrspace (1 ) %out , <4 x half > %vec , i32 %sel ) {
290
290
; GCN-LABEL: half4_inselt:
291
291
; GCN: ; %bb.0: ; %entry
292
- ; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
293
292
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
293
+ ; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
294
294
; GCN-NEXT: s_mov_b32 s4, 0x3c003c00
295
295
; GCN-NEXT: s_mov_b32 s5, s4
296
296
; GCN-NEXT: s_waitcnt lgkmcnt(0)
297
+ ; GCN-NEXT: s_xor_b64 s[4:5], s[2:3], s[4:5]
297
298
; GCN-NEXT: s_lshl_b32 s6, s6, 4
298
299
; GCN-NEXT: s_lshl_b64 s[6:7], 0xffff, s6
299
- ; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[6:7]
300
- ; GCN-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
301
- ; GCN-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
300
+ ; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
301
+ ; GCN-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
302
302
; GCN-NEXT: v_mov_b32_e32 v0, s0
303
303
; GCN-NEXT: v_mov_b32_e32 v2, s2
304
304
; GCN-NEXT: v_mov_b32_e32 v1, s1
@@ -317,10 +317,10 @@ define amdgpu_kernel void @half2_inselt(ptr addrspace(1) %out, <2 x half> %vec,
317
317
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
318
318
; GCN-NEXT: s_waitcnt lgkmcnt(0)
319
319
; GCN-NEXT: s_lshl_b32 s3, s3, 4
320
+ ; GCN-NEXT: s_xor_b32 s4, s2, 0x3c003c00
320
321
; GCN-NEXT: s_lshl_b32 s3, 0xffff, s3
321
- ; GCN-NEXT: s_andn2_b32 s2, s2, s3
322
- ; GCN-NEXT: s_and_b32 s3, s3, 0x3c003c00
323
- ; GCN-NEXT: s_or_b32 s2, s3, s2
322
+ ; GCN-NEXT: s_and_b32 s3, s4, s3
323
+ ; GCN-NEXT: s_xor_b32 s2, s3, s2
324
324
; GCN-NEXT: v_mov_b32_e32 v0, s0
325
325
; GCN-NEXT: v_mov_b32_e32 v1, s1
326
326
; GCN-NEXT: v_mov_b32_e32 v2, s2
@@ -399,10 +399,10 @@ define amdgpu_kernel void @short2_inselt(ptr addrspace(1) %out, <2 x i16> %vec,
399
399
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
400
400
; GCN-NEXT: s_waitcnt lgkmcnt(0)
401
401
; GCN-NEXT: s_lshl_b32 s3, s3, 4
402
+ ; GCN-NEXT: s_xor_b32 s4, s2, 0x10001
402
403
; GCN-NEXT: s_lshl_b32 s3, 0xffff, s3
403
- ; GCN-NEXT: s_andn2_b32 s2, s2, s3
404
- ; GCN-NEXT: s_and_b32 s3, s3, 0x10001
405
- ; GCN-NEXT: s_or_b32 s2, s3, s2
404
+ ; GCN-NEXT: s_and_b32 s3, s4, s3
405
+ ; GCN-NEXT: s_xor_b32 s2, s3, s2
406
406
; GCN-NEXT: v_mov_b32_e32 v0, s0
407
407
; GCN-NEXT: v_mov_b32_e32 v1, s1
408
408
; GCN-NEXT: v_mov_b32_e32 v2, s2
@@ -417,16 +417,16 @@ entry:
417
417
define amdgpu_kernel void @short4_inselt (ptr addrspace (1 ) %out , <4 x i16 > %vec , i32 %sel ) {
418
418
; GCN-LABEL: short4_inselt:
419
419
; GCN: ; %bb.0: ; %entry
420
- ; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
421
420
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
421
+ ; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
422
422
; GCN-NEXT: s_mov_b32 s4, 0x10001
423
423
; GCN-NEXT: s_mov_b32 s5, s4
424
424
; GCN-NEXT: s_waitcnt lgkmcnt(0)
425
+ ; GCN-NEXT: s_xor_b64 s[4:5], s[2:3], s[4:5]
425
426
; GCN-NEXT: s_lshl_b32 s6, s6, 4
426
427
; GCN-NEXT: s_lshl_b64 s[6:7], 0xffff, s6
427
- ; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[6:7]
428
- ; GCN-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
429
- ; GCN-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
428
+ ; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
429
+ ; GCN-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
430
430
; GCN-NEXT: v_mov_b32_e32 v0, s0
431
431
; GCN-NEXT: v_mov_b32_e32 v2, s2
432
432
; GCN-NEXT: v_mov_b32_e32 v1, s1
@@ -442,15 +442,15 @@ entry:
442
442
define amdgpu_kernel void @byte8_inselt (ptr addrspace (1 ) %out , <8 x i8 > %vec , i32 %sel ) {
443
443
; GCN-LABEL: byte8_inselt:
444
444
; GCN: ; %bb.0: ; %entry
445
- ; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
446
445
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
446
+ ; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
447
447
; GCN-NEXT: s_waitcnt lgkmcnt(0)
448
- ; GCN-NEXT: s_lshl_b32 s4, s6, 3
449
- ; GCN-NEXT: s_lshl_b64 s[4:5], 0xff, s4
450
- ; GCN-NEXT: s_and_b32 s7, s5, 0x1010101
451
- ; GCN-NEXT: s_and_b32 s6, s4, 0x1010101
452
- ; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
453
- ; GCN-NEXT: s_or_b64 s[2:3], s[6:7], s[2:3]
448
+ ; GCN-NEXT: s_xor_b32 s5, s3, 0x1010101
449
+ ; GCN-NEXT: s_lshl_b32 s6, s6, 3
450
+ ; GCN-NEXT: s_xor_b32 s4, s2, 0x1010101
451
+ ; GCN-NEXT: s_lshl_b64 s[6:7], 0xff, s6
452
+ ; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
453
+ ; GCN-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
454
454
; GCN-NEXT: v_mov_b32_e32 v0, s0
455
455
; GCN-NEXT: v_mov_b32_e32 v2, s2
456
456
; GCN-NEXT: v_mov_b32_e32 v1, s1
0 commit comments