@@ -289,16 +289,16 @@ entry:
289289define amdgpu_kernel void @half4_inselt (ptr addrspace (1 ) %out , <4 x half > %vec , i32 %sel ) {
290290; GCN-LABEL: half4_inselt:
291291; GCN: ; %bb.0: ; %entry
292- ; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
293292; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
293+ ; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
294294; GCN-NEXT: s_mov_b32 s4, 0x3c003c00
295295; GCN-NEXT: s_mov_b32 s5, s4
296296; GCN-NEXT: s_waitcnt lgkmcnt(0)
297+ ; GCN-NEXT: s_xor_b64 s[4:5], s[2:3], s[4:5]
297298; GCN-NEXT: s_lshl_b32 s6, s6, 4
298299; GCN-NEXT: s_lshl_b64 s[6:7], 0xffff, s6
299- ; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[6:7]
300- ; GCN-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
301- ; GCN-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
300+ ; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
301+ ; GCN-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
302302; GCN-NEXT: v_mov_b32_e32 v0, s0
303303; GCN-NEXT: v_mov_b32_e32 v2, s2
304304; GCN-NEXT: v_mov_b32_e32 v1, s1
@@ -317,10 +317,10 @@ define amdgpu_kernel void @half2_inselt(ptr addrspace(1) %out, <2 x half> %vec,
317317; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
318318; GCN-NEXT: s_waitcnt lgkmcnt(0)
319319; GCN-NEXT: s_lshl_b32 s3, s3, 4
320+ ; GCN-NEXT: s_xor_b32 s4, s2, 0x3c003c00
320321; GCN-NEXT: s_lshl_b32 s3, 0xffff, s3
321- ; GCN-NEXT: s_andn2_b32 s2, s2, s3
322- ; GCN-NEXT: s_and_b32 s3, s3, 0x3c003c00
323- ; GCN-NEXT: s_or_b32 s2, s3, s2
322+ ; GCN-NEXT: s_and_b32 s3, s4, s3
323+ ; GCN-NEXT: s_xor_b32 s2, s3, s2
324324; GCN-NEXT: v_mov_b32_e32 v0, s0
325325; GCN-NEXT: v_mov_b32_e32 v1, s1
326326; GCN-NEXT: v_mov_b32_e32 v2, s2
@@ -399,10 +399,10 @@ define amdgpu_kernel void @short2_inselt(ptr addrspace(1) %out, <2 x i16> %vec,
399399; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
400400; GCN-NEXT: s_waitcnt lgkmcnt(0)
401401; GCN-NEXT: s_lshl_b32 s3, s3, 4
402+ ; GCN-NEXT: s_xor_b32 s4, s2, 0x10001
402403; GCN-NEXT: s_lshl_b32 s3, 0xffff, s3
403- ; GCN-NEXT: s_andn2_b32 s2, s2, s3
404- ; GCN-NEXT: s_and_b32 s3, s3, 0x10001
405- ; GCN-NEXT: s_or_b32 s2, s3, s2
404+ ; GCN-NEXT: s_and_b32 s3, s4, s3
405+ ; GCN-NEXT: s_xor_b32 s2, s3, s2
406406; GCN-NEXT: v_mov_b32_e32 v0, s0
407407; GCN-NEXT: v_mov_b32_e32 v1, s1
408408; GCN-NEXT: v_mov_b32_e32 v2, s2
@@ -417,16 +417,16 @@ entry:
417417define amdgpu_kernel void @short4_inselt (ptr addrspace (1 ) %out , <4 x i16 > %vec , i32 %sel ) {
418418; GCN-LABEL: short4_inselt:
419419; GCN: ; %bb.0: ; %entry
420- ; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
421420; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
421+ ; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
422422; GCN-NEXT: s_mov_b32 s4, 0x10001
423423; GCN-NEXT: s_mov_b32 s5, s4
424424; GCN-NEXT: s_waitcnt lgkmcnt(0)
425+ ; GCN-NEXT: s_xor_b64 s[4:5], s[2:3], s[4:5]
425426; GCN-NEXT: s_lshl_b32 s6, s6, 4
426427; GCN-NEXT: s_lshl_b64 s[6:7], 0xffff, s6
427- ; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[6:7]
428- ; GCN-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
429- ; GCN-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
428+ ; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
429+ ; GCN-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
430430; GCN-NEXT: v_mov_b32_e32 v0, s0
431431; GCN-NEXT: v_mov_b32_e32 v2, s2
432432; GCN-NEXT: v_mov_b32_e32 v1, s1
@@ -442,15 +442,15 @@ entry:
442442define amdgpu_kernel void @byte8_inselt (ptr addrspace (1 ) %out , <8 x i8 > %vec , i32 %sel ) {
443443; GCN-LABEL: byte8_inselt:
444444; GCN: ; %bb.0: ; %entry
445- ; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
446445; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
446+ ; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
447447; GCN-NEXT: s_waitcnt lgkmcnt(0)
448- ; GCN-NEXT: s_lshl_b32 s4, s6, 3
449- ; GCN-NEXT: s_lshl_b64 s[4:5], 0xff, s4
450- ; GCN-NEXT: s_and_b32 s7, s5 , 0x1010101
451- ; GCN-NEXT: s_and_b32 s6, s4, 0x1010101
452- ; GCN-NEXT: s_andn2_b64 s[2:3 ], s[2:3 ], s[4:5 ]
453- ; GCN-NEXT: s_or_b64 s[2:3], s[6:7 ], s[2:3]
448+ ; GCN-NEXT: s_xor_b32 s5, s3, 0x1010101
449+ ; GCN-NEXT: s_lshl_b32 s6, s6, 3
450+ ; GCN-NEXT: s_xor_b32 s4, s2 , 0x1010101
451+ ; GCN-NEXT: s_lshl_b64 s[6:7], 0xff, s6
452+ ; GCN-NEXT: s_and_b64 s[4:5 ], s[4:5 ], s[6:7 ]
453+ ; GCN-NEXT: s_xor_b64 s[2:3], s[4:5 ], s[2:3]
454454; GCN-NEXT: v_mov_b32_e32 v0, s0
455455; GCN-NEXT: v_mov_b32_e32 v2, s2
456456; GCN-NEXT: v_mov_b32_e32 v1, s1
0 commit comments