Skip to content

Commit bf388f2

Browse files
committed
update affected tests
1 parent 726a550 commit bf388f2

File tree

11 files changed

+525
-626
lines changed

11 files changed

+525
-626
lines changed

llvm/test/CodeGen/AMDGPU/bfi_int.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@ define amdgpu_kernel void @s_bfi_def_i32(ptr addrspace(1) %out, i32 %x, i32 %y,
1616
; GFX7-NEXT: s_mov_b32 s7, 0xf000
1717
; GFX7-NEXT: s_mov_b32 s6, -1
1818
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
19-
; GFX7-NEXT: s_andn2_b32 s2, s2, s0
19+
; GFX7-NEXT: s_xor_b32 s1, s1, s2
2020
; GFX7-NEXT: s_and_b32 s0, s1, s0
21-
; GFX7-NEXT: s_or_b32 s0, s2, s0
21+
; GFX7-NEXT: s_xor_b32 s0, s0, s2
2222
; GFX7-NEXT: v_mov_b32_e32 v0, s0
2323
; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0
2424
; GFX7-NEXT: s_endpgm
@@ -28,9 +28,9 @@ define amdgpu_kernel void @s_bfi_def_i32(ptr addrspace(1) %out, i32 %x, i32 %y,
2828
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
2929
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
3030
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
31-
; GFX8-NEXT: s_andn2_b32 s2, s2, s0
31+
; GFX8-NEXT: s_xor_b32 s1, s1, s2
3232
; GFX8-NEXT: s_and_b32 s0, s1, s0
33-
; GFX8-NEXT: s_or_b32 s0, s2, s0
33+
; GFX8-NEXT: s_xor_b32 s0, s0, s2
3434
; GFX8-NEXT: v_mov_b32_e32 v0, s4
3535
; GFX8-NEXT: v_mov_b32_e32 v1, s5
3636
; GFX8-NEXT: v_mov_b32_e32 v2, s0
@@ -44,9 +44,9 @@ define amdgpu_kernel void @s_bfi_def_i32(ptr addrspace(1) %out, i32 %x, i32 %y,
4444
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
4545
; GFX10-NEXT: v_mov_b32_e32 v0, 0
4646
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
47-
; GFX10-NEXT: s_andn2_b32 s2, s2, s0
47+
; GFX10-NEXT: s_xor_b32 s1, s1, s2
4848
; GFX10-NEXT: s_and_b32 s0, s1, s0
49-
; GFX10-NEXT: s_or_b32 s0, s2, s0
49+
; GFX10-NEXT: s_xor_b32 s0, s0, s2
5050
; GFX10-NEXT: v_mov_b32_e32 v1, s0
5151
; GFX10-NEXT: global_store_dword v0, v1, s[4:5]
5252
; GFX10-NEXT: s_endpgm
@@ -1407,9 +1407,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
14071407
; GFX7-NEXT: s_mov_b32 s7, 0xf000
14081408
; GFX7-NEXT: s_mov_b32 s6, -1
14091409
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1410-
; GFX7-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
1411-
; GFX7-NEXT: s_andn2_b64 s[0:1], s[4:5], s[0:1]
1412-
; GFX7-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
1410+
; GFX7-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
1411+
; GFX7-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
1412+
; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
14131413
; GFX7-NEXT: s_add_u32 s0, s0, 10
14141414
; GFX7-NEXT: s_addc_u32 s1, s1, 0
14151415
; GFX7-NEXT: v_mov_b32_e32 v0, s0
@@ -1422,9 +1422,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
14221422
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
14231423
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
14241424
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1425-
; GFX8-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
1426-
; GFX8-NEXT: s_andn2_b64 s[0:1], s[4:5], s[0:1]
1427-
; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
1425+
; GFX8-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
1426+
; GFX8-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
1427+
; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
14281428
; GFX8-NEXT: s_add_u32 s0, s0, 10
14291429
; GFX8-NEXT: s_addc_u32 s1, s1, 0
14301430
; GFX8-NEXT: v_mov_b32_e32 v0, s0
@@ -1438,9 +1438,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
14381438
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
14391439
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
14401440
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1441-
; GFX10-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
1442-
; GFX10-NEXT: s_andn2_b64 s[0:1], s[4:5], s[0:1]
1443-
; GFX10-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
1441+
; GFX10-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
1442+
; GFX10-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
1443+
; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
14441444
; GFX10-NEXT: s_add_u32 s0, s0, 10
14451445
; GFX10-NEXT: s_addc_u32 s1, s1, 0
14461446
; GFX10-NEXT: v_mov_b32_e32 v0, s0

llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,15 @@
77
define amdgpu_kernel void @bfi_def(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
88
; R600-LABEL: bfi_def:
99
; R600: ; %bb.0: ; %entry
10-
; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
11-
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
10+
; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
11+
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
1212
; R600-NEXT: CF_END
1313
; R600-NEXT: PAD
1414
; R600-NEXT: ALU clause starting at 4:
15-
; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
15+
; R600-NEXT: MOV * T0.W, KC0[3].X,
16+
; R600-NEXT: BFI_INT T0.X, KC0[2].Z, KC0[2].W, PV.W,
17+
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
1618
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
17-
; R600-NEXT: BFI_INT * T1.X, KC0[2].Z, KC0[2].W, KC0[3].X,
1819
entry:
1920
%0 = xor i32 %x, -1
2021
%1 = and i32 %z, %0

llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -295,10 +295,10 @@ define amdgpu_kernel void @half4_inselt(ptr addrspace(1) %out, <4 x half> %vec,
295295
; GCN-NEXT: s_mov_b32 s5, s4
296296
; GCN-NEXT: s_waitcnt lgkmcnt(0)
297297
; GCN-NEXT: s_lshl_b32 s6, s6, 4
298+
; GCN-NEXT: s_xor_b64 s[4:5], s[2:3], s[4:5]
298299
; GCN-NEXT: s_lshl_b64 s[6:7], 0xffff, s6
299-
; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[6:7]
300-
; GCN-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
301-
; GCN-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
300+
; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
301+
; GCN-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
302302
; GCN-NEXT: v_mov_b32_e32 v0, s0
303303
; GCN-NEXT: v_mov_b32_e32 v2, s2
304304
; GCN-NEXT: v_mov_b32_e32 v1, s1
@@ -317,10 +317,10 @@ define amdgpu_kernel void @half2_inselt(ptr addrspace(1) %out, <2 x half> %vec,
317317
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
318318
; GCN-NEXT: s_waitcnt lgkmcnt(0)
319319
; GCN-NEXT: s_lshl_b32 s3, s3, 4
320+
; GCN-NEXT: s_xor_b32 s4, s2, 0x3c003c00
320321
; GCN-NEXT: s_lshl_b32 s3, 0xffff, s3
321-
; GCN-NEXT: s_andn2_b32 s2, s2, s3
322-
; GCN-NEXT: s_and_b32 s3, s3, 0x3c003c00
323-
; GCN-NEXT: s_or_b32 s2, s3, s2
322+
; GCN-NEXT: s_and_b32 s3, s4, s3
323+
; GCN-NEXT: s_xor_b32 s2, s3, s2
324324
; GCN-NEXT: v_mov_b32_e32 v0, s0
325325
; GCN-NEXT: v_mov_b32_e32 v1, s1
326326
; GCN-NEXT: v_mov_b32_e32 v2, s2
@@ -400,10 +400,10 @@ define amdgpu_kernel void @short2_inselt(ptr addrspace(1) %out, <2 x i16> %vec,
400400
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
401401
; GCN-NEXT: s_waitcnt lgkmcnt(0)
402402
; GCN-NEXT: s_lshl_b32 s3, s3, 4
403+
; GCN-NEXT: s_xor_b32 s4, s2, 0x10001
403404
; GCN-NEXT: s_lshl_b32 s3, 0xffff, s3
404-
; GCN-NEXT: s_andn2_b32 s2, s2, s3
405-
; GCN-NEXT: s_and_b32 s3, s3, 0x10001
406-
; GCN-NEXT: s_or_b32 s2, s3, s2
405+
; GCN-NEXT: s_and_b32 s3, s4, s3
406+
; GCN-NEXT: s_xor_b32 s2, s3, s2
407407
; GCN-NEXT: v_mov_b32_e32 v0, s0
408408
; GCN-NEXT: v_mov_b32_e32 v1, s1
409409
; GCN-NEXT: v_mov_b32_e32 v2, s2
@@ -424,10 +424,10 @@ define amdgpu_kernel void @short4_inselt(ptr addrspace(1) %out, <4 x i16> %vec,
424424
; GCN-NEXT: s_mov_b32 s5, s4
425425
; GCN-NEXT: s_waitcnt lgkmcnt(0)
426426
; GCN-NEXT: s_lshl_b32 s6, s6, 4
427+
; GCN-NEXT: s_xor_b64 s[4:5], s[2:3], s[4:5]
427428
; GCN-NEXT: s_lshl_b64 s[6:7], 0xffff, s6
428-
; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[6:7]
429-
; GCN-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
430-
; GCN-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
429+
; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
430+
; GCN-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
431431
; GCN-NEXT: v_mov_b32_e32 v0, s0
432432
; GCN-NEXT: v_mov_b32_e32 v2, s2
433433
; GCN-NEXT: v_mov_b32_e32 v1, s1
@@ -443,15 +443,15 @@ entry:
443443
define amdgpu_kernel void @byte8_inselt(ptr addrspace(1) %out, <8 x i8> %vec, i32 %sel) {
444444
; GCN-LABEL: byte8_inselt:
445445
; GCN: ; %bb.0: ; %entry
446-
; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
447446
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
447+
; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
448448
; GCN-NEXT: s_waitcnt lgkmcnt(0)
449-
; GCN-NEXT: s_lshl_b32 s4, s6, 3
450-
; GCN-NEXT: s_lshl_b64 s[4:5], 0xff, s4
451-
; GCN-NEXT: s_and_b32 s7, s5, 0x1010101
452-
; GCN-NEXT: s_and_b32 s6, s4, 0x1010101
453-
; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
454-
; GCN-NEXT: s_or_b64 s[2:3], s[6:7], s[2:3]
449+
; GCN-NEXT: s_xor_b32 s5, s3, 0x1010101
450+
; GCN-NEXT: s_lshl_b32 s6, s6, 3
451+
; GCN-NEXT: s_xor_b32 s4, s2, 0x1010101
452+
; GCN-NEXT: s_lshl_b64 s[6:7], 0xff, s6
453+
; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
454+
; GCN-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
455455
; GCN-NEXT: v_mov_b32_e32 v0, s0
456456
; GCN-NEXT: v_mov_b32_e32 v2, s2
457457
; GCN-NEXT: v_mov_b32_e32 v1, s1

llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll

Lines changed: 50 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1513,11 +1513,11 @@ define amdgpu_kernel void @dynamic_insertelement_v2i16(ptr addrspace(1) %out, <2
15131513
; SI-NEXT: s_waitcnt lgkmcnt(0)
15141514
; SI-NEXT: s_mov_b32 s4, s0
15151515
; SI-NEXT: s_lshl_b32 s0, s3, 4
1516-
; SI-NEXT: s_lshl_b32 s0, 0xffff, s0
15171516
; SI-NEXT: s_mov_b32 s5, s1
1518-
; SI-NEXT: s_andn2_b32 s1, s2, s0
1519-
; SI-NEXT: s_and_b32 s0, s0, 0x50005
1520-
; SI-NEXT: s_or_b32 s0, s0, s1
1517+
; SI-NEXT: s_lshl_b32 s0, 0xffff, s0
1518+
; SI-NEXT: s_xor_b32 s1, s2, 0x50005
1519+
; SI-NEXT: s_and_b32 s0, s1, s0
1520+
; SI-NEXT: s_xor_b32 s0, s0, s2
15211521
; SI-NEXT: v_mov_b32_e32 v0, s0
15221522
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
15231523
; SI-NEXT: s_endpgm
@@ -1530,11 +1530,11 @@ define amdgpu_kernel void @dynamic_insertelement_v2i16(ptr addrspace(1) %out, <2
15301530
; VI-NEXT: s_waitcnt lgkmcnt(0)
15311531
; VI-NEXT: s_mov_b32 s4, s0
15321532
; VI-NEXT: s_lshl_b32 s0, s3, 4
1533-
; VI-NEXT: s_lshl_b32 s0, 0xffff, s0
15341533
; VI-NEXT: s_mov_b32 s5, s1
1535-
; VI-NEXT: s_andn2_b32 s1, s2, s0
1536-
; VI-NEXT: s_and_b32 s0, s0, 0x50005
1537-
; VI-NEXT: s_or_b32 s0, s0, s1
1534+
; VI-NEXT: s_lshl_b32 s0, 0xffff, s0
1535+
; VI-NEXT: s_xor_b32 s1, s2, 0x50005
1536+
; VI-NEXT: s_and_b32 s0, s1, s0
1537+
; VI-NEXT: s_xor_b32 s0, s0, s2
15381538
; VI-NEXT: v_mov_b32_e32 v0, s0
15391539
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
15401540
; VI-NEXT: s_endpgm
@@ -1552,13 +1552,13 @@ define amdgpu_kernel void @dynamic_insertelement_v3i16(ptr addrspace(1) %out, <3
15521552
; SI-NEXT: s_mov_b32 s6, -1
15531553
; SI-NEXT: s_waitcnt lgkmcnt(0)
15541554
; SI-NEXT: s_mov_b32 s4, s0
1555-
; SI-NEXT: s_lshl_b32 s0, s8, 4
1555+
; SI-NEXT: s_lshl_b32 s8, s8, 4
15561556
; SI-NEXT: s_mov_b32 s5, s1
1557-
; SI-NEXT: s_lshl_b64 s[0:1], 0xffff, s0
1558-
; SI-NEXT: s_and_b32 s9, s1, 0x50005
1559-
; SI-NEXT: s_and_b32 s8, s0, 0x50005
1560-
; SI-NEXT: s_andn2_b64 s[0:1], s[2:3], s[0:1]
1561-
; SI-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
1557+
; SI-NEXT: s_xor_b32 s1, s3, 0x50005
1558+
; SI-NEXT: s_xor_b32 s0, s2, 0x50005
1559+
; SI-NEXT: s_lshl_b64 s[8:9], 0xffff, s8
1560+
; SI-NEXT: s_and_b64 s[0:1], s[0:1], s[8:9]
1561+
; SI-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
15621562
; SI-NEXT: v_mov_b32_e32 v0, s1
15631563
; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4
15641564
; SI-NEXT: v_mov_b32_e32 v0, s0
@@ -1575,12 +1575,12 @@ define amdgpu_kernel void @dynamic_insertelement_v3i16(ptr addrspace(1) %out, <3
15751575
; VI-NEXT: s_mov_b32 s4, s0
15761576
; VI-NEXT: s_lshl_b32 s0, s8, 4
15771577
; VI-NEXT: s_mov_b32 s8, 0x50005
1578+
; VI-NEXT: s_mov_b32 s9, s8
15781579
; VI-NEXT: s_mov_b32 s5, s1
15791580
; VI-NEXT: s_lshl_b64 s[0:1], 0xffff, s0
1580-
; VI-NEXT: s_mov_b32 s9, s8
1581-
; VI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
1582-
; VI-NEXT: s_and_b64 s[0:1], s[0:1], s[8:9]
1583-
; VI-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
1581+
; VI-NEXT: s_xor_b64 s[8:9], s[2:3], s[8:9]
1582+
; VI-NEXT: s_and_b64 s[0:1], s[8:9], s[0:1]
1583+
; VI-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
15841584
; VI-NEXT: v_mov_b32_e32 v0, s1
15851585
; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4
15861586
; VI-NEXT: v_mov_b32_e32 v0, s0
@@ -1602,9 +1602,9 @@ define amdgpu_kernel void @dynamic_insertelement_v2i8(ptr addrspace(1) %out, [8
16021602
; SI-NEXT: s_waitcnt lgkmcnt(0)
16031603
; SI-NEXT: s_lshl_b32 s4, s4, 3
16041604
; SI-NEXT: s_lshl_b32 s4, 0xff, s4
1605-
; SI-NEXT: s_andn2_b32 s5, s5, s4
1606-
; SI-NEXT: s_and_b32 s4, s4, 0x505
1607-
; SI-NEXT: s_or_b32 s4, s4, s5
1605+
; SI-NEXT: s_xor_b32 s6, s5, 0x505
1606+
; SI-NEXT: s_and_b32 s4, s6, s4
1607+
; SI-NEXT: s_xor_b32 s4, s4, s5
16081608
; SI-NEXT: v_mov_b32_e32 v0, s4
16091609
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
16101610
; SI-NEXT: s_endpgm
@@ -1619,10 +1619,9 @@ define amdgpu_kernel void @dynamic_insertelement_v2i8(ptr addrspace(1) %out, [8
16191619
; VI-NEXT: s_waitcnt lgkmcnt(0)
16201620
; VI-NEXT: s_lshl_b32 s4, s4, 3
16211621
; VI-NEXT: s_lshl_b32 s4, 0xff, s4
1622-
; VI-NEXT: s_and_b32 s6, s4, 0x505
1623-
; VI-NEXT: s_xor_b32 s4, s4, 0xffff
1624-
; VI-NEXT: s_and_b32 s4, s4, s5
1625-
; VI-NEXT: s_or_b32 s4, s6, s4
1622+
; VI-NEXT: s_xor_b32 s6, s5, 0x505
1623+
; VI-NEXT: s_and_b32 s4, s6, s4
1624+
; VI-NEXT: s_xor_b32 s4, s4, s5
16261625
; VI-NEXT: v_mov_b32_e32 v0, s4
16271626
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
16281627
; VI-NEXT: s_endpgm
@@ -1644,9 +1643,9 @@ define amdgpu_kernel void @dynamic_insertelement_v3i8(ptr addrspace(1) %out, [8
16441643
; SI-NEXT: s_waitcnt lgkmcnt(0)
16451644
; SI-NEXT: s_lshl_b32 s4, s4, 3
16461645
; SI-NEXT: s_lshl_b32 s4, 0xff, s4
1647-
; SI-NEXT: s_andn2_b32 s5, s5, s4
1648-
; SI-NEXT: s_and_b32 s4, s4, 0x5050505
1649-
; SI-NEXT: s_or_b32 s4, s4, s5
1646+
; SI-NEXT: s_xor_b32 s6, s5, 0x5050505
1647+
; SI-NEXT: s_and_b32 s4, s6, s4
1648+
; SI-NEXT: s_xor_b32 s4, s4, s5
16501649
; SI-NEXT: s_lshr_b32 s5, s4, 16
16511650
; SI-NEXT: v_mov_b32_e32 v0, s4
16521651
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
@@ -1664,9 +1663,9 @@ define amdgpu_kernel void @dynamic_insertelement_v3i8(ptr addrspace(1) %out, [8
16641663
; VI-NEXT: s_waitcnt lgkmcnt(0)
16651664
; VI-NEXT: s_lshl_b32 s4, s4, 3
16661665
; VI-NEXT: s_lshl_b32 s4, 0xff, s4
1667-
; VI-NEXT: s_andn2_b32 s5, s5, s4
1668-
; VI-NEXT: s_and_b32 s4, s4, 0x5050505
1669-
; VI-NEXT: s_or_b32 s4, s4, s5
1666+
; VI-NEXT: s_xor_b32 s6, s5, 0x5050505
1667+
; VI-NEXT: s_and_b32 s4, s6, s4
1668+
; VI-NEXT: s_xor_b32 s4, s4, s5
16701669
; VI-NEXT: s_lshr_b32 s5, s4, 16
16711670
; VI-NEXT: v_mov_b32_e32 v0, s4
16721671
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
@@ -1689,9 +1688,9 @@ define amdgpu_kernel void @dynamic_insertelement_v4i8(ptr addrspace(1) %out, [8
16891688
; SI-NEXT: s_waitcnt lgkmcnt(0)
16901689
; SI-NEXT: s_lshl_b32 s4, s4, 3
16911690
; SI-NEXT: s_lshl_b32 s4, 0xff, s4
1692-
; SI-NEXT: s_andn2_b32 s5, s5, s4
1693-
; SI-NEXT: s_and_b32 s4, s4, 0x5050505
1694-
; SI-NEXT: s_or_b32 s4, s4, s5
1691+
; SI-NEXT: s_xor_b32 s6, s5, 0x5050505
1692+
; SI-NEXT: s_and_b32 s4, s6, s4
1693+
; SI-NEXT: s_xor_b32 s4, s4, s5
16951694
; SI-NEXT: v_mov_b32_e32 v0, s4
16961695
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
16971696
; SI-NEXT: s_endpgm
@@ -1706,9 +1705,9 @@ define amdgpu_kernel void @dynamic_insertelement_v4i8(ptr addrspace(1) %out, [8
17061705
; VI-NEXT: s_waitcnt lgkmcnt(0)
17071706
; VI-NEXT: s_lshl_b32 s4, s4, 3
17081707
; VI-NEXT: s_lshl_b32 s4, 0xff, s4
1709-
; VI-NEXT: s_andn2_b32 s5, s5, s4
1710-
; VI-NEXT: s_and_b32 s4, s4, 0x5050505
1711-
; VI-NEXT: s_or_b32 s4, s4, s5
1708+
; VI-NEXT: s_xor_b32 s6, s5, 0x5050505
1709+
; VI-NEXT: s_and_b32 s4, s6, s4
1710+
; VI-NEXT: s_xor_b32 s4, s4, s5
17121711
; VI-NEXT: v_mov_b32_e32 v0, s4
17131712
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
17141713
; VI-NEXT: s_endpgm
@@ -1721,20 +1720,20 @@ define amdgpu_kernel void @s_dynamic_insertelement_v8i8(ptr addrspace(1) %out, p
17211720
; SI-LABEL: s_dynamic_insertelement_v8i8:
17221721
; SI: ; %bb.0:
17231722
; SI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1724-
; SI-NEXT: s_load_dword s8, s[8:9], 0x4
17251723
; SI-NEXT: s_mov_b32 s7, 0x100f000
17261724
; SI-NEXT: s_mov_b32 s6, -1
1725+
; SI-NEXT: s_load_dword s8, s[8:9], 0x4
17271726
; SI-NEXT: s_waitcnt lgkmcnt(0)
17281727
; SI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
17291728
; SI-NEXT: s_mov_b32 s4, s0
1730-
; SI-NEXT: s_lshl_b32 s0, s8, 3
17311729
; SI-NEXT: s_mov_b32 s5, s1
1732-
; SI-NEXT: s_lshl_b64 s[0:1], 0xff, s0
1733-
; SI-NEXT: s_and_b32 s9, s1, 0x5050505
1730+
; SI-NEXT: s_lshl_b32 s8, s8, 3
17341731
; SI-NEXT: s_waitcnt lgkmcnt(0)
1735-
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
1736-
; SI-NEXT: s_and_b32 s8, s0, 0x5050505
1737-
; SI-NEXT: s_or_b64 s[0:1], s[8:9], s[2:3]
1732+
; SI-NEXT: s_xor_b32 s1, s3, 0x5050505
1733+
; SI-NEXT: s_xor_b32 s0, s2, 0x5050505
1734+
; SI-NEXT: s_lshl_b64 s[8:9], 0xff, s8
1735+
; SI-NEXT: s_and_b64 s[0:1], s[0:1], s[8:9]
1736+
; SI-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
17381737
; SI-NEXT: v_mov_b32_e32 v0, s0
17391738
; SI-NEXT: v_mov_b32_e32 v1, s1
17401739
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
@@ -1743,20 +1742,20 @@ define amdgpu_kernel void @s_dynamic_insertelement_v8i8(ptr addrspace(1) %out, p
17431742
; VI-LABEL: s_dynamic_insertelement_v8i8:
17441743
; VI: ; %bb.0:
17451744
; VI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1746-
; VI-NEXT: s_load_dword s8, s[8:9], 0x10
17471745
; VI-NEXT: s_mov_b32 s7, 0x1100f000
17481746
; VI-NEXT: s_mov_b32 s6, -1
1747+
; VI-NEXT: s_load_dword s8, s[8:9], 0x10
17491748
; VI-NEXT: s_waitcnt lgkmcnt(0)
17501749
; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
17511750
; VI-NEXT: s_mov_b32 s4, s0
1752-
; VI-NEXT: s_lshl_b32 s0, s8, 3
17531751
; VI-NEXT: s_mov_b32 s5, s1
1754-
; VI-NEXT: s_lshl_b64 s[0:1], 0xff, s0
1755-
; VI-NEXT: s_and_b32 s9, s1, 0x5050505
1752+
; VI-NEXT: s_lshl_b32 s8, s8, 3
17561753
; VI-NEXT: s_waitcnt lgkmcnt(0)
1757-
; VI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
1758-
; VI-NEXT: s_and_b32 s8, s0, 0x5050505
1759-
; VI-NEXT: s_or_b64 s[0:1], s[8:9], s[2:3]
1754+
; VI-NEXT: s_xor_b32 s1, s3, 0x5050505
1755+
; VI-NEXT: s_xor_b32 s0, s2, 0x5050505
1756+
; VI-NEXT: s_lshl_b64 s[8:9], 0xff, s8
1757+
; VI-NEXT: s_and_b64 s[0:1], s[0:1], s[8:9]
1758+
; VI-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
17601759
; VI-NEXT: v_mov_b32_e32 v0, s0
17611760
; VI-NEXT: v_mov_b32_e32 v1, s1
17621761
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0

0 commit comments

Comments
 (0)