@@ -1513,11 +1513,11 @@ define amdgpu_kernel void @dynamic_insertelement_v2i16(ptr addrspace(1) %out, <2
15131513; SI-NEXT: s_waitcnt lgkmcnt(0)
15141514; SI-NEXT: s_mov_b32 s4, s0
15151515; SI-NEXT: s_lshl_b32 s0, s3, 4
1516- ; SI-NEXT: s_lshl_b32 s0, 0xffff, s0
15171516; SI-NEXT: s_mov_b32 s5, s1
1518- ; SI-NEXT: s_andn2_b32 s1, s2, s0
1519- ; SI-NEXT: s_and_b32 s0, s0, 0x50005
1520- ; SI-NEXT: s_or_b32 s0, s0, s1
1517+ ; SI-NEXT: s_lshl_b32 s0, 0xffff, s0
1518+ ; SI-NEXT: s_xor_b32 s1, s2, 0x50005
1519+ ; SI-NEXT: s_and_b32 s0, s1, s0
1520+ ; SI-NEXT: s_xor_b32 s0, s0, s2
15211521; SI-NEXT: v_mov_b32_e32 v0, s0
15221522; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
15231523; SI-NEXT: s_endpgm
@@ -1530,11 +1530,11 @@ define amdgpu_kernel void @dynamic_insertelement_v2i16(ptr addrspace(1) %out, <2
15301530; VI-NEXT: s_waitcnt lgkmcnt(0)
15311531; VI-NEXT: s_mov_b32 s4, s0
15321532; VI-NEXT: s_lshl_b32 s0, s3, 4
1533- ; VI-NEXT: s_lshl_b32 s0, 0xffff, s0
15341533; VI-NEXT: s_mov_b32 s5, s1
1535- ; VI-NEXT: s_andn2_b32 s1, s2, s0
1536- ; VI-NEXT: s_and_b32 s0, s0, 0x50005
1537- ; VI-NEXT: s_or_b32 s0, s0, s1
1534+ ; VI-NEXT: s_lshl_b32 s0, 0xffff, s0
1535+ ; VI-NEXT: s_xor_b32 s1, s2, 0x50005
1536+ ; VI-NEXT: s_and_b32 s0, s1, s0
1537+ ; VI-NEXT: s_xor_b32 s0, s0, s2
15381538; VI-NEXT: v_mov_b32_e32 v0, s0
15391539; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
15401540; VI-NEXT: s_endpgm
@@ -1552,13 +1552,13 @@ define amdgpu_kernel void @dynamic_insertelement_v3i16(ptr addrspace(1) %out, <3
15521552; SI-NEXT: s_mov_b32 s6, -1
15531553; SI-NEXT: s_waitcnt lgkmcnt(0)
15541554; SI-NEXT: s_mov_b32 s4, s0
1555- ; SI-NEXT: s_lshl_b32 s0 , s8, 4
1555+ ; SI-NEXT: s_lshl_b32 s8 , s8, 4
15561556; SI-NEXT: s_mov_b32 s5, s1
1557- ; SI-NEXT: s_lshl_b64 s[0:1], 0xffff, s0
1558- ; SI-NEXT: s_and_b32 s9, s1 , 0x50005
1559- ; SI-NEXT: s_and_b32 s8, s0, 0x50005
1560- ; SI-NEXT: s_andn2_b64 s[0:1], s[2:3 ], s[0:1 ]
1561- ; SI-NEXT: s_or_b64 s[0:1], s[8:9 ], s[0:1 ]
1557+ ; SI-NEXT: s_xor_b32 s1, s3, 0x50005
1558+ ; SI-NEXT: s_xor_b32 s0, s2 , 0x50005
1559+ ; SI-NEXT: s_lshl_b64 s[8:9], 0xffff, s8
1560+ ; SI-NEXT: s_and_b64 s[0:1], s[0:1 ], s[8:9 ]
1561+ ; SI-NEXT: s_xor_b64 s[0:1], s[0:1 ], s[2:3 ]
15621562; SI-NEXT: v_mov_b32_e32 v0, s1
15631563; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4
15641564; SI-NEXT: v_mov_b32_e32 v0, s0
@@ -1575,12 +1575,12 @@ define amdgpu_kernel void @dynamic_insertelement_v3i16(ptr addrspace(1) %out, <3
15751575; VI-NEXT: s_mov_b32 s4, s0
15761576; VI-NEXT: s_lshl_b32 s0, s8, 4
15771577; VI-NEXT: s_mov_b32 s8, 0x50005
1578+ ; VI-NEXT: s_mov_b32 s9, s8
15781579; VI-NEXT: s_mov_b32 s5, s1
15791580; VI-NEXT: s_lshl_b64 s[0:1], 0xffff, s0
1580- ; VI-NEXT: s_mov_b32 s9, s8
1581- ; VI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
1582- ; VI-NEXT: s_and_b64 s[0:1], s[0:1], s[8:9]
1583- ; VI-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
1581+ ; VI-NEXT: s_xor_b64 s[8:9], s[2:3], s[8:9]
1582+ ; VI-NEXT: s_and_b64 s[0:1], s[8:9], s[0:1]
1583+ ; VI-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
15841584; VI-NEXT: v_mov_b32_e32 v0, s1
15851585; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4
15861586; VI-NEXT: v_mov_b32_e32 v0, s0
@@ -1602,9 +1602,9 @@ define amdgpu_kernel void @dynamic_insertelement_v2i8(ptr addrspace(1) %out, [8
16021602; SI-NEXT: s_waitcnt lgkmcnt(0)
16031603; SI-NEXT: s_lshl_b32 s4, s4, 3
16041604; SI-NEXT: s_lshl_b32 s4, 0xff, s4
1605- ; SI-NEXT: s_andn2_b32 s5 , s5, s4
1606- ; SI-NEXT: s_and_b32 s4, s4, 0x505
1607- ; SI-NEXT: s_or_b32 s4, s4, s5
1605+ ; SI-NEXT: s_xor_b32 s6 , s5, 0x505
1606+ ; SI-NEXT: s_and_b32 s4, s6, s4
1607+ ; SI-NEXT: s_xor_b32 s4, s4, s5
16081608; SI-NEXT: v_mov_b32_e32 v0, s4
16091609; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
16101610; SI-NEXT: s_endpgm
@@ -1619,10 +1619,9 @@ define amdgpu_kernel void @dynamic_insertelement_v2i8(ptr addrspace(1) %out, [8
16191619; VI-NEXT: s_waitcnt lgkmcnt(0)
16201620; VI-NEXT: s_lshl_b32 s4, s4, 3
16211621; VI-NEXT: s_lshl_b32 s4, 0xff, s4
1622- ; VI-NEXT: s_and_b32 s6, s4, 0x505
1623- ; VI-NEXT: s_xor_b32 s4, s4, 0xffff
1624- ; VI-NEXT: s_and_b32 s4, s4, s5
1625- ; VI-NEXT: s_or_b32 s4, s6, s4
1622+ ; VI-NEXT: s_xor_b32 s6, s5, 0x505
1623+ ; VI-NEXT: s_and_b32 s4, s6, s4
1624+ ; VI-NEXT: s_xor_b32 s4, s4, s5
16261625; VI-NEXT: v_mov_b32_e32 v0, s4
16271626; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
16281627; VI-NEXT: s_endpgm
@@ -1644,9 +1643,9 @@ define amdgpu_kernel void @dynamic_insertelement_v3i8(ptr addrspace(1) %out, [8
16441643; SI-NEXT: s_waitcnt lgkmcnt(0)
16451644; SI-NEXT: s_lshl_b32 s4, s4, 3
16461645; SI-NEXT: s_lshl_b32 s4, 0xff, s4
1647- ; SI-NEXT: s_andn2_b32 s5 , s5, s4
1648- ; SI-NEXT: s_and_b32 s4, s4, 0x5050505
1649- ; SI-NEXT: s_or_b32 s4, s4, s5
1646+ ; SI-NEXT: s_xor_b32 s6 , s5, 0x5050505
1647+ ; SI-NEXT: s_and_b32 s4, s6, s4
1648+ ; SI-NEXT: s_xor_b32 s4, s4, s5
16501649; SI-NEXT: s_lshr_b32 s5, s4, 16
16511650; SI-NEXT: v_mov_b32_e32 v0, s4
16521651; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
@@ -1664,9 +1663,9 @@ define amdgpu_kernel void @dynamic_insertelement_v3i8(ptr addrspace(1) %out, [8
16641663; VI-NEXT: s_waitcnt lgkmcnt(0)
16651664; VI-NEXT: s_lshl_b32 s4, s4, 3
16661665; VI-NEXT: s_lshl_b32 s4, 0xff, s4
1667- ; VI-NEXT: s_andn2_b32 s5 , s5, s4
1668- ; VI-NEXT: s_and_b32 s4, s4, 0x5050505
1669- ; VI-NEXT: s_or_b32 s4, s4, s5
1666+ ; VI-NEXT: s_xor_b32 s6 , s5, 0x5050505
1667+ ; VI-NEXT: s_and_b32 s4, s6, s4
1668+ ; VI-NEXT: s_xor_b32 s4, s4, s5
16701669; VI-NEXT: s_lshr_b32 s5, s4, 16
16711670; VI-NEXT: v_mov_b32_e32 v0, s4
16721671; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
@@ -1689,9 +1688,9 @@ define amdgpu_kernel void @dynamic_insertelement_v4i8(ptr addrspace(1) %out, [8
16891688; SI-NEXT: s_waitcnt lgkmcnt(0)
16901689; SI-NEXT: s_lshl_b32 s4, s4, 3
16911690; SI-NEXT: s_lshl_b32 s4, 0xff, s4
1692- ; SI-NEXT: s_andn2_b32 s5 , s5, s4
1693- ; SI-NEXT: s_and_b32 s4, s4, 0x5050505
1694- ; SI-NEXT: s_or_b32 s4, s4, s5
1691+ ; SI-NEXT: s_xor_b32 s6 , s5, 0x5050505
1692+ ; SI-NEXT: s_and_b32 s4, s6, s4
1693+ ; SI-NEXT: s_xor_b32 s4, s4, s5
16951694; SI-NEXT: v_mov_b32_e32 v0, s4
16961695; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
16971696; SI-NEXT: s_endpgm
@@ -1706,9 +1705,9 @@ define amdgpu_kernel void @dynamic_insertelement_v4i8(ptr addrspace(1) %out, [8
17061705; VI-NEXT: s_waitcnt lgkmcnt(0)
17071706; VI-NEXT: s_lshl_b32 s4, s4, 3
17081707; VI-NEXT: s_lshl_b32 s4, 0xff, s4
1709- ; VI-NEXT: s_andn2_b32 s5 , s5, s4
1710- ; VI-NEXT: s_and_b32 s4, s4, 0x5050505
1711- ; VI-NEXT: s_or_b32 s4, s4, s5
1708+ ; VI-NEXT: s_xor_b32 s6 , s5, 0x5050505
1709+ ; VI-NEXT: s_and_b32 s4, s6, s4
1710+ ; VI-NEXT: s_xor_b32 s4, s4, s5
17121711; VI-NEXT: v_mov_b32_e32 v0, s4
17131712; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
17141713; VI-NEXT: s_endpgm
@@ -1721,20 +1720,20 @@ define amdgpu_kernel void @s_dynamic_insertelement_v8i8(ptr addrspace(1) %out, p
17211720; SI-LABEL: s_dynamic_insertelement_v8i8:
17221721; SI: ; %bb.0:
17231722; SI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1724- ; SI-NEXT: s_load_dword s8, s[8:9], 0x4
17251723; SI-NEXT: s_mov_b32 s7, 0x100f000
17261724; SI-NEXT: s_mov_b32 s6, -1
1725+ ; SI-NEXT: s_load_dword s8, s[8:9], 0x4
17271726; SI-NEXT: s_waitcnt lgkmcnt(0)
17281727; SI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
17291728; SI-NEXT: s_mov_b32 s4, s0
1730- ; SI-NEXT: s_lshl_b32 s0, s8, 3
17311729; SI-NEXT: s_mov_b32 s5, s1
1732- ; SI-NEXT: s_lshl_b64 s[0:1], 0xff, s0
1733- ; SI-NEXT: s_and_b32 s9, s1, 0x5050505
1730+ ; SI-NEXT: s_lshl_b32 s8, s8, 3
17341731; SI-NEXT: s_waitcnt lgkmcnt(0)
1735- ; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
1736- ; SI-NEXT: s_and_b32 s8, s0, 0x5050505
1737- ; SI-NEXT: s_or_b64 s[0:1], s[8:9], s[2:3]
1732+ ; SI-NEXT: s_xor_b32 s1, s3, 0x5050505
1733+ ; SI-NEXT: s_xor_b32 s0, s2, 0x5050505
1734+ ; SI-NEXT: s_lshl_b64 s[8:9], 0xff, s8
1735+ ; SI-NEXT: s_and_b64 s[0:1], s[0:1], s[8:9]
1736+ ; SI-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
17381737; SI-NEXT: v_mov_b32_e32 v0, s0
17391738; SI-NEXT: v_mov_b32_e32 v1, s1
17401739; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
@@ -1743,20 +1742,20 @@ define amdgpu_kernel void @s_dynamic_insertelement_v8i8(ptr addrspace(1) %out, p
17431742; VI-LABEL: s_dynamic_insertelement_v8i8:
17441743; VI: ; %bb.0:
17451744; VI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1746- ; VI-NEXT: s_load_dword s8, s[8:9], 0x10
17471745; VI-NEXT: s_mov_b32 s7, 0x1100f000
17481746; VI-NEXT: s_mov_b32 s6, -1
1747+ ; VI-NEXT: s_load_dword s8, s[8:9], 0x10
17491748; VI-NEXT: s_waitcnt lgkmcnt(0)
17501749; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
17511750; VI-NEXT: s_mov_b32 s4, s0
1752- ; VI-NEXT: s_lshl_b32 s0, s8, 3
17531751; VI-NEXT: s_mov_b32 s5, s1
1754- ; VI-NEXT: s_lshl_b64 s[0:1], 0xff, s0
1755- ; VI-NEXT: s_and_b32 s9, s1, 0x5050505
1752+ ; VI-NEXT: s_lshl_b32 s8, s8, 3
17561753; VI-NEXT: s_waitcnt lgkmcnt(0)
1757- ; VI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
1758- ; VI-NEXT: s_and_b32 s8, s0, 0x5050505
1759- ; VI-NEXT: s_or_b64 s[0:1], s[8:9], s[2:3]
1754+ ; VI-NEXT: s_xor_b32 s1, s3, 0x5050505
1755+ ; VI-NEXT: s_xor_b32 s0, s2, 0x5050505
1756+ ; VI-NEXT: s_lshl_b64 s[8:9], 0xff, s8
1757+ ; VI-NEXT: s_and_b64 s[0:1], s[0:1], s[8:9]
1758+ ; VI-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
17601759; VI-NEXT: v_mov_b32_e32 v0, s0
17611760; VI-NEXT: v_mov_b32_e32 v1, s1
17621761; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
0 commit comments