@@ -1734,3 +1734,107 @@ entry:
   %c = shufflevector <16 x i64> %a, <16 x i64> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
   ret <16 x i64> %c
 }
+
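+; Odd-element deinterleave: pull elements 1, 3, 5, 7 from the concatenation
+; of %a and %b.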
+define <4 x i64> @unzip2b_dual_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; V-LABEL: unzip2b_dual_v4i64:
+; V:       # %bb.0: # %entry
+; V-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; V-NEXT:    vmv.v.i v0, 2
+; V-NEXT:    vslidedown.vi v10, v8, 1
+; V-NEXT:    vslidedown.vi v10, v8, 2, v0.t
+; V-NEXT:    vmv.v.i v0, 4
+; V-NEXT:    vmv1r.v v8, v9
+; V-NEXT:    vslideup.vi v8, v9, 1, v0.t
+; V-NEXT:    vmv.v.i v0, 12
+; V-NEXT:    vmerge.vvm v8, v10, v8, v0
+; V-NEXT:    ret
+;
+; ZVE32F-LABEL: unzip2b_dual_v4i64:
+; ZVE32F:       # %bb.0: # %entry
+; ZVE32F-NEXT:    ld a3, 8(a2)
+; ZVE32F-NEXT:    ld a2, 24(a2)
+; ZVE32F-NEXT:    ld a4, 8(a1)
+; ZVE32F-NEXT:    ld a1, 24(a1)
+; ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, mu
+; ZVE32F-NEXT:    vmv.v.i v0, 15
+; ZVE32F-NEXT:    srli a5, a2, 32
+; ZVE32F-NEXT:    srli a6, a3, 32
+; ZVE32F-NEXT:    srli a7, a1, 32
+; ZVE32F-NEXT:    srli t0, a4, 32
+; ZVE32F-NEXT:    vmv.v.x v8, a4
+; ZVE32F-NEXT:    vmv.v.x v9, a3
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, t0
+; ZVE32F-NEXT:    vslide1down.vx v9, v9, a6
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT:    vslide1down.vx v9, v9, a2
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a7
+; ZVE32F-NEXT:    vslide1down.vx v9, v9, a5
+; ZVE32F-NEXT:    vslidedown.vi v9, v8, 4, v0.t
+; ZVE32F-NEXT:    vse32.v v9, (a0)
+; ZVE32F-NEXT:    ret
+;
+; ZIP-LABEL: unzip2b_dual_v4i64:
+; ZIP:       # %bb.0: # %entry
+; ZIP-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
+; ZIP-NEXT:    ri.vunzip2b.vv v11, v9, v10
+; ZIP-NEXT:    ri.vunzip2b.vv v9, v8, v10
+; ZIP-NEXT:    vslideup.vi v9, v11, 2
+; ZIP-NEXT:    vmv.v.v v8, v9
+; ZIP-NEXT:    ret
+entry:
+  %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i64> %c
+}
+
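+; The same shuffle with vscale_range(4,4): VLEN is known to be exactly 256,
+; so the <4 x i64> result occupies exactly one vector register.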
+define <4 x i64> @unzip2b_dual_v4i64_exact(<4 x i64> %a, <4 x i64> %b) vscale_range(4,4) {
+; V-LABEL: unzip2b_dual_v4i64_exact:
+; V:       # %bb.0: # %entry
+; V-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; V-NEXT:    vmv.v.i v0, 2
+; V-NEXT:    vslidedown.vi v10, v8, 1
+; V-NEXT:    vslidedown.vi v10, v8, 2, v0.t
+; V-NEXT:    vmv.v.i v0, 4
+; V-NEXT:    vmv1r.v v8, v9
+; V-NEXT:    vslideup.vi v8, v9, 1, v0.t
+; V-NEXT:    vmv.v.i v0, 12
+; V-NEXT:    vmerge.vvm v8, v10, v8, v0
+; V-NEXT:    ret
+;
+; ZVE32F-LABEL: unzip2b_dual_v4i64_exact:
+; ZVE32F:       # %bb.0: # %entry
+; ZVE32F-NEXT:    ld a3, 8(a2)
+; ZVE32F-NEXT:    ld a2, 24(a2)
+; ZVE32F-NEXT:    ld a4, 8(a1)
+; ZVE32F-NEXT:    ld a1, 24(a1)
+; ZVE32F-NEXT:    vsetivli zero, 8, e32, m1, ta, mu
+; ZVE32F-NEXT:    vmv.v.i v0, 15
+; ZVE32F-NEXT:    srli a5, a2, 32
+; ZVE32F-NEXT:    srli a6, a3, 32
+; ZVE32F-NEXT:    srli a7, a1, 32
+; ZVE32F-NEXT:    srli t0, a4, 32
+; ZVE32F-NEXT:    vmv.v.x v8, a4
+; ZVE32F-NEXT:    vmv.v.x v9, a3
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, t0
+; ZVE32F-NEXT:    vslide1down.vx v9, v9, a6
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT:    vslide1down.vx v9, v9, a2
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a7
+; ZVE32F-NEXT:    vslide1down.vx v9, v9, a5
+; ZVE32F-NEXT:    vslidedown.vi v9, v8, 4, v0.t
+; ZVE32F-NEXT:    vs1r.v v9, (a0)
+; ZVE32F-NEXT:    ret
+;
+; ZIP-LABEL: unzip2b_dual_v4i64_exact:
+; ZIP:       # %bb.0: # %entry
+; ZIP-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
+; ZIP-NEXT:    ri.vunzip2b.vv v10, v8, v9
+; ZIP-NEXT:    vmv.v.v v8, v10
+; ZIP-NEXT:    ret
+entry:
+  %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i64> %c
+}