@@ -1364,13 +1364,11 @@ define <4 x i64> @unzip2a_dual_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ;
 ; ZIP-LABEL: unzip2a_dual_v4i64:
 ; ZIP: # %bb.0: # %entry
-; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, mu
-; ZIP-NEXT: vmv.v.i v0, 8
-; ZIP-NEXT: vslideup.vi v10, v9, 2
-; ZIP-NEXT: vslideup.vi v10, v9, 1, v0.t
-; ZIP-NEXT: vmv.v.i v0, 12
-; ZIP-NEXT: ri.vunzip2a.vv v11, v8, v9
-; ZIP-NEXT: vmerge.vvm v8, v11, v10, v0
+; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; ZIP-NEXT: ri.vunzip2a.vv v11, v9, v10
+; ZIP-NEXT: ri.vunzip2a.vv v9, v8, v10
+; ZIP-NEXT: vslideup.vi v9, v11, 2
+; ZIP-NEXT: vmv.v.v v8, v9
 ; ZIP-NEXT: ret
 entry:
   %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1502,16 +1500,11 @@ define <16 x i64> @unzip2a_dual_v16i64(<16 x i64> %a, <16 x i64> %b) {
 ; ZIP-LABEL: unzip2a_dual_v16i64:
 ; ZIP: # %bb.0: # %entry
 ; ZIP-NEXT: vsetivli zero, 8, e64, m2, ta, ma
-; ZIP-NEXT: ri.vunzip2a.vv v16, v8, v10
-; ZIP-NEXT: vsetivli zero, 16, e16, m1, ta, ma
-; ZIP-NEXT: vid.v v8
-; ZIP-NEXT: li a0, -256
-; ZIP-NEXT: vadd.vv v8, v8, v8
-; ZIP-NEXT: vmv.s.x v0, a0
-; ZIP-NEXT: vadd.vi v8, v8, -16
-; ZIP-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; ZIP-NEXT: vrgatherei16.vv v16, v12, v8, v0.t
-; ZIP-NEXT: vmv.v.v v8, v16
+; ZIP-NEXT: ri.vunzip2a.vv v16, v12, v14
+; ZIP-NEXT: ri.vunzip2a.vv v12, v8, v10
+; ZIP-NEXT: vsetivli zero, 16, e64, m4, ta, ma
+; ZIP-NEXT: vslideup.vi v12, v16, 8
+; ZIP-NEXT: vmv.v.v v8, v12
 ; ZIP-NEXT: ret
 entry:
   %c = shufflevector <16 x i64> %a, <16 x i64> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
@@ -1557,13 +1550,11 @@ define <4 x i64> @unzip2a_dual_v4i64_exact(<4 x i64> %a, <4 x i64> %b) vscale_ra
 ;
 ; ZIP-LABEL: unzip2a_dual_v4i64_exact:
 ; ZIP: # %bb.0: # %entry
-; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, mu
-; ZIP-NEXT: vmv.v.i v0, 8
-; ZIP-NEXT: vslideup.vi v10, v9, 2
-; ZIP-NEXT: vslideup.vi v10, v9, 1, v0.t
-; ZIP-NEXT: vmv.v.i v0, 12
-; ZIP-NEXT: ri.vunzip2a.vv v11, v8, v9
-; ZIP-NEXT: vmerge.vvm v8, v11, v10, v0
+; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; ZIP-NEXT: ri.vunzip2a.vv v11, v9, v10
+; ZIP-NEXT: ri.vunzip2a.vv v9, v8, v10
+; ZIP-NEXT: vslideup.vi v9, v11, 2
+; ZIP-NEXT: vmv.v.v v8, v9
 ; ZIP-NEXT: ret
 entry:
   %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1609,13 +1600,12 @@ define <4 x i64> @unzip2a_dual_v4i64_exact_nf2(<4 x i64> %a, <4 x i64> %b) vscal
 ;
 ; ZIP-LABEL: unzip2a_dual_v4i64_exact_nf2:
 ; ZIP: # %bb.0: # %entry
-; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, mu
-; ZIP-NEXT: vmv.v.i v0, 8
-; ZIP-NEXT: vslideup.vi v10, v9, 2
-; ZIP-NEXT: vslideup.vi v10, v9, 1, v0.t
-; ZIP-NEXT: vmv.v.i v0, 12
-; ZIP-NEXT: ri.vunzip2a.vv v11, v8, v9
-; ZIP-NEXT: vmerge.vvm v8, v11, v10, v0
+; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; ZIP-NEXT: ri.vunzip2a.vv v11, v9, v10
+; ZIP-NEXT: ri.vunzip2a.vv v9, v8, v10
+; ZIP-NEXT: vsetvli zero, zero, e64, m1, tu, ma
+; ZIP-NEXT: vslideup.vi v9, v11, 2
+; ZIP-NEXT: vmv1r.v v8, v9
 ; ZIP-NEXT: ret
 entry:
   %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1740,37 +1730,10 @@ define <16 x i64> @unzip2a_dual_v16i64_exact(<16 x i64> %a, <16 x i64> %b) vscal
 ;
 ; ZIP-LABEL: unzip2a_dual_v16i64_exact:
 ; ZIP: # %bb.0: # %entry
-; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, mu
-; ZIP-NEXT: vslideup.vi v18, v15, 2
-; ZIP-NEXT: vmv.v.i v16, 8
-; ZIP-NEXT: vmv.v.i v17, 12
-; ZIP-NEXT: vslideup.vi v20, v13, 2
-; ZIP-NEXT: vmv.v.v v0, v16
-; ZIP-NEXT: vslideup.vi v18, v15, 1, v0.t
-; ZIP-NEXT: ri.vunzip2a.vv v15, v14, v19
-; ZIP-NEXT: vmv.v.v v0, v17
-; ZIP-NEXT: vmerge.vvm v15, v15, v18, v0
-; ZIP-NEXT: vmv.v.v v0, v16
-; ZIP-NEXT: vslideup.vi v20, v13, 1, v0.t
-; ZIP-NEXT: ri.vunzip2a.vv v14, v12, v13
-; ZIP-NEXT: vslideup.vi v12, v11, 2
-; ZIP-NEXT: vslideup.vi v18, v9, 2
-; ZIP-NEXT: vmv.v.v v0, v17
-; ZIP-NEXT: vmerge.vvm v14, v14, v20, v0
-; ZIP-NEXT: li a0, -256
-; ZIP-NEXT: ri.vunzip2a.vv v20, v10, v13
-; ZIP-NEXT: ri.vunzip2a.vv v10, v8, v19
-; ZIP-NEXT: vmv.v.v v0, v16
-; ZIP-NEXT: vslideup.vi v12, v11, 1, v0.t
-; ZIP-NEXT: vmv.v.v v0, v17
-; ZIP-NEXT: vmerge.vvm v13, v20, v12, v0
-; ZIP-NEXT: vmv.v.v v0, v16
-; ZIP-NEXT: vslideup.vi v18, v9, 1, v0.t
-; ZIP-NEXT: vmv.v.v v0, v17
-; ZIP-NEXT: vmerge.vvm v12, v10, v18, v0
-; ZIP-NEXT: vmv.s.x v0, a0
-; ZIP-NEXT: vsetivli zero, 16, e64, m4, ta, ma
-; ZIP-NEXT: vmerge.vvm v8, v12, v12, v0
+; ZIP-NEXT: vsetivli zero, 8, e64, m2, ta, ma
+; ZIP-NEXT: ri.vunzip2a.vv v18, v12, v14
+; ZIP-NEXT: ri.vunzip2a.vv v16, v8, v10
+; ZIP-NEXT: vmv4r.v v8, v16
 ; ZIP-NEXT: ret
 entry:
   %c = shufflevector <16 x i64> %a, <16 x i64> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>