flintlib
diff --git a/doc/source/fmpz_poly.rst b/doc/source/fmpz_poly.rst
Lines changed: 26 additions & 18 deletions
diff --git a/src/fmpq_poly/exp_series.c b/src/fmpq_poly/exp_series.c
Lines changed: 16 additions & 8 deletions
diff --git a/src/fmpq_poly/inv_series_newton.c b/src/fmpq_poly/inv_series_newton.c
Lines changed: 2 additions & 2 deletions
diff --git a/src/fmpq_poly/invsqrt_series.c b/src/fmpq_poly/invsqrt_series.c
Lines changed: 12 additions & 2 deletions
diff --git a/src/fmpz_poly.h b/src/fmpz_poly.h
Lines changed: 18 additions & 25 deletions
diff --git a/src/fmpz_poly/bit_unpack.c b/src/fmpz_poly/bit_unpack.c
Lines changed: 23 additions & 8 deletions
diff --git a/src/fmpz_poly/gcd_heuristic.c b/src/fmpz_poly/gcd_heuristic.c
Lines changed: 3 additions & 3 deletions
diff --git a/src/fmpz_poly/inv_series.c b/src/fmpz_poly/inv_series.c
Lines changed: 4 additions & 4 deletions
@@ -592,15 +592,19 @@ Bit packing
     ``bit_size``, negating the coefficients before packing
     if ``negate`` is set to `-1`.
 
-.. function:: int _fmpz_poly_bit_unpack(fmpz * poly, slong len, nn_srcptr arr, flint_bitcnt_t bit_size, int negate)
+.. function:: int _fmpz_poly_bit_unpack(fmpz * poly, slong nlo, slong nhi, nn_srcptr arr, flint_bitcnt_t bit_size, int negate)
 
-    Unpacks the polynomial of given length from the array as packed into
+    Unpacks the polynomial of given length `nhi` from the array as packed into
     fields of the given ``bit_size``, finally negating the coefficients
     if ``negate`` is set to `-1`. Returns borrow, which is nonzero if a
     leading term with coefficient `\pm1` should be added at
-    position ``len`` of ``poly``.
+    position ``nhi`` of ``poly``.
+
+    If `nlo` is zero, all coefficients are unpacked; otherwise the
+    coefficients in the range `[nlo, nhi)` are unpacked and written to
+    indices `[0, nhi-nlo)` in ``poly``.
 
-.. function:: void _fmpz_poly_bit_unpack_unsigned(fmpz * poly, slong len, nn_srcptr arr, flint_bitcnt_t bit_size)
+.. function:: void _fmpz_poly_bit_unpack_unsigned(fmpz * poly, slong nlo, slong nhi, nn_srcptr arr, flint_bitcnt_t bit_size)
 
     Unpacks the polynomial of given length from the array as packed into
     fields of the given ``bit_size``.  The coefficients are assumed to
@@ -669,19 +673,24 @@ Multiplication
     remainder to the corresponding coefficients of the product of ``poly1``
     and ``poly2``.
 
-.. function:: void _fmpz_poly_mulmid_classical(fmpz * res, const fmpz * poly1, slong len1, const fmpz * poly2, slong len2)
+.. function:: void _fmpz_poly_mulmid_classical(fmpz * res, const fmpz * poly1, slong len1, const fmpz * poly2, slong len2, slong nlo, slong nhi)
+              void _fmpz_poly_mulmid_KS(fmpz * res, const fmpz * poly1, slong len1, const fmpz * poly2, slong len2, slong nlo, slong nhi)
+              void _fmpz_poly_mulmid_SS(fmpz * res, const fmpz * poly1, slong len1, const fmpz * poly2, slong len2, slong nlo, slong nhi)
+              void _fmpz_poly_mulmid(fmpz * res, const fmpz * poly1, slong len1, const fmpz * poly2, slong len2, slong nlo, slong nhi)
 
-    Sets ``res`` to the middle ``len1 - len2 + 1`` coefficients of
-    the product of ``(poly1, len1)`` and ``(poly2, len2)``, i.e. the
-    coefficients from degree ``len2 - 1`` to ``len1 - 1`` inclusive.
-    Assumes that ``len1 >= len2 > 0``.
+    Sets ``(res, nhi - nlo)`` to the coefficients at indices `[nlo, nhi)`
+    in the full product of ``(poly1, len1)`` and ``(poly2, len2)``.
+    Assumes that ``len1`` and ``len2`` are positive and that
+    `0 \le nlo < nhi \le len1 + len2 - 1`.
 
 .. function:: void fmpz_poly_mulmid_classical(fmpz_poly_t res, const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong nlo, slong nhi)
+              void fmpz_poly_mulmid_KS(fmpz_poly_t res, const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong nlo, slong nhi)
+              void fmpz_poly_mulmid_SS(fmpz_poly_t res, const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong nlo, slong nhi)
+              void fmpz_poly_mulmid(fmpz_poly_t res, const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong nlo, slong nhi)
 
-    Sets ``res`` to the middle ``len(poly1) - len(poly2) + 1``
-    coefficients of ``poly1 * poly2``, i.e. the coefficient from degree
-    ``len2 - 1`` to ``len1 - 1`` inclusive.  Assumes that
-    ``len1 >= len2``.
+    Sets ``res`` to the polynomial formed by the coefficients at indices `[nlo, nhi)`
+    in the product of ``poly1`` and ``poly2``. Equivalently, compute
+    `[(poly1 \cdot poly2) \bmod x^{nhi}] / x^{nlo}`.
 
 .. function:: void _fmpz_poly_mul_karatsuba(fmpz * res, const fmpz * poly1, slong len1, const fmpz * poly2, slong len2)
 
@@ -748,10 +757,10 @@ Multiplication
     Sets ``res`` to the lowest `n` coefficients of the product of
     ``poly1`` and ``poly2``.
 
-.. function:: void _fmpz_poly_mul_SS(fmpz * output, const fmpz * input1, slong length1, const fmpz * input2, slong length2)
+.. function:: void _fmpz_poly_mul_SS(fmpz * res, const fmpz * poly1, slong len1, const fmpz * poly2, slong len2)
 
-    Sets ``(output, length1 + length2 - 1)`` to the product of
-    ``(input1, length1)`` and ``(input2, length2)``.
+    Sets ``(res, len1 + len2 - 1)`` to the product of
+    ``(poly1, len1)`` and ``(poly2, len2)``.
 
     We must have ``len1 > 1`` and ``len2 > 1``.  Allows zero-padding
     of the two input polynomials.  Supports aliasing of inputs and outputs.
@@ -761,7 +770,7 @@ Multiplication
     Sets ``res`` to the product of ``poly1`` and ``poly2``. Uses the
     Schönhage-Strassen algorithm.
 
-.. function:: void _fmpz_poly_mullow_SS(fmpz * output, const fmpz * input1, slong length1, const fmpz * input2, slong length2, slong n)
+.. function:: void _fmpz_poly_mullow_SS(fmpz * res, const fmpz * poly1, slong len1, const fmpz * poly2, slong len2, slong n)
 
     Sets ``(res, n)`` to the lowest `n` coefficients of the product of
     ``(poly1, len1)`` and ``(poly2, len2)``.
@@ -783,7 +792,6 @@ Multiplication
     zero-padding of the two input polynomials. Does not support aliasing
     between the inputs and the output.
 
-
 .. function:: void fmpz_poly_mul(fmpz_poly_t res, const fmpz_poly_t poly1, const fmpz_poly_t poly2)
 
     Sets ``res`` to the product of ``poly1`` and ``poly2``.  Chooses
 
@@ -194,8 +194,7 @@ static void _fmpq_poly_integral_offset(fmpz * rpoly, fmpz_t rden,
 static void
 MULLOW(fmpz * z, fmpz_t zden, const fmpz * x, const fmpz_t xden, slong xn, const fmpz * y, const fmpz_t yden, slong yn, slong n)
 {
-    if (xn + yn - 1 < n)
-        flint_throw(FLINT_ERROR, "(%s)\n", __func__);
+    FLINT_ASSERT(xn + yn - 1 >= n);
 
     if (xn >= yn)
         _fmpz_poly_mullow(z, x, xn, y, yn, n);
@@ -205,6 +204,15 @@ MULLOW(fmpz * z, fmpz_t zden, const fmpz * x, const fmpz_t xden, slong xn, const
     fmpz_mul(zden, xden, yden);
 }
 
+static void   /* (z, zden) := coefficients [nlo, nhi) of the product (x, xn) * (y, yn), over denominator xden * yden */
+MULMID(fmpz * z, fmpz_t zden, const fmpz * x, const fmpz_t xden, slong xn, const fmpz * y, const fmpz_t yden, slong yn, slong nlo, slong nhi)
+{
+    FLINT_ASSERT(xn + yn - 1 >= nhi);   /* nhi may not exceed the full product length xn + yn - 1 */
+    /* numerators come from the integer middle product; no canonicalisation is done here */
+    _fmpz_poly_mulmid(z, x, xn, y, yn, nlo, nhi);
+    fmpz_mul(zden, xden, yden);   /* zden = xden * yden */
+}
+
 /* Assuming that the low m coefficients of poly have denominator
    den in canonical form and that the high n - m coefficients have
    denominator high_den in canonical form, combine the high and lower
@@ -309,9 +317,9 @@ _fmpq_poly_exp_series_newton(fmpz * f, fmpz_t fden,
             CONCATENATE(hprime, hprimeden, uden, m - 1, l);
         }
 
-        MULLOW(t, tden, hprime, hprimeden, l, f, fden, m, r);
-        _fmpq_poly_canonicalise(t + m - 1, tden, r + 1 - m);
-        MULLOW(g + m, uden, g, gden, n - m, t + m - 1, tden, r + 1 - m, n - m);
+        MULMID(t, tden, hprime, hprimeden, l, f, fden, m, m - 1, r);
+        _fmpq_poly_canonicalise(t, tden, r + 1 - m);
+        MULLOW(g + m, uden, g, gden, n - m, t, tden, r + 1 - m, n - m);
         _fmpq_poly_canonicalise(g + m, uden, n - m);
         _fmpq_poly_integral_offset(g + m, uden, g + m, uden, n - m, m);
         MULLOW(f + m, uden, f, fden, n - m, g + m, uden, n - m, n - m);
@@ -323,9 +331,9 @@ _fmpq_poly_exp_series_newton(fmpz * f, fmpz_t fden,
         /* g := exp(-h) + O(x^n); not needed if we only want exp(x) */
         if (i != 0 || inverse)
         {
-            MULLOW(t, tden, f, fden, n, g, gden, m, n);
-            _fmpq_poly_canonicalise(t + m, tden, n - m);
-            MULLOW(g + m, uden, g, gden, m, t + m, tden, n - m, n - m);
+            MULMID(t, tden, f, fden, n, g, gden, m, m, n);
+            _fmpq_poly_canonicalise(t, tden, n - m);
+            MULLOW(g + m, uden, g, gden, m, t, tden, n - m, n - m);
             /* Assuming that the low part is canonicalised on input,
                we just need to canonicalise the high part. */
             _fmpq_poly_canonicalise(g + m, uden, n - m);
 
@@ -115,10 +115,10 @@ _fmpq_poly_inv_series_newton(fmpz * Qinv, fmpz_t Qinvden,
         Wlen = FLINT_MIN(Qnlen + m - 1, n);
         W2len = Wlen - m;
 
-        MULLOW(W, Q, Qnlen, Qinv, m, Wlen);
+        _fmpz_poly_mulmid(W, Q, Qnlen, Qinv, m, m, Wlen);
         fmpz_mul(Wden, Qden, Qinvden);
 
-        MULLOW(Qinv + m, Qinv, m, W + m, W2len, n - m);
+        MULLOW(Qinv + m, Qinv, m, W, W2len, n - m);
         fmpz_mul(Qinvden, Qinvden, Wden);
 
         _fmpz_vec_scalar_mul_fmpz(Qinv, Qinv, m, Wden);
 
@@ -12,8 +12,18 @@
 
 #include "fmpz.h"
 #include "fmpz_vec.h"
+#include "fmpz_poly.h"
 #include "fmpq_poly.h"
 
+static void   /* file-local rational wrapper around _fmpz_poly_mulmid */
+_fmpq_poly_mulmid(fmpz * rpoly, fmpz_t rden,
+                  const fmpz * poly1, const fmpz_t den1, slong len1,
+                  const fmpz * poly2, const fmpz_t den2, slong len2, slong nlo, slong nhi)
+{
+    _fmpz_poly_mulmid(rpoly, poly1, len1, poly2, len2, nlo, nhi);   /* numerators: product coefficients at indices [nlo, nhi) */
+    fmpz_mul(rden, den1, den2);   /* rden = den1 * den2; the result is not canonicalised here */
+}
+
 void
 _fmpq_poly_invsqrt_series(fmpz * rpoly, fmpz_t rden,
                       const fmpz * poly, const fmpz_t den, slong len, slong n)
@@ -45,12 +55,12 @@ _fmpq_poly_invsqrt_series(fmpz * rpoly, fmpz_t rden,
         fmpz_zero(t + n - 1);
 
     _fmpq_poly_mullow(u, uden, t, tden, n, rpoly, rden, n, n);
-    _fmpq_poly_mullow(t, tden, u, uden, n, poly, den, len, n);
+    _fmpq_poly_mulmid(t + m, tden, u, uden, n, poly, den, len, m, n);
     _fmpz_vec_neg(t + m, t + m, n - m);
     _fmpz_vec_zero(t, m);
     fmpz_mul_ui(tden, tden, UWORD(2));
     _fmpq_poly_canonicalise(t, tden, n);
-
+    /* todo: concatenate instead of zero+add */
     _fmpq_poly_add(rpoly, rden, rpoly, rden, m, t, tden, n);
 
     fmpz_clear(tden);
 
@@ -331,23 +331,13 @@ void _fmpz_poly_scale_2exp(fmpz * pol, slong len, slong k);
 
 /*  Bit packing  *************************************************************/
 
-void _fmpz_poly_bit_pack(nn_ptr arr, const fmpz * poly,
-                                slong len, flint_bitcnt_t bit_size, int negate);
+void _fmpz_poly_bit_pack(nn_ptr arr, const fmpz * poly, slong len, flint_bitcnt_t bit_size, int negate);
+void fmpz_poly_bit_pack(fmpz_t f, const fmpz_poly_t poly, flint_bitcnt_t bit_size);
 
-int _fmpz_poly_bit_unpack(fmpz * poly, slong len,
-                           nn_srcptr arr, flint_bitcnt_t bit_size, int negate);
-
-void _fmpz_poly_bit_unpack_unsigned(fmpz * poly, slong len,
-                                       nn_srcptr arr, flint_bitcnt_t bit_size);
-
-void fmpz_poly_bit_pack(fmpz_t f, const fmpz_poly_t poly,
-        flint_bitcnt_t bit_size);
-
-void fmpz_poly_bit_unpack(fmpz_poly_t poly, const fmpz_t f,
-        flint_bitcnt_t bit_size);
-
-void fmpz_poly_bit_unpack_unsigned(fmpz_poly_t poly, const fmpz_t f,
-        flint_bitcnt_t bit_size);
+int _fmpz_poly_bit_unpack(fmpz * poly, slong nlo, slong nhi, nn_srcptr arr, flint_bitcnt_t bit_size, int negate);
+void _fmpz_poly_bit_unpack_unsigned(fmpz * poly, slong nlo, slong nhi, nn_srcptr arr, flint_bitcnt_t bit_size);
+void fmpz_poly_bit_unpack(fmpz_poly_t poly, const fmpz_t f, flint_bitcnt_t bit_size);
+void fmpz_poly_bit_unpack_unsigned(fmpz_poly_t poly, const fmpz_t f, flint_bitcnt_t bit_size);
 
 
 /*  Multiplication  **********************************************************/
@@ -370,11 +360,14 @@ void _fmpz_poly_mulhigh_classical(fmpz * res, const fmpz * poly1,
 void fmpz_poly_mulhigh_classical(fmpz_poly_t res,
               const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong start);
 
-void _fmpz_poly_mulmid_classical(fmpz * res, const fmpz * poly1,
-                                  slong len1, const fmpz * poly2, slong len2);
-
-void fmpz_poly_mulmid_classical(fmpz_poly_t res,
-                          const fmpz_poly_t poly1, const fmpz_poly_t poly2);
+void _fmpz_poly_mulmid_classical(fmpz * res, const fmpz * poly1, slong len1, const fmpz * poly2, slong len2, slong nlo, slong nhi);
+void fmpz_poly_mulmid_classical(fmpz_poly_t res, const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong nlo, slong nhi);
+void _fmpz_poly_mulmid_SS(fmpz * res, const fmpz * poly1, slong len1, const fmpz * poly2, slong len2, slong nlo, slong nhi);
+void fmpz_poly_mulmid_SS(fmpz_poly_t res, const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong nlo, slong nhi);
+void _fmpz_poly_mulmid_KS(fmpz * res, const fmpz * poly1, slong len1, const fmpz * poly2, slong len2, slong nlo, slong nhi);
+void fmpz_poly_mulmid_KS(fmpz_poly_t res, const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong nlo, slong nhi);
+void _fmpz_poly_mulmid(fmpz * res, const fmpz * poly1, slong len1, const fmpz * poly2, slong len2, slong nlo, slong nhi);
+void fmpz_poly_mulmid(fmpz_poly_t res, const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong nlo, slong nhi);
 
 void fmpz_poly_mul_karatsuba(fmpz_poly_t res,
                           const fmpz_poly_t poly1, const fmpz_poly_t poly2);
@@ -409,14 +402,14 @@ void _fmpz_poly_mullow_KS(fmpz * res, const fmpz * poly1, slong len1,
 void fmpz_poly_mullow_KS(fmpz_poly_t res, const fmpz_poly_t poly1,
                                            const fmpz_poly_t poly2, slong n);
 
-void _fmpz_poly_mul_SS(fmpz * output, const fmpz * input1, slong length1,
-                                         const fmpz * input2, slong length2);
+void _fmpz_poly_mul_SS(fmpz * res, const fmpz * poly1, slong len1,
+                                         const fmpz * poly2, slong len2);
 
 void fmpz_poly_mul_SS(fmpz_poly_t res,
                           const fmpz_poly_t poly1, const fmpz_poly_t poly2);
 
-void _fmpz_poly_mullow_SS(fmpz * output, const fmpz * input1, slong length1,
-                                 const fmpz * input2, slong length2, slong n);
+void _fmpz_poly_mullow_SS(fmpz * res, const fmpz * poly1, slong len1,
+                                     const fmpz * poly2, slong len2, slong n);
 
 void fmpz_poly_mullow_SS(fmpz_poly_t res,
                   const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong n);
 
@@ -14,7 +14,7 @@
 #include "fmpz_poly.h"
 
 int
-_fmpz_poly_bit_unpack(fmpz * poly, slong len,
+_fmpz_poly_bit_unpack(fmpz * poly, slong nlo, slong nhi,
                       nn_srcptr arr, flint_bitcnt_t bit_size, int negate)
 {
     flint_bitcnt_t bits = 0;
@@ -24,10 +24,18 @@ _fmpz_poly_bit_unpack(fmpz * poly, slong len,
     int borrow = 0;
     slong i;
 
-    for (i = 0; i < len; i++)
+    if (nlo != 0)
+    {
+        borrow = (arr[(nlo * bit_size - 1) / FLINT_BITS] >>
+            ((nlo * bit_size - 1) % FLINT_BITS)) & 1;
+        limbs = (nlo * bit_size) / FLINT_BITS;
+        bits = (nlo * bit_size) % FLINT_BITS;
+    }
+
+    for (i = nlo; i < nhi; i++)
     {
         borrow =
-            fmpz_bit_unpack(poly + i, arr + limbs, bits, bit_size, negate,
+            fmpz_bit_unpack(poly + i - nlo, arr + limbs, bits, bit_size, negate,
                             borrow);
         limbs += l;
         bits += b;
@@ -42,7 +50,7 @@ _fmpz_poly_bit_unpack(fmpz * poly, slong len,
 }
 
 void
-_fmpz_poly_bit_unpack_unsigned(fmpz * poly, slong len,
+_fmpz_poly_bit_unpack_unsigned(fmpz * poly, slong nlo, slong nhi,
                                nn_srcptr arr, flint_bitcnt_t bit_size)
 {
     flint_bitcnt_t bits = 0;
@@ -51,9 +59,15 @@ _fmpz_poly_bit_unpack_unsigned(fmpz * poly, slong len,
     slong l = bit_size / FLINT_BITS;
     slong i;
 
-    for (i = 0; i < len; i++)
+    if (nlo != 0)
     {
-        fmpz_bit_unpack_unsigned(poly + i, arr + limbs, bits, bit_size);
+        limbs = (nlo * bit_size) / FLINT_BITS;
+        bits = (nlo * bit_size) % FLINT_BITS;
+    }
+
+    for (i = nlo; i < nhi; i++)
+    {
+        fmpz_bit_unpack_unsigned(poly + i - nlo, arr + limbs, bits, bit_size);
         limbs += l;
         bits += b;
         if (bits >= FLINT_BITS)
@@ -64,6 +78,7 @@ _fmpz_poly_bit_unpack_unsigned(fmpz * poly, slong len,
     }
 }
 
+
 void
 fmpz_poly_bit_unpack_unsigned(fmpz_poly_t poly, const fmpz_t f,
                                         flint_bitcnt_t bit_size)
@@ -90,7 +105,7 @@ fmpz_poly_bit_unpack_unsigned(fmpz_poly_t poly, const fmpz_t f,
 
     fmpz_poly_fit_length(poly, len);
 
-    _fmpz_poly_bit_unpack_unsigned(poly->coeffs, len, tmp->_mp_d, bit_size);
+    _fmpz_poly_bit_unpack_unsigned(poly->coeffs, 0, len, tmp->_mp_d, bit_size);
     _fmpz_poly_set_length(poly, len);
     _fmpz_poly_normalise(poly);
 
@@ -123,7 +138,7 @@ fmpz_poly_bit_unpack(fmpz_poly_t poly, const fmpz_t f, flint_bitcnt_t bit_size)
 
     fmpz_poly_fit_length(poly, len + 1);
 
-    borrow = _fmpz_poly_bit_unpack(poly->coeffs, len,
+    borrow = _fmpz_poly_bit_unpack(poly->coeffs, 0, len,
                     tmp->_mp_d, bit_size, negate);
 
     if (borrow)
 
@@ -190,7 +190,7 @@ _fmpz_poly_gcd_heuristic(fmpz * res, const fmpz * poly1, slong len1,
    flint_mpn_zero(arrayg + limbsg, limbs2-limbsg);
 
    /* unpack gcd */
-   _fmpz_poly_bit_unpack(G, glen, arrayg, pack_bits, 0);
+   _fmpz_poly_bit_unpack(G, 0, glen, arrayg, pack_bits, 0);
    while (G[glen - 1] == 0) glen--;
 
 	/* divide by any content */
@@ -215,7 +215,7 @@ _fmpz_poly_gcd_heuristic(fmpz * res, const fmpz * poly1, slong len1,
       /* unpack quotient of first poly by gcd */
       Q = _fmpz_vec_init(len1);
       t = _fmpz_vec_init(len1 + glen);
-      _fmpz_poly_bit_unpack(Q, qlen, q, pack_bits, 0);
+      _fmpz_poly_bit_unpack(Q, 0, qlen, q, pack_bits, 0);
       while (Q[qlen - 1] == 0) qlen--;
 
       /* divide by content */
@@ -241,7 +241,7 @@ _fmpz_poly_gcd_heuristic(fmpz * res, const fmpz * poly1, slong len1,
          if (flint_mpn_divides(q, array2, limbs2, arrayg, limbsg, temp))
 	      {
             /* unpack quotient of second poly by gcd */
-            _fmpz_poly_bit_unpack(Q, qlen2, q, pack_bits, 0);
+            _fmpz_poly_bit_unpack(Q, 0, qlen2, q, pack_bits, 0);
             while (Q[qlen2 - 1] == 0) qlen2--;
 
 			/* check if we really need to multiply out to check for exact quotient */
 
@@ -282,7 +282,7 @@ _fmpz_poly_inv_series_newton(fmpz * Qinv, const fmpz * Q, slong Qlen, slong n)
         slong *a, i, m, Qnlen, Wlen, W2len;
         fmpz * W;
 
-        W = _fmpz_vec_init(n);
+        W = _fmpz_vec_init(n / 2);
         a = flint_malloc(sizeof(slong) * FLINT_BITS);
 
         a[i = 0] = n;
@@ -299,12 +299,12 @@ _fmpz_poly_inv_series_newton(fmpz * Qinv, const fmpz * Q, slong Qlen, slong n)
             Qnlen = FLINT_MIN(Qlen, n);
             Wlen = FLINT_MIN(Qnlen + m - 1, n);
             W2len = Wlen - m;
-            MULLOW(W, Q, Qnlen, Qinv, m, Wlen);
-            MULLOW(Qinv + m, Qinv, m, W + m, W2len, n - m);
+            _fmpz_poly_mulmid(W, Q, Qnlen, Qinv, m, m, Wlen);
+            MULLOW(Qinv + m, Qinv, m, W, W2len, n - m);
             _fmpz_vec_neg(Qinv + m, Qinv + m, n - m);
         }
 
-        _fmpz_vec_clear(W, n);
+        _fmpz_vec_clear(W, n / 2);
         flint_free(a);
     }
 }