Skip to content

Commit cdba735

Browse files
Merge pull request #2618 from fredrik-johansson/mulmid3
Use middle products in `gr_mat_gr_poly_solve_lode_newton`
2 parents 6a7cca0 + 9e0d258 commit cdba735

File tree

2 files changed

+104
-34
lines changed

2 files changed

+104
-34
lines changed

src/gr_mat/gr_poly_solve_lode_newton.c

Lines changed: 103 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,66 @@
1313
#include "gr_mat.h"
1414
#include "gr_poly.h"
1515

16+
static int
17+
gr_mat_gr_poly_shift_left(gr_mat_t res, gr_mat_t A, slong s, gr_ctx_t poly_ctx)
18+
{
19+
int status = GR_SUCCESS;
20+
gr_ctx_struct *coeff_ctx = POLYNOMIAL_CTX(poly_ctx)->base_ring;
21+
22+
for (slong i = 0; i < A->r; i++)
23+
{
24+
for (slong j = 0; j < A->c; j++)
25+
{
26+
gr_srcptr entry_A = gr_mat_entry_srcptr(A, i, j, poly_ctx);
27+
gr_ptr entry_res = gr_mat_entry_ptr(res, i, j, poly_ctx);
28+
29+
status |= gr_poly_shift_left(entry_res, entry_A, s, coeff_ctx);
30+
}
31+
}
32+
33+
return status;
34+
}
35+
36+
static int
37+
gr_mat_gr_poly_shift_right(gr_mat_t res, gr_mat_t A, slong s, gr_ctx_t poly_ctx)
38+
{
39+
int status = GR_SUCCESS;
40+
gr_ctx_struct *coeff_ctx = POLYNOMIAL_CTX(poly_ctx)->base_ring;
41+
42+
for (slong i = 0; i < A->r; i++)
43+
{
44+
for (slong j = 0; j < A->c; j++)
45+
{
46+
gr_srcptr entry_A = gr_mat_entry_srcptr(A, i, j, poly_ctx);
47+
gr_ptr entry_res = gr_mat_entry_ptr(res, i, j, poly_ctx);
48+
49+
status |= gr_poly_shift_right(entry_res, entry_A, s, coeff_ctx);
50+
}
51+
}
52+
53+
return status;
54+
}
55+
56+
static int
57+
gr_mat_gr_poly_truncate(gr_mat_t res, gr_mat_t A, slong n, gr_ctx_t poly_ctx)
58+
{
59+
int status = GR_SUCCESS;
60+
gr_ctx_struct *coeff_ctx = POLYNOMIAL_CTX(poly_ctx)->base_ring;
61+
62+
for (slong i = 0; i < A->r; i++)
63+
{
64+
for (slong j = 0; j < A->c; j++)
65+
{
66+
gr_srcptr entry_A = gr_mat_entry_srcptr(A, i, j, poly_ctx);
67+
gr_ptr entry_res = gr_mat_entry_ptr(res, i, j, poly_ctx);
68+
69+
status |= gr_poly_truncate(entry_res, entry_A, n, coeff_ctx);
70+
}
71+
}
72+
73+
return status;
74+
}
75+
1676
/*
1777
* This is a FLINT implementation of the algorithm described in:
1878
* Fast computation of power series solutions of systems of differential equations
@@ -36,7 +96,7 @@
3696
* [ a b c d ] [ b30 b31 b32 b33 ] [ ... ... ... ... ]
3797
*/
3898
static int
39-
gr_mat_gr_poly_mullow_companion(gr_mat_t res, gr_mat_t A, gr_mat_t B, slong len, gr_ctx_t poly_ctx)
99+
gr_mat_gr_poly_mulmid_companion(gr_mat_t res, gr_mat_t A, gr_mat_t B, slong nlo, slong nhi, gr_ctx_t poly_ctx)
40100
{
41101
gr_ctx_struct *coeff_ctx = POLYNOMIAL_CTX(poly_ctx)->base_ring;
42102
gr_mat_t tmp_mat;
@@ -46,25 +106,28 @@ gr_mat_gr_poly_mullow_companion(gr_mat_t res, gr_mat_t A, gr_mat_t B, slong len,
46106
gr_mat_init(tmp_mat, A->r, A->c, poly_ctx);
47107
gr_poly_init(tmp_poly, coeff_ctx);
48108

49-
status |= gr_mat_zero(tmp_mat, poly_ctx);
50-
for (int i = 0; i < A->r - 1; i++)
109+
for (slong i = 0; i < A->r - 1; i++)
51110
{
52-
for (int j = 0; j < A->c; j++)
111+
for (slong j = 0; j < A->c; j++)
53112
{
54113
gr_srcptr entry_B = gr_mat_entry_srcptr(B, i + 1, j, poly_ctx);
55114
gr_ptr entry_tmp = gr_mat_entry_ptr(tmp_mat, i, j, poly_ctx);
56-
status |= gr_poly_set(entry_tmp, entry_B, coeff_ctx);
115+
116+
status |= gr_poly_shift_right(entry_tmp, entry_B, nlo, coeff_ctx);
117+
status |= gr_poly_truncate(entry_tmp, entry_tmp, nhi - nlo, coeff_ctx);
57118
}
58119
}
59120

60-
for (int j = 0; j < A->c; j++)
121+
for (slong j = 0; j < A->c; j++)
61122
{
62123
gr_ptr entry_tmp = gr_mat_entry_ptr(tmp_mat, A->r - 1, j, poly_ctx);
63-
for (int i = 0; i < B->r; i++)
124+
125+
for (slong i = 0; i < B->r; i++)
64126
{
65127
gr_srcptr entry_A = gr_mat_entry_srcptr(A, A->r - 1, i, poly_ctx);
66128
gr_srcptr entry_B = gr_mat_entry_srcptr(B, i, j, poly_ctx);
67-
status |= gr_poly_mullow(tmp_poly, entry_A, entry_B, len, coeff_ctx);
129+
130+
status |= gr_poly_mulmid(tmp_poly, entry_A, entry_B, nlo, nhi, coeff_ctx);
68131
status |= gr_poly_add(entry_tmp, entry_tmp, tmp_poly, coeff_ctx);
69132
}
70133
}
@@ -134,8 +197,7 @@ _gr_mat_gr_poly_solve_lode_newton_start(gr_mat_t Y, gr_mat_t Z, gr_poly_t A_deno
134197
/* One initial iteration for A_denominator_inv mod z^2. */
135198
m = 2;
136199
status |= gr_poly_truncate(tmp_poly, A_denominator, m, sol_coeff_ctx);
137-
status |= gr_poly_mullow(tmp_poly, tmp_poly, A_denominator_inv, m, sol_coeff_ctx);
138-
status |= gr_poly_shift_right(tmp_poly, tmp_poly, m / 2, sol_coeff_ctx);
200+
status |= gr_poly_mulmid(tmp_poly, tmp_poly, A_denominator_inv, m / 2, m, sol_coeff_ctx);
139201
status |= gr_poly_mullow(tmp_poly, tmp_poly, A_denominator_inv, m / 2, sol_coeff_ctx);
140202
status |= gr_poly_shift_left(tmp_poly, tmp_poly, m / 2, sol_coeff_ctx);
141203
status |= gr_poly_sub(A_denominator_inv, A_denominator_inv, tmp_poly, sol_coeff_ctx);
@@ -164,29 +226,23 @@ _gr_mat_gr_poly_solve_lode_newton_step(gr_mat_t Y, gr_mat_t Z, gr_poly_t A_denom
164226
/* Z = Z + (Z*(I - Y*Z) mod z^m) */
165227
status |= gr_mat_mul(tmp_mat, Y, Z, sol_poly_ctx);
166228
status |= gr_mat_sub_ui(tmp_mat, tmp_mat, 1, sol_poly_ctx);
229+
/* Todo: should be mullow */
167230
status |= gr_mat_mul(tmp_mat, Z, tmp_mat, sol_poly_ctx);
168-
for (i = 0; i < tmp_mat->r; i++)
169-
{
170-
for (j = 0; j < tmp_mat->c; j++)
171-
{
172-
gr_ptr entry_tmp = gr_mat_entry_ptr(tmp_mat, i, j, sol_poly_ctx);
173-
status |= gr_poly_truncate(entry_tmp, entry_tmp, m, sol_coeff_ctx);
174-
}
175-
}
231+
status |= gr_mat_gr_poly_truncate(tmp_mat, tmp_mat, m, sol_poly_ctx);
176232
status |= gr_mat_sub(Z, Z, tmp_mat, sol_poly_ctx);
177233

178234
/* Iteration for A_denominator_inv mod z^len. */
179-
status |= gr_poly_truncate(tmp_poly, A_denominator, len, sol_coeff_ctx);
180-
/* TODO: Use middle product here? */
181-
status |= gr_poly_mullow(tmp_poly, tmp_poly, A_denominator_inv, len, sol_coeff_ctx);
182-
status |= gr_poly_shift_right(tmp_poly, tmp_poly, m, sol_coeff_ctx);
235+
status |= gr_poly_mulmid(tmp_poly, A_denominator, A_denominator_inv, m, len, sol_coeff_ctx);
183236
status |= gr_poly_mullow(tmp_poly, tmp_poly, A_denominator_inv, m, sol_coeff_ctx);
184237
status |= gr_poly_shift_left(tmp_poly, tmp_poly, m, sol_coeff_ctx);
238+
185239
status |= gr_poly_sub(A_denominator_inv, A_denominator_inv, tmp_poly, sol_coeff_ctx);
186240

187241
/* Next we are going to set Err = Y' - (A mod z^{len - 1})*Y mod z^{len - 1} in a few steps. */
242+
/* The m-1 low coefficients of Err are zero by construction, so we actually
243+
compute Err divided by x^(m-1). */
188244

189-
/* tmp_mat = (A mod z^{len - 1})*Y */
245+
/* tmp_mat = (A mod z^{len - 1})*Y (divided by x^(m-1)) */
190246
for (i = 0; i < Y->r; i++)
191247
{
192248
for (j = 0; j < Y->c; j++)
@@ -197,24 +253,41 @@ _gr_mat_gr_poly_solve_lode_newton_step(gr_mat_t Y, gr_mat_t Z, gr_poly_t A_denom
197253
}
198254
}
199255
if (A_is_companion)
200-
status |= gr_mat_gr_poly_mullow_companion(tmp_mat, tmp_mat, Y, len - 1, sol_poly_ctx);
256+
{
257+
status |= gr_mat_gr_poly_mulmid_companion(tmp_mat, tmp_mat, Y, m - 1, len - 1, sol_poly_ctx);
258+
}
201259
else
260+
{
261+
/* Todo: should be mulmid */
202262
status |= gr_mat_mul(tmp_mat, tmp_mat, Y, sol_poly_ctx);
263+
status |= gr_mat_gr_poly_truncate(tmp_mat, tmp_mat, len - 1, sol_poly_ctx);
264+
status |= gr_mat_gr_poly_shift_right(tmp_mat, tmp_mat, m - 1, sol_poly_ctx);
265+
}
203266

204-
/* Err = Y' - tmp_mat */
267+
/* Err = Y' - tmp_mat (divided by x^(m-1)) */
205268
for (i = 0; i < Y->r; i++)
206269
{
207270
for (j = 0; j < Y->c; j++)
208271
{
209272
gr_srcptr entry_Y = gr_mat_entry_srcptr(Y, i, j, sol_poly_ctx);
210273
gr_ptr entry_Err = gr_mat_entry_ptr(Err, i, j, sol_poly_ctx);
274+
/* Todo: compute only the high part of the derivative */
211275
status |= gr_poly_derivative(entry_Err, entry_Y, sol_coeff_ctx);
276+
status |= gr_poly_shift_right(entry_Err, entry_Err, m - 1, sol_coeff_ctx);
212277
}
278+
213279
}
214280
status |= gr_mat_sub(Err, Err, tmp_mat, sol_poly_ctx);
215281

216282
/* Y = Y - (Y*integral(Z*Err) mod z^len) */
283+
/* Todo: should be mullow */
217284
status |= gr_mat_mul(tmp_mat, Z, Err, sol_poly_ctx);
285+
status |= gr_mat_gr_poly_truncate(tmp_mat, tmp_mat, len - m, sol_poly_ctx);
286+
287+
/* Err was implicitly divided x^(m-1), so restore it to compute the integral */
288+
/* Todo: avoid the following explicit shifts; compute just the high
289+
part of the integral */
290+
status |= gr_mat_gr_poly_shift_left(tmp_mat, tmp_mat, m - 1, sol_poly_ctx);
218291
for (i = 0; i < Y->r; i++)
219292
{
220293
for (j = 0; j < Y->c; j++)
@@ -223,15 +296,12 @@ _gr_mat_gr_poly_solve_lode_newton_step(gr_mat_t Y, gr_mat_t Z, gr_poly_t A_denom
223296
status |= gr_poly_integral(entry_tmp, entry_tmp, sol_coeff_ctx);
224297
}
225298
}
299+
status |= gr_mat_gr_poly_shift_right(tmp_mat, tmp_mat, m, sol_poly_ctx);
300+
301+
/* Todo: should be mullow */
226302
status |= gr_mat_mul(tmp_mat, Y, tmp_mat, sol_poly_ctx);
227-
for (i = 0; i < tmp_mat->r; i++)
228-
{
229-
for (j = 0; j < tmp_mat->c; j++)
230-
{
231-
gr_ptr entry_tmp = gr_mat_entry_ptr(tmp_mat, i, j, sol_poly_ctx);
232-
status |= gr_poly_truncate(entry_tmp, entry_tmp, len, sol_coeff_ctx);
233-
}
234-
}
303+
status |= gr_mat_gr_poly_truncate(tmp_mat, tmp_mat, len - m, sol_poly_ctx);
304+
status |= gr_mat_gr_poly_shift_left(tmp_mat, tmp_mat, m, sol_poly_ctx);
235305
status |= gr_mat_sub(Y, Y, tmp_mat, sol_poly_ctx);
236306

237307
gr_poly_clear(tmp_poly, sol_coeff_ctx);

src/gr_mat/test/t-gr_poly_solve_lode_newton.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ TEST_GR_FUNCTION_START(gr_mat_gr_poly_solve_lode_newton, state, count_success, c
1919
{
2020
slong iter;
2121

22-
for (iter = 0; iter < 1000; iter++)
22+
for (iter = 0; iter < 100 * flint_test_multiplier(); iter++)
2323
{
2424
gr_ctx_t ctx, poly_ctx;
2525
gr_mat_t A_numerator, Y, Y0, AY, err, tmp_mat;

0 commit comments

Comments
 (0)