@@ -50,32 +50,20 @@ namespace cp_algo::math::fft {
5050 auto [Dvx, Dvy] = D.vget (k);
5151 auto [Crvx, Crvy] = vpoint (Cvx, Cvy) * vpoint (real (rt), imag (rt));
5252 auto [Drvx, Drvy] = vpoint (Dvx, Dvy) * vpoint (real (rt), imag (rt));
53- alignas (32 ) ftype Cx[2 * flen];
54- alignas (32 ) ftype Cy[2 * flen];
55- alignas (32 ) ftype Dx[2 * flen];
56- alignas (32 ) ftype Dy[2 * flen];
57- Cvx.copy_to (Cx + flen, std::experimental::vector_aligned);
58- Cvy.copy_to (Cy + flen, std::experimental::vector_aligned);
59- Dvx.copy_to (Dx + flen, std::experimental::vector_aligned);
60- Dvy.copy_to (Dy + flen, std::experimental::vector_aligned);
61- Crvx.copy_to (Cx, std::experimental::vector_aligned);
62- Crvy.copy_to (Cy, std::experimental::vector_aligned);
63- Drvx.copy_to (Dx, std::experimental::vector_aligned);
64- Drvy.copy_to (Dy, std::experimental::vector_aligned);
53+ vftype Cx[2 ] = {Crvx, Cvx}, Cy[2 ] = {Crvy, Cvy};
54+ vftype Dx[2 ] = {Drvx, Dvx}, Dy[2 ] = {Drvy, Dvy};
6555 vpoint AC, AD, BC, BD;
6656 AC = AD = BC = BD = {0 , 0 };
6757 for (size_t i = 0 ; i < flen; i++) {
6858 vftype Csx, Csy, Dsx, Dsy;
69- Csx.copy_from (Cx + flen - i, std::experimental ::element_aligned);
70- Csy.copy_from (Cy + flen - i, std::experimental ::element_aligned);
71- Dsx.copy_from (Dx + flen - i, std::experimental ::element_aligned);
72- Dsy.copy_from (Dy + flen - i, std::experimental ::element_aligned);
59+ Csx.copy_from ((ftype*) Cx + flen - i, stdx ::element_aligned);
60+ Csy.copy_from ((ftype*) Cy + flen - i, stdx ::element_aligned);
61+ Dsx.copy_from ((ftype*) Dx + flen - i, stdx ::element_aligned);
62+ Dsy.copy_from ((ftype*) Dy + flen - i, stdx ::element_aligned);
7363 vpoint As = {Ax[i], Ay[i]}, Bs = {Bx[i], By[i]};
7464 vpoint Cs = {Csx, Csy}, Ds = {Dsx, Dsy};
75- AC += As * Cs;
76- AD += As * Ds;
77- BC += Bs * Cs;
78- BD += Bs * Ds;
65+ AC += As * Cs; AD += As * Ds;
66+ BC += Bs * Cs; BD += Bs * Ds;
7967 }
8068 A.set (k, AC);
8169 C.set (k, AD + BC);
0 commit comments