66#include < stack>
77#include < cmath>
88
9- void P2P (double x, double y, double z, double mux, double muy, double muz, double *F) {
10- double R2 = x*x + y*y + z*z;
11- double R3 = R2*sqrt (R2);
12- double R5 = R3*R2;
13- double mu_dot_r = mux*x + muy*y + muz*z;
14- #pragma omp atomic
15- F[0 ] += (3 *mu_dot_r * x / R5 - mux / R3);
16- #pragma omp atomic
17- F[1 ] += (3 *mu_dot_r * y / R5 - muy / R3);
18- #pragma omp atomic
19- F[2 ] += (3 *mu_dot_r * z / R5 - muz / R3);
20- }
21-
22- void P2P_noatomic (double x, double y, double z, double mux, double muy, double muz, double *F) {
23- double R2 = x*x + y*y + z*z;
24- double R3 = R2*sqrt (R2);
25- double R5 = R3*R2;
26- double mu_dot_r = mux*x + muy*y + muz*z;
27- F[0 ] += (3 *mu_dot_r * x / R5 - mux / R3);
28- F[1 ] += (3 *mu_dot_r * y / R5 - muy / R3);
29- F[2 ] += (3 *mu_dot_r * z / R5 - muz / R3);
9+ void M_sanity_check (const std::vector<Cell> &cells) {
10+ double M0 = 0 ;
11+ for (size_t c = 1 ; c < cells.size (); c++) {
12+ if (cells[c].nchild == 0 ) {
13+ M0 += cells[c].M [0 ];
14+ }
15+ }
16+ std::cout << " Cell 0 has M0 = " << cells[0 ].M [0 ] << std::endl;
17+ std::cout << " Other cells = " << M0 << std::endl;
18+ if (std::abs ((cells[0 ].M [0 ] - M0)/M0) > 10e-10 ) {
19+ throw std::runtime_error (" M0 sanity check failed" );
20+ }
3021}
3122
3223
33- void P2P_Cells (size_t A, size_t B, std::vector<Cell> &cells,
34- std::vector<Particle> &particles, double *F) {
24+ void P2P_Cells (size_t A, size_t B, std::vector<Cell> &cells, std::vector<Particle> &particles, double *F) {
3525 // A - target
3626 // B - source
3727 for (size_t p1 = 0 ; p1 < cells[A].nleaf ; p1++) {
38- double F_p[3 ] = {0.0 };
28+ // double F_p[FMMGEN_OUTPUTSIZE ] = {0.0};
3929 size_t l1 = cells[A].leaf [p1];
4030 for (size_t p2 = 0 ; p2 < cells[B].nleaf ; p2++) {
4131 size_t l2 = cells[B].leaf [p2];
4232 if (l2 != l1) {
43- double dx = particles[l1].r [0 ] - particles[l2].r [0 ];
44- double dy = particles[l1].r [1 ] - particles[l2].r [1 ];
45- double dz = particles[l1].r [2 ] - particles[l2].r [2 ];
46- P2P (dx, dy, dz, particles[l2].mu [ 0 ], particles[l2]. mu [ 1 ], particles[l2]. mu [ 2 ], F_p );
33+ double dx = ( particles[l1].r [0 ] - particles[l2].r [0 ]) ;
34+ double dy = ( particles[l1].r [1 ] - particles[l2].r [1 ]) ;
35+ double dz = ( particles[l1].r [2 ] - particles[l2].r [2 ]) ;
36+ P2P (dx, dy, dz, particles[l2].S , &F[FMMGEN_OUTPUTSIZE*l1] );
4737 }
4838 }
49- #pragma omp atomic
50- F[3 * l1 + 0 ] += F_p[0 ];
51- #pragma omp atomic
52- F[3 * l1 + 1 ] += F_p[1 ];
53- #pragma omp atomic
54- F[3 * l1 + 2 ] += F_p[2 ];
5539 }
5640}
5741
5842void interact_dehnen (size_t A, size_t B, std::vector<Cell> &cells, std::vector<Particle> &particles,
5943 double theta, size_t order, size_t ncrit, double *F) {
60- double dx = cells[A].x - cells[B].x ;
61- double dy = cells[A].y - cells[B].y ;
62- double dz = cells[A].z - cells[B].z ;
44+ double dx = ( cells[A].x - cells[B].x ) ;
45+ double dy = ( cells[A].y - cells[B].y ) ;
46+ double dz = ( cells[A].z - cells[B].z ) ;
6347 double R = sqrt (dx*dx + dy*dy + dz*dz);
6448
6549 if (R*theta > (cells[A].rmax + cells[B].rmax )) {
@@ -97,7 +81,7 @@ void interact_dehnen(size_t A, size_t B, std::vector<Cell> &cells, std::vector<P
9781void interact_dehnen_lazy (const size_t A, const size_t B,
9882 const std::vector<Cell> &cells,
9983 const std::vector<Particle> &particles,
100- const double theta, const size_t order,
84+ const double theta, const size_t order,
10185 const size_t ncrit,
10286 std::vector<std::pair<size_t , size_t >> &M2L_list,
10387 std::vector<std::pair<size_t , size_t >> &P2P_list) {
@@ -150,32 +134,30 @@ void interact_dehnen_lazy(const size_t A, const size_t B,
150134
151135void evaluate_P2M (std::vector<Particle> &particles, std::vector<Cell> &cells,
152136 size_t cell, size_t ncrit, size_t exporder) {
153- // if (cells[cell].nleaf >= ncrit) {
154- // for (size_t octant = 0; octant < 8; octant++) {
155- // if (cells[cell].nchild & (1 << octant)) {
156- // evaluate_P2M(particles, cells, cells[cell].child[octant], ncrit, exporder);
157- // }
158- // }
159- // }
160- // else {
161- double *M = new double [Nterms (exporder+1 )]();
162137 #pragma omp for
163138 for (size_t c = 0 ; c < cells.size (); c++) {
139+ // std::cout << "Cell " << c << std::endl;
140+ // std::cout << " Msize = " << Msize(exporder, FMMGEN_SOURCEORDER) << std::endl;
141+ size_t msize = Msize (exporder, FMMGEN_SOURCEORDER);
142+ double *M = new double [msize]();
143+
164144 if (cells[c].nleaf < ncrit) {
165- for (size_t i = 0 ; i < cells[c].nleaf ; i++) {
166- size_t l = cells[c].leaf [i];
167- M[0 ] = particles[l].mu [0 ];
168- M[1 ] = particles[l].mu [1 ];
169- M[2 ] = particles[l].mu [2 ];
170- // std::cout << "mu[" << l << "] = " << particles[l].mu[0] << std::endl;
171- double dx = (particles[l].r [0 ] - cells[c].x );
172- double dy = (particles[l].r [1 ] - cells[c].y );
173- double dz = (particles[l].r [2 ] - cells[c].z );
174- M2M (-dx, -dy, -dz, M, cells[c].M , exporder);
175- }
145+ for (size_t i = 0 ; i < cells[c].nleaf ; i++) {
146+ size_t l = cells[c].leaf [i];
147+ // Walter dehnen's definition:
148+ // (-1)^m / m! (x_a - z_a)^m
149+ double dx = (cells[c].x - particles[l].r [0 ]);
150+ double dy = (cells[c].y - particles[l].r [1 ]);
151+ double dz = (cells[c].z - particles[l].r [2 ]);
152+ for (int k = 0 ; k < FMMGEN_SOURCESIZE; k++) {
153+ // std::cout << particles[l].S[k] << std::endl;
154+ M[k] = particles[l].S [k];
155+ }
156+ M2M (dx, dy, dz, M, cells[c].M , exporder);
157+ }
176158 }
159+ delete[] M;
177160 }
178- delete[] M;
179161}
180162
181163void evaluate_M2M (std::vector<Particle> &particles, std::vector<Cell> &cells,
@@ -189,12 +171,17 @@ void evaluate_M2M(std::vector<Particle> &particles, std::vector<Cell> &cells,
189171 by iterating backwards through the nodes because
190172 of the way the tree is constructed.
191173 */
192- #pragma omp for
193- for (size_t c = 1 ; c < cells.size (); c++) {
174+ // #pragma omp for schedule(dynamic)
175+ // Can't currently go up the tree in parallel.
176+ // Needs to be recursive or summing not correct.
177+
178+ // Dehnen definition:
179+ // M_m(z_p) = (z_p - z_c)^n / n! M_{m - n}
180+ for (size_t c = cells.size () - 1 ; c > 0 ; c--) {
194181 size_t p = cells[c].parent ;
195- double dx = cells[p].x - cells[c].x ;
196- double dy = cells[p].y - cells[c].y ;
197- double dz = cells[p].z - cells[c].z ;
182+ double dx = ( cells[p].x - cells[c].x ) ;
183+ double dy = ( cells[p].y - cells[c].y ) ;
184+ double dz = ( cells[p].z - cells[c].z ) ;
198185 M2M (dx, dy, dz, cells[c].M , cells[p].M , exporder);
199186 }
200187}
@@ -206,6 +193,11 @@ void evaluate_M2L_lazy(std::vector<Cell> &cells,
206193 for (size_t i = 0 ; i < M2L_list.size (); i++) {
207194 size_t B = M2L_list[i].first ;
208195 size_t A = M2L_list[i].second ;
196+ // Dehnen definition:
197+ // F_n(z_B) = M_m(z_A) * D_{n+m} (z_B - z_A)
198+
199+ // So here, we've reversed the order:
200+ // F_n(z_A) = M_m(z_B) * D_{n+m} (z_A - z_B)
209201 double dx = cells[A].x - cells[B].x ;
210202 double dy = cells[A].y - cells[B].y ;
211203 double dz = cells[A].z - cells[B].z ;
@@ -225,16 +217,17 @@ void evaluate_P2P_lazy(std::vector<Cell> &cells, std::vector<Particle> &particle
225217
226218
227219void evaluate_L2L (std::vector<Cell> &cells, size_t exporder) {
228- #pragma omp for // schedule(runtime)
229- for (size_t i = 0 ; i < cells.size (); i++) {
220+ // Can't currently go down the tree in parallel!
221+ // needs to be recursive or summing not correct.
222+ for (size_t p = 0 ; p < cells.size (); p++) {
230223 for (int octant = 0 ; octant < 8 ; octant++) {
231- if (cells[i ].nchild & (1 << octant)) {
232- // for child in cell i
233- size_t c = cells[i ].child [octant];
234- double dx = cells[c].x - cells[i ].x ;
235- double dy = cells[c].y - cells[i ].y ;
236- double dz = cells[c].z - cells[i ].z ;
237- L2L (dx, dy, dz, cells[i ].L , cells[c].L , exporder);
224+ if (cells[p ].nchild & (1 << octant)) {
225+ // for child c in cell p
226+ size_t c = cells[p ].child [octant];
227+ double dx = cells[c].x - cells[p ].x ;
228+ double dy = cells[c].y - cells[p ].y ;
229+ double dz = cells[c].z - cells[p ].z ;
230+ L2L (dx, dy, dz, cells[p ].L , cells[c].L , exporder);
238231 }
239232 }
240233 }
@@ -245,36 +238,77 @@ void evaluate_L2P(std::vector<Particle> &particles, std::vector<Cell> &cells,
245238 #pragma omp for schedule(runtime)
246239 for (size_t i = 0 ; i < cells.size (); i++) {
247240 if (cells[i].nleaf < ncrit) {
248- // std::cout << "cell " << i << " is a leaf " << std::endl;
249241 for (size_t p = 0 ; p < cells[i].nleaf ; p++) {
250242 size_t k = cells[i].leaf [p];
251- // std::cout << "L2P from " << i << " to particle " << k << std::endl;
252243 double dx = particles[k].r [0 ] - cells[i].x ;
253244 double dy = particles[k].r [1 ] - cells[i].y ;
254245 double dz = particles[k].r [2 ] - cells[i].z ;
255- double Fv[3 ] = {0.0 };
256- L2P (dx, dy, dz, cells[i].L , Fv, exporder);
257- F[3 *k+0 ] -= Fv[0 ];
258- F[3 *k+1 ] -= Fv[1 ];
259- F[3 *k+2 ] -= Fv[2 ];
246+ L2P (dx, dy, dz, cells[i].L , &F[FMMGEN_OUTPUTSIZE*k], exporder);
260247 }
261248 }
262249 }
263250}
264251
265252void evaluate_direct (std::vector<Particle> &particles, double *F, size_t n) {
266- #pragma omp parallel for schedule(runtime)
253+ #pragma omp parallel for schedule(runtime)
267254 for (size_t i = 0 ; i < n; i++) {
268255 for (size_t j = 0 ; j < n; j++) {
269- if (i != j) {
270- double dx = particles[i].r [0 ] - particles[j].r [0 ];
271- double dy = particles[i].r [1 ] - particles[j].r [1 ];
272- double dz = particles[i].r [2 ] - particles[j].r [2 ];
273- double mux = particles[j].mu [0 ];
274- double muy = particles[j].mu [1 ];
275- double muz = particles[j].mu [2 ];
276- P2P (dx, dy, dz, mux, muy, muz, &F[3 *i]);
256+ if (i != j) {
257+ double dx = particles[i].r [0 ] - particles[j].r [0 ];
258+ double dy = particles[i].r [1 ] - particles[j].r [1 ];
259+ double dz = particles[i].r [2 ] - particles[j].r [2 ];
260+ P2P (dx, dy, dz, particles[j].S , &F[FMMGEN_OUTPUTSIZE*i]);
261+ }
262+ }
263+ }
264+ }
265+
266+ void evaluate_M2P_and_P2P (std::vector<Particle> &particles, unsigned int p, unsigned int i,
267+ std::vector<Cell> &cells, double *F, unsigned int n_crit, double theta,
268+ unsigned int exporder) {
269+ // For particle i, start at cell p
270+ double dx, dy, dz, r;
271+ int c;
272+ unsigned int j;
273+ // If cell p is not a leaf cell:
274+ if (cells[p].nleaf >= n_crit) {
275+ // Iterate through it's children
276+ for (unsigned int octant = 0 ; octant < 8 ; octant++) {
277+ // If a child exists in a given octant:
278+ if (cells[p].nchild & (1 << octant)) {
279+ // Get the child's index
280+ c = cells[p].child [octant];
281+ // Calculate the distance from the particle to the child cell
282+ dx = particles[i].r [0 ] - cells[c].x ;
283+ dy = particles[i].r [1 ] - cells[c].y ;
284+ dz = particles[i].r [2 ] - cells[c].z ;
285+ r = sqrt (dx*dx + dy*dy + dz*dz);
286+ // Apply the Barnes-Hut criterion:
287+ if (cells[c].r > theta * r) {
288+ // If the cell is 'near':
289+ evaluate_M2P_and_P2P (particles, c, i, cells, F, n_crit, theta, exporder);
290+ }
291+ else {
292+ // If the cell is 'far', calculate the potential and field
293+ // on the particle from the multipole expansion:
294+ M2P (dx, dy, dz, cells[c].M , &F[FMMGEN_OUTPUTSIZE*i], exporder);
277295 }
278296 }
297+ }
298+ }
299+ else {
300+ // loop in leaf cell's particles
301+ for (unsigned int l = 0 ; l < (cells[p].nleaf ); l++) {
302+ // Get the particle index:
303+ j = cells[p].leaf [l];
304+ if (i != j) {
305+ // Calculate the interparticle distance:
306+ dx = particles[i].r [0 ] - particles[j].r [0 ];
307+ dy = particles[i].r [1 ] - particles[j].r [1 ];
308+ dz = particles[i].r [2 ] - particles[j].r [2 ];
309+ // Compute the field:
310+ P2P (dx, dy, dz, particles[j].S , &F[FMMGEN_OUTPUTSIZE*i]);
311+ }
312+ }
279313 }
280314}
0 commit comments