Skip to content

Commit 64c4f36

Browse files
committed
Commit final thesis version of FMM/BH in Fidimag
1 parent 5fe1338 commit 64c4f36

23 files changed

+171693
-165209
lines changed

fidimag/atomistic/demag.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,13 @@ def compute_energy(self):
103103

104104

105105
class DemagFMM(Energy):
106-
def __init__(self, order, ncrit, theta, name="DemagFMM"):
106+
def __init__(self, order, ncrit, theta, name="DemagFMM", type='fmm'):
107+
self.type = type
108+
if self.type == 'fmm':
109+
self._type = 0
110+
elif self.type == 'bh':
111+
self._type = 1
112+
107113
self.name = name
108114
assert order > 0, "Order must be 1 or higher"
109115
assert order < 11, "Order bust be < 11"
@@ -121,11 +127,12 @@ def setup(self, mesh, spin, mu_s, mu_s_inv):
121127
self.order,
122128
self.coords,
123129
self.m_temp,
124-
self.mu_s,
130+
self.mu_s, self._type
125131
)
126132

127133
def compute_field(self, t=0, spin=None):
134+
print('computing field')
128135
self.m_temp[:] = spin if spin is not None else self.spin
129136
self.fmm.compute_field(self.field)
130-
self.field *= 1e-7
137+
self.field *= -1e-7
131138
return self.field

fidimag/atomistic/fmmlib/calculate.cpp

Lines changed: 123 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -6,60 +6,44 @@
66
#include <stack>
77
#include <cmath>
88

9-
void P2P(double x, double y, double z, double mux, double muy, double muz, double *F) {
10-
double R2 = x*x + y*y + z*z;
11-
double R3 = R2*sqrt(R2);
12-
double R5 = R3*R2;
13-
double mu_dot_r = mux*x + muy*y + muz*z;
14-
#pragma omp atomic
15-
F[0] += (3*mu_dot_r * x / R5 - mux / R3);
16-
#pragma omp atomic
17-
F[1] += (3*mu_dot_r * y / R5 - muy / R3);
18-
#pragma omp atomic
19-
F[2] += (3*mu_dot_r * z / R5 - muz / R3);
20-
}
21-
22-
void P2P_noatomic(double x, double y, double z, double mux, double muy, double muz, double *F) {
23-
double R2 = x*x + y*y + z*z;
24-
double R3 = R2*sqrt(R2);
25-
double R5 = R3*R2;
26-
double mu_dot_r = mux*x + muy*y + muz*z;
27-
F[0] += (3*mu_dot_r * x / R5 - mux / R3);
28-
F[1] += (3*mu_dot_r * y / R5 - muy / R3);
29-
F[2] += (3*mu_dot_r * z / R5 - muz / R3);
9+
void M_sanity_check(const std::vector<Cell> &cells) {
10+
double M0 = 0;
11+
for(size_t c = 1; c < cells.size(); c++) {
12+
if (cells[c].nchild == 0) {
13+
M0 += cells[c].M[0];
14+
}
15+
}
16+
std::cout << "Cell 0 has M0 = " << cells[0].M[0] << std::endl;
17+
std::cout << "Other cells = " << M0 << std::endl;
18+
if (std::abs((cells[0].M[0] - M0)/M0) > 10e-10) {
19+
throw std::runtime_error("M0 sanity check failed");
20+
}
3021
}
3122

3223

33-
void P2P_Cells(size_t A, size_t B, std::vector<Cell> &cells,
34-
std::vector<Particle> &particles, double *F) {
24+
void P2P_Cells(size_t A, size_t B, std::vector<Cell> &cells, std::vector<Particle> &particles, double *F) {
3525
// A - target
3626
// B - source
3727
for (size_t p1 = 0; p1 < cells[A].nleaf; p1++) {
38-
double F_p[3] = {0.0};
28+
//double F_p[FMMGEN_OUTPUTSIZE] = {0.0};
3929
size_t l1 = cells[A].leaf[p1];
4030
for (size_t p2 = 0; p2 < cells[B].nleaf; p2++) {
4131
size_t l2 = cells[B].leaf[p2];
4232
if (l2 != l1) {
43-
double dx = particles[l1].r[0] - particles[l2].r[0];
44-
double dy = particles[l1].r[1] - particles[l2].r[1];
45-
double dz = particles[l1].r[2] - particles[l2].r[2];
46-
P2P(dx, dy, dz, particles[l2].mu[0], particles[l2].mu[1], particles[l2].mu[2], F_p);
33+
double dx = (particles[l1].r[0] - particles[l2].r[0]);
34+
double dy = (particles[l1].r[1] - particles[l2].r[1]);
35+
double dz = (particles[l1].r[2] - particles[l2].r[2]);
36+
P2P(dx, dy, dz, particles[l2].S, &F[FMMGEN_OUTPUTSIZE*l1]);
4737
}
4838
}
49-
#pragma omp atomic
50-
F[3 * l1 + 0] += F_p[0];
51-
#pragma omp atomic
52-
F[3 * l1 + 1] += F_p[1];
53-
#pragma omp atomic
54-
F[3 * l1 + 2] += F_p[2];
5539
}
5640
}
5741

5842
void interact_dehnen(size_t A, size_t B, std::vector<Cell> &cells, std::vector<Particle> &particles,
5943
double theta, size_t order, size_t ncrit, double *F) {
60-
double dx = cells[A].x - cells[B].x;
61-
double dy = cells[A].y - cells[B].y;
62-
double dz = cells[A].z - cells[B].z;
44+
double dx = (cells[A].x - cells[B].x);
45+
double dy = (cells[A].y - cells[B].y);
46+
double dz = (cells[A].z - cells[B].z);
6347
double R = sqrt(dx*dx + dy*dy + dz*dz);
6448

6549
if (R*theta > (cells[A].rmax + cells[B].rmax)) {
@@ -97,7 +81,7 @@ void interact_dehnen(size_t A, size_t B, std::vector<Cell> &cells, std::vector<P
9781
void interact_dehnen_lazy(const size_t A, const size_t B,
9882
const std::vector<Cell> &cells,
9983
const std::vector<Particle> &particles,
100-
const double theta, const size_t order,
84+
const double theta, const size_t order,
10185
const size_t ncrit,
10286
std::vector<std::pair<size_t, size_t>> &M2L_list,
10387
std::vector<std::pair<size_t, size_t>> &P2P_list) {
@@ -150,32 +134,30 @@ void interact_dehnen_lazy(const size_t A, const size_t B,
150134

151135
void evaluate_P2M(std::vector<Particle> &particles, std::vector<Cell> &cells,
152136
size_t cell, size_t ncrit, size_t exporder) {
153-
// if (cells[cell].nleaf >= ncrit) {
154-
// for (size_t octant = 0; octant < 8; octant++) {
155-
// if (cells[cell].nchild & (1 << octant)) {
156-
// evaluate_P2M(particles, cells, cells[cell].child[octant], ncrit, exporder);
157-
// }
158-
// }
159-
// }
160-
// else {
161-
double *M = new double[Nterms(exporder+1)]();
162137
#pragma omp for
163138
for(size_t c = 0; c < cells.size(); c++) {
139+
//std::cout << "Cell " << c << std::endl;
140+
//std::cout << " Msize = " << Msize(exporder, FMMGEN_SOURCEORDER) << std::endl;
141+
size_t msize = Msize(exporder, FMMGEN_SOURCEORDER);
142+
double *M = new double[msize]();
143+
164144
if (cells[c].nleaf < ncrit) {
165-
for(size_t i = 0; i < cells[c].nleaf; i++) {
166-
size_t l = cells[c].leaf[i];
167-
M[0] = particles[l].mu[0];
168-
M[1] = particles[l].mu[1];
169-
M[2] = particles[l].mu[2];
170-
// std::cout << "mu[" << l << "] = " << particles[l].mu[0] << std::endl;
171-
double dx = (particles[l].r[0] - cells[c].x);
172-
double dy = (particles[l].r[1] - cells[c].y);
173-
double dz = (particles[l].r[2] - cells[c].z);
174-
M2M(-dx, -dy, -dz, M, cells[c].M, exporder);
175-
}
145+
for(size_t i = 0; i < cells[c].nleaf; i++) {
146+
size_t l = cells[c].leaf[i];
147+
// Walter dehnen's definition:
148+
// (-1)^m / m! (x_a - z_a)^m
149+
double dx = (cells[c].x - particles[l].r[0]);
150+
double dy = (cells[c].y - particles[l].r[1]);
151+
double dz = (cells[c].z - particles[l].r[2]);
152+
for(int k = 0; k < FMMGEN_SOURCESIZE; k++) {
153+
// std::cout << particles[l].S[k] << std::endl;
154+
M[k] = particles[l].S[k];
155+
}
156+
M2M(dx, dy, dz, M, cells[c].M, exporder);
157+
}
176158
}
159+
delete[] M;
177160
}
178-
delete[] M;
179161
}
180162

181163
void evaluate_M2M(std::vector<Particle> &particles, std::vector<Cell> &cells,
@@ -189,12 +171,17 @@ void evaluate_M2M(std::vector<Particle> &particles, std::vector<Cell> &cells,
189171
by iterating backwards through the nodes because
190172
of the way the tree is constructed.
191173
*/
192-
#pragma omp for
193-
for (size_t c = 1; c < cells.size(); c++) {
174+
// #pragma omp for schedule(dynamic)
175+
// Can't currently go up the tree in parallel.
176+
// Needs to be recursive or summing not correct.
177+
178+
// Dehnen definition:
179+
// M_m(z_p) = (z_p - z_c)^n / n! M_{m - n}
180+
for (size_t c = cells.size() - 1; c > 0; c--) {
194181
size_t p = cells[c].parent;
195-
double dx = cells[p].x - cells[c].x;
196-
double dy = cells[p].y - cells[c].y;
197-
double dz = cells[p].z - cells[c].z;
182+
double dx = (cells[p].x - cells[c].x);
183+
double dy = (cells[p].y - cells[c].y);
184+
double dz = (cells[p].z - cells[c].z);
198185
M2M(dx, dy, dz, cells[c].M, cells[p].M, exporder);
199186
}
200187
}
@@ -206,6 +193,11 @@ void evaluate_M2L_lazy(std::vector<Cell> &cells,
206193
for(size_t i = 0; i < M2L_list.size(); i++) {
207194
size_t B = M2L_list[i].first;
208195
size_t A = M2L_list[i].second;
196+
// Dehnen definition:
197+
// F_n(z_B) = M_m(z_A) * D_{n+m} (z_B - z_A)
198+
199+
// So here, we've reversed the order:
200+
// F_n(z_A) = M_m(z_B) * D_{n+m} (z_A - z_B)
209201
double dx = cells[A].x - cells[B].x;
210202
double dy = cells[A].y - cells[B].y;
211203
double dz = cells[A].z - cells[B].z;
@@ -225,16 +217,17 @@ void evaluate_P2P_lazy(std::vector<Cell> &cells, std::vector<Particle> &particle
225217

226218

227219
void evaluate_L2L(std::vector<Cell> &cells, size_t exporder) {
228-
#pragma omp for // schedule(runtime)
229-
for (size_t i = 0; i < cells.size(); i++) {
220+
// Can't currently go down the tree in parallel!
221+
// needs to be recursive or summing not correct.
222+
for (size_t p = 0; p < cells.size(); p++) {
230223
for (int octant = 0; octant < 8; octant++) {
231-
if (cells[i].nchild & (1 << octant)) {
232-
// for child in cell i
233-
size_t c = cells[i].child[octant];
234-
double dx = cells[c].x - cells[i].x;
235-
double dy = cells[c].y - cells[i].y;
236-
double dz = cells[c].z - cells[i].z;
237-
L2L(dx, dy, dz, cells[i].L, cells[c].L, exporder);
224+
if (cells[p].nchild & (1 << octant)) {
225+
// for child c in cell p
226+
size_t c = cells[p].child[octant];
227+
double dx = cells[c].x - cells[p].x;
228+
double dy = cells[c].y - cells[p].y;
229+
double dz = cells[c].z - cells[p].z;
230+
L2L(dx, dy, dz, cells[p].L, cells[c].L, exporder);
238231
}
239232
}
240233
}
@@ -245,36 +238,77 @@ void evaluate_L2P(std::vector<Particle> &particles, std::vector<Cell> &cells,
245238
#pragma omp for schedule(runtime)
246239
for (size_t i = 0; i < cells.size(); i++) {
247240
if (cells[i].nleaf < ncrit) {
248-
// std::cout << "cell " << i << " is a leaf " << std::endl;
249241
for (size_t p = 0; p < cells[i].nleaf; p++) {
250242
size_t k = cells[i].leaf[p];
251-
// std::cout << "L2P from " << i << " to particle " << k << std::endl;
252243
double dx = particles[k].r[0] - cells[i].x;
253244
double dy = particles[k].r[1] - cells[i].y;
254245
double dz = particles[k].r[2] - cells[i].z;
255-
double Fv[3] = {0.0};
256-
L2P(dx, dy, dz, cells[i].L, Fv, exporder);
257-
F[3*k+0] -= Fv[0];
258-
F[3*k+1] -= Fv[1];
259-
F[3*k+2] -= Fv[2];
246+
L2P(dx, dy, dz, cells[i].L, &F[FMMGEN_OUTPUTSIZE*k], exporder);
260247
}
261248
}
262249
}
263250
}
264251

265252
void evaluate_direct(std::vector<Particle> &particles, double *F, size_t n) {
266-
#pragma omp parallel for schedule(runtime)
253+
#pragma omp parallel for schedule(runtime)
267254
for (size_t i = 0; i < n; i++) {
268255
for (size_t j = 0; j < n; j++) {
269-
if (i != j) {
270-
double dx = particles[i].r[0] - particles[j].r[0];
271-
double dy = particles[i].r[1] - particles[j].r[1];
272-
double dz = particles[i].r[2] - particles[j].r[2];
273-
double mux = particles[j].mu[0];
274-
double muy = particles[j].mu[1];
275-
double muz = particles[j].mu[2];
276-
P2P(dx, dy, dz, mux, muy, muz, &F[3*i]);
256+
if (i != j) {
257+
double dx = particles[i].r[0] - particles[j].r[0];
258+
double dy = particles[i].r[1] - particles[j].r[1];
259+
double dz = particles[i].r[2] - particles[j].r[2];
260+
P2P(dx, dy, dz, particles[j].S, &F[FMMGEN_OUTPUTSIZE*i]);
261+
}
262+
}
263+
}
264+
}
265+
266+
void evaluate_M2P_and_P2P(std::vector<Particle> &particles, unsigned int p, unsigned int i,
267+
std::vector<Cell> &cells, double *F, unsigned int n_crit, double theta,
268+
unsigned int exporder) {
269+
// For particle i, start at cell p
270+
double dx, dy, dz, r;
271+
int c;
272+
unsigned int j;
273+
// If cell p is not a leaf cell:
274+
if (cells[p].nleaf >= n_crit) {
275+
// Iterate through it's children
276+
for (unsigned int octant = 0; octant < 8; octant++) {
277+
// If a child exists in a given octant:
278+
if (cells[p].nchild & (1 << octant)) {
279+
// Get the child's index
280+
c = cells[p].child[octant];
281+
// Calculate the distance from the particle to the child cell
282+
dx = particles[i].r[0] - cells[c].x;
283+
dy = particles[i].r[1] - cells[c].y;
284+
dz = particles[i].r[2] - cells[c].z;
285+
r = sqrt(dx*dx + dy*dy + dz*dz);
286+
// Apply the Barnes-Hut criterion:
287+
if (cells[c].r > theta * r) {
288+
// If the cell is 'near':
289+
evaluate_M2P_and_P2P(particles, c, i, cells, F, n_crit, theta, exporder);
290+
}
291+
else {
292+
// If the cell is 'far', calculate the potential and field
293+
// on the particle from the multipole expansion:
294+
M2P(dx, dy, dz, cells[c].M, &F[FMMGEN_OUTPUTSIZE*i], exporder);
277295
}
278296
}
297+
}
298+
}
299+
else {
300+
// loop in leaf cell's particles
301+
for(unsigned int l = 0; l < (cells[p].nleaf); l++) {
302+
// Get the particle index:
303+
j = cells[p].leaf[l];
304+
if (i != j) {
305+
// Calculate the interparticle distance:
306+
dx = particles[i].r[0] - particles[j].r[0];
307+
dy = particles[i].r[1] - particles[j].r[1];
308+
dz = particles[i].r[2] - particles[j].r[2];
309+
// Compute the field:
310+
P2P(dx, dy, dz, particles[j].S, &F[FMMGEN_OUTPUTSIZE*i]);
311+
}
312+
}
279313
}
280314
}

fidimag/atomistic/fmmlib/calculate.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#include <iostream>
1515
#include<omp.h>
1616

17-
void P2P(double x, double y, double z, double mux, double muy, double muz, double *F);
17+
void M_sanity_check(const std::vector<Cell> &cells);
1818

1919
void evaluate_P2M(std::vector<Particle> &particles, std::vector<Cell> &cells,
2020
size_t cell, size_t ncrit, size_t exporder);
@@ -52,3 +52,7 @@ void evaluate_M2L_lazy(std::vector<Cell> &cells,
5252

5353
void evaluate_P2P_lazy(std::vector<Cell> &cells, std::vector<Particle> &particles,
5454
std::vector<std::pair<size_t, size_t>> &P2P_list, double *F);
55+
56+
void evaluate_M2P_and_P2P(std::vector<Particle> &particles, unsigned int p, unsigned int i,
57+
std::vector<Cell> &cells, double *F, unsigned int n_crit, double theta,
58+
unsigned int exporder);

fidimag/atomistic/fmmlib/example.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
import fmmgen
22

3-
order = 11
3+
order = 8
44
source_order = 1
55
cse = True
66
atomic = True
77
precision='double'
88
fmmgen.generate_code(order, "operators",
99
precision=precision,
1010
CSE=cse,
11-
cython_wrapper=False,
11+
cython=False,
12+
harmonic_derivs=True,
1213
potential=False,
1314
field=True,
1415
source_order=source_order,

0 commit comments

Comments
 (0)