Skip to content

Commit 02c060c

Browse files
committed
Parallelise tensor computation
1 parent ba15307 commit 02c060c

File tree

1 file changed

+16
-9
lines changed

1 file changed

+16
-9
lines changed

fidimag/common/dipolar/demag.c

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#include "demagcoef.h"
66

77

8-
double Nxxdipole(double x, double y, double z) {
8+
inline double Nxxdipole(double x, double y, double z) {
99
double x2 = x * x;
1010
double y2 = y * y;
1111
double z2 = z * z;
@@ -16,7 +16,7 @@ double Nxxdipole(double x, double y, double z) {
1616
return -(2 * x2 - y2 - z2) / (R * R * r);
1717
}
1818

19-
double Nxydipole(double x, double y, double z) {
19+
inline double Nxydipole(double x, double y, double z) {
2020
double R = x * x + y * y + z * z;
2121
if (R == 0)
2222
return 0.0;
@@ -54,9 +54,16 @@ void compute_dipolar_tensors(fft_demag_plan *plan) {
5454
int lenx = plan->lenx;
5555
int leny = plan->leny;
5656
int lenz = plan->lenz;
57-
int lenxy = lenx * leny;
58-
59-
57+
int lenxy = lenx * leny;
58+
// Parallelising this like this
59+
// means that z should be the largest index
60+
// in order to get better performance from threading.
61+
// The data writing is not very clever here; we're
62+
// going to be invalidating the cache a lot. It would be better
63+
// maybe to split this into six seperate loops for each component
64+
// of the tensor in order that each thread is working on a smaller
65+
// memory address range
66+
#pragma omp parallel for private(j, i, x, y, z, id) schedule(dynamic, 32)
6067
for (k = 0; k < lenz; k++) {
6168
for (j = 0; j < leny; j++) {
6269
for (i = 0; i < lenx; i++) {
@@ -81,7 +88,7 @@ void compute_dipolar_tensors(fft_demag_plan *plan) {
8188
void compute_demag_tensors(fft_demag_plan *plan) {
8289

8390
int i, j, k, id;
84-
double x, y, z;
91+
double x, y, z, radius_sq;
8592

8693
int nx = plan->nx;
8794
int ny = plan->ny;
@@ -97,7 +104,7 @@ void compute_demag_tensors(fft_demag_plan *plan) {
97104

98105
double length = pow(dx*dy*dz, 1/3.0);
99106
double asymptotic_radius_sq = pow(26.0*length,2.0);
100-
107+
#pragma omp parallel for private(j, i, id, x, y, z, radius_sq) schedule(dynamic, 32)
101108
for (k = 0; k < lenz; k++) {
102109
for (j = 0; j < leny; j++) {
103110
for (i = 0; i < lenx; i++) {
@@ -107,7 +114,7 @@ void compute_demag_tensors(fft_demag_plan *plan) {
107114
y = (j - ny + 1) * dy;
108115
z = (k - nz + 1) * dz;
109116

110-
double radius_sq = x*x+y*y+z*z;
117+
radius_sq = x*x+y*y+z*z;
111118

112119
if (radius_sq>asymptotic_radius_sq){
113120
//printf("%g %g %g %g %g %g\n",x,y,z,dx,dy,dz);
@@ -338,7 +345,7 @@ void compute_fields(fft_demag_plan *plan, double *spin, double *mu_s, double *fi
338345
//print_r("hz", plan->hz, plan->total_length);
339346

340347
double scale = -1.0 / plan->total_length;
341-
348+
#pragma omp parallel for private(j, i, id1, id2) schedule(dynamic, 32)
342349
for (k = 0; k < nz; k++) {
343350
for (j = 0; j < ny; j++) {
344351
for (i = 0; i < nx; i++) {

0 commit comments

Comments
 (0)