5
5
#include "demagcoef.h"
6
6
7
7
8
- double Nxxdipole (double x , double y , double z ) {
8
+ inline double Nxxdipole (double x , double y , double z ) {
9
9
double x2 = x * x ;
10
10
double y2 = y * y ;
11
11
double z2 = z * z ;
@@ -16,7 +16,7 @@ double Nxxdipole(double x, double y, double z) {
16
16
return - (2 * x2 - y2 - z2 ) / (R * R * r );
17
17
}
18
18
19
- double Nxydipole (double x , double y , double z ) {
19
+ inline double Nxydipole (double x , double y , double z ) {
20
20
double R = x * x + y * y + z * z ;
21
21
if (R == 0 )
22
22
return 0.0 ;
@@ -54,9 +54,16 @@ void compute_dipolar_tensors(fft_demag_plan *plan) {
54
54
int lenx = plan -> lenx ;
55
55
int leny = plan -> leny ;
56
56
int lenz = plan -> lenz ;
57
- int lenxy = lenx * leny ;
58
-
59
-
57
+ int lenxy = lenx * leny ;
58
+ // Parallelising over the outermost (k) loop like this
59
+ // means that z should be the largest index
60
+ // in order to get better performance from threading.
61
+ // The data writing is not very clever here; we're
62
+ // going to be invalidating the cache a lot. It would be better
63
+ // maybe to split this into six separate loops for each component
64
+ // of the tensor in order that each thread is working on a smaller
65
+ // memory address range
66
+ #pragma omp parallel for private(j, i, x, y, z, id) schedule(dynamic, 32)
60
67
for (k = 0 ; k < lenz ; k ++ ) {
61
68
for (j = 0 ; j < leny ; j ++ ) {
62
69
for (i = 0 ; i < lenx ; i ++ ) {
@@ -81,7 +88,7 @@ void compute_dipolar_tensors(fft_demag_plan *plan) {
81
88
void compute_demag_tensors (fft_demag_plan * plan ) {
82
89
83
90
int i , j , k , id ;
84
- double x , y , z ;
91
+ double x , y , z , radius_sq ;
85
92
86
93
int nx = plan -> nx ;
87
94
int ny = plan -> ny ;
@@ -97,7 +104,7 @@ void compute_demag_tensors(fft_demag_plan *plan) {
97
104
98
105
double length = pow (dx * dy * dz , 1 /3.0 );
99
106
double asymptotic_radius_sq = pow (26.0 * length ,2.0 );
100
-
107
+ #pragma omp parallel for private(j, i, id, x, y, z, radius_sq) schedule(dynamic, 32)
101
108
for (k = 0 ; k < lenz ; k ++ ) {
102
109
for (j = 0 ; j < leny ; j ++ ) {
103
110
for (i = 0 ; i < lenx ; i ++ ) {
@@ -107,7 +114,7 @@ void compute_demag_tensors(fft_demag_plan *plan) {
107
114
y = (j - ny + 1 ) * dy ;
108
115
z = (k - nz + 1 ) * dz ;
109
116
110
- double radius_sq = x * x + y * y + z * z ;
117
+ radius_sq = x * x + y * y + z * z ;
111
118
112
119
if (radius_sq > asymptotic_radius_sq ){
113
120
//printf("%g %g %g %g %g %g\n",x,y,z,dx,dy,dz);
@@ -338,7 +345,7 @@ void compute_fields(fft_demag_plan *plan, double *spin, double *mu_s, double *fi
338
345
//print_r("hz", plan->hz, plan->total_length);
339
346
340
347
double scale = -1.0 / plan -> total_length ;
341
-
348
+ #pragma omp parallel for private(j, i, id1, id2) schedule(dynamic, 32)
342
349
for (k = 0 ; k < nz ; k ++ ) {
343
350
for (j = 0 ; j < ny ; j ++ ) {
344
351
for (i = 0 ; i < nx ; i ++ ) {
0 commit comments