Skip to content

Commit a245d8a

Browse files
committed
Reduce number of multiplications in demag tensor calculations
1 parent 580bbe6 commit a245d8a

File tree

1 file changed

+25
-18
lines changed

1 file changed

+25
-18
lines changed

fidimag/common/dipolar/demag_oommf.c

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -399,10 +399,13 @@ double DemagNxxAsymptotic(double x, double y, double z,
399399
double dx4 = dx2*dx2;
400400
double dy4 = dy2*dy2;
401401
double dz4 = dz2*dz2;
402+
double dx2dy2 = dx2*dy2;
403+
double dy2dz2 = dy2*dz2;
404+
double dx2dz2 = dx2*dz2;
402405

403406
double term3 = 2*sx2 - sy2 - sz2;
404-
405407
double term5 = 0.0;
408+
406409
if(dx2!=dy2 || dx2!=dz2 || dy2!=dz2) { // Non-cube case
407410
double a1 = 8*dx2 - 4*dy2 - 4*dz2;
408411
double a2 = -24*dx2 + 27*dy2 - 3*dz2;
@@ -415,18 +418,18 @@ double DemagNxxAsymptotic(double x, double y, double z,
415418
term5 *= 0.25;
416419
}
417420

418-
double term7 = 0.0;
421+
double term7 = 0.0;
419422
{
420-
double b1 = 32*dx4 - 40*dx2*dy2 - 40*dx2*dz2 + 12*dy4 + 10*dy2*dz2 + 12*dz4;
421-
double b2 = -240*dx4 + 580*dx2*dy2 + 20*dx2*dz2 - 202*dy4 - 75*dy2*dz2 + 22*dz4;
422-
double b3 = -240*dx4 + 20*dx2*dy2 + 580*dx2*dz2 + 22*dy4 - 75*dy2*dz2 - 202*dz4;
423-
double b4 = 180*dx4 - 505*dx2*dy2 + 55*dx2*dz2 + 232*dy4 - 75*dy2*dz2 + 8*dz4;
424-
double b5 = 360*dx4 - 450*dx2*dy2 - 450*dx2*dz2 - 180*dy4 + 900*dy2*dz2 - 180*dz4;
425-
double b6 = 180*dx4 + 55*dx2*dy2 - 505*dx2*dz2 + 8*dy4 - 75*dy2*dz2 + 232*dz4;
426-
double b7 = -10*dx4 + 30*dx2*dy2 - 5*dx2*dz2 - 16*dy4 + 10*dy2*dz2 - 2*dz4;
427-
double b8 = -30*dx4 + 55*dx2*dy2 + 20*dx2*dz2 + 8*dy4 - 75*dy2*dz2 + 22*dz4;
428-
double b9 = -30*dx4 + 20*dx2*dy2 + 55*dx2*dz2 + 22*dy4 - 75*dy2*dz2 + 8*dz4;
429-
double b10 = -10*dx4 - 5*dx2*dy2 + 30*dx2*dz2 - 2*dy4 + 10*dy2*dz2 - 16*dz4;
423+
double b1 = 32*dx4 - 40*dx2dy2 - 40*dx2dz2 + 12*dy4 + 10*dy2dz2 + 12*dz4;
424+
double b2 = -240*dx4 + 580*dx2dy2 + 20*dx2dz2 - 202*dy4 - 75*dy2dz2 + 22*dz4;
425+
double b3 = -240*dx4 + 20*dx2dy2 + 580*dx2dz2 + 22*dy4 - 75*dy2dz2 - 202*dz4;
426+
double b4 = 180*dx4 - 505*dx2dy2 + 55*dx2dz2 + 232*dy4 - 75*dy2dz2 + 8*dz4;
427+
double b5 = 360*dx4 - 450*dx2dy2 - 450*dx2dz2 - 180*dy4 + 900*dy2dz2 - 180*dz4;
428+
double b6 = 180*dx4 + 55*dx2dy2 - 505*dx2dz2 + 8*dy4 - 75*dy2dz2 + 232*dz4;
429+
double b7 = -10*dx4 + 30*dx2dy2 - 5*dx2dz2 - 16*dy4 + 10*dy2dz2 - 2*dz4;
430+
double b8 = -30*dx4 + 55*dx2dy2 + 20*dx2dz2 + 8*dy4 - 75*dy2dz2 + 22*dz4;
431+
double b9 = -30*dx4 + 20*dx2dy2 + 55*dx2dz2 + 22*dy4 - 75*dy2dz2 + 8*dz4;
432+
double b10 = -10*dx4 - 5*dx2dy2 + 30*dx2dz2 - 2*dy4 + 10*dy2dz2 - 16*dz4;
430433

431434
term7 = b1*sx6 + b2*sx4*sy2 + b3*sx4*sz2 + b4*sx2*sy4 + b5*sx2*sy2*sz2
432435
+ b6*sx2*sz4 + b7*sy6 + b8*sy4*sz2 + b9*sy2*sz4 + b10*sz6;
@@ -468,6 +471,10 @@ double DemagNxyAsymptotic(double x, double y, double z,
468471
double dy4 = dy2*dy2;
469472
double dz4 = dz2*dz2;
470473

474+
double dx2dy2 = dx2*dy2;
475+
double dy2dz2 = dy2*dz2;
476+
double dx2dz2 = dx2*dz2;
477+
471478
double term3 = 3;
472479

473480
double term5 = 0.0;
@@ -481,12 +488,12 @@ double DemagNxyAsymptotic(double x, double y, double z,
481488

482489
double term7 = 0.0;
483490
{
484-
double b1 = 16*dx4 - 30*dx2*dy2 - 10*dx2*dz2 + 10*dy4 + 5*dy2*dz2 + 2*dz4;
485-
double b2 = -40*dx4 + 105*dx2*dy2 - 5*dx2*dz2 - 40*dy4 - 5*dy2*dz2 + 4*dz4;
486-
double b3 = -40*dx4 - 15*dx2*dy2 + 115*dx2*dz2 + 20*dy4 - 35*dy2*dz2 - 32*dz4;
487-
double b4 = 10*dx4 - 30*dx2*dy2 + 5*dx2*dz2 + 16*dy4 - 10*dy2*dz2 + 2*dz4;
488-
double b5 = 20*dx4 - 15*dx2*dy2 - 35*dx2*dz2 - 40*dy4 + 115*dy2*dz2 - 32*dz4;
489-
double b6 = 10*dx4 + 15*dx2*dy2 - 40*dx2*dz2 + 10*dy4 - 40*dy2*dz2 + 32*dz4;
491+
double b1 = 16*dx4 - 30*dx2dy2 - 10*dx2dz2 + 10*dy4 + 5*dy2dz2 + 2*dz4;
492+
double b2 = -40*dx4 + 105*dx2dy2 - 5*dx2dz2 - 40*dy4 - 5*dy2dz2 + 4*dz4;
493+
double b3 = -40*dx4 - 15*dx2dy2 + 115*dx2dz2 + 20*dy4 - 35*dy2dz2 - 32*dz4;
494+
double b4 = 10*dx4 - 30*dx2dy2 + 5*dx2dz2 + 16*dy4 - 10*dy2dz2 + 2*dz4;
495+
double b5 = 20*dx4 - 15*dx2dy2 - 35*dx2dz2 - 40*dy4 + 115*dy2dz2 - 32*dz4;
496+
double b6 = 10*dx4 + 15*dx2dy2 - 40*dx2dz2 + 10*dy4 - 40*dy2dz2 + 32*dz4;
490497
term7 = b1*sx4 + b2*sx2*sy2 + b3*sx2*sz2
491498
+ b4*sy4 + b5*sy2*sz2 + b6*sz4;
492499
term7 *= (7.0/16.0);

0 commit comments

Comments
 (0)