Skip to content

Commit 7fdc3c7

Browse files
committed
butcher_row performance optimization
1 parent 3252969 commit 7fdc3c7

File tree

2 files changed

+93
-8
lines changed

2 files changed

+93
-8
lines changed

source/State.hpp

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ class StateVarDeriv
168168
* @return The module of the linear acceleration, or their sum in case
169169
* of lists of accelerations
170170
*/
171-
real MakeStationary(const real &dt);
171+
real MakeStationary(const real& dt);
172172

173173
/** @brief Carry out a Newmark step
174174
*
@@ -382,7 +382,7 @@ class DMoorDynStateDt
382382
* @param dt Time step.
383383
* @return The sum of the linear acceleration norms
384384
*/
385-
real MakeStationary(const real &dt);
385+
real MakeStationary(const real& dt);
386386

387387
/** @brief Carry out a Newmark step
388388
*
@@ -442,4 +442,74 @@ class DMoorDynStateDt
442442
void Mix(const DMoorDynStateDt& visitor, const real& f);
443443
};
444444

445+
/**
446+
* @brief Do the computation for a row of a Butcher Tableau for an explicit
447+
* integrator
448+
*
449+
* This function unwraps the computation so that is avoids any allocation.
450+
* This function essentially computes:
451+
* out_state = start_state + sum(scales[i] * derivs[i] for i = 1:N)
452+
*
453+
* out_state and start_state can be the same state.
454+
*
455+
* @tparam N Number of columns in the row
456+
* @param out_state Where to save the new state
457+
* @param start_state Starting state
458+
* @param scales Derivative weights, one for each derivative state
459+
* @param derivs State derivative values
460+
*/
461+
template<unsigned int N>
462+
constexpr void
463+
butcher_row(MoorDynState& out_state,
464+
const MoorDynState& start_state,
465+
const std::array<real, N>& scales,
466+
const std::array<const DMoorDynStateDt* const, N>& derivs)
467+
{
468+
static_assert(N > 0, "butcher_row must have at least one state deriv");
469+
470+
for (unsigned int lineIdx = 0; lineIdx < out_state.lines.size();
471+
lineIdx++) {
472+
auto& line = out_state.lines[lineIdx];
473+
for (unsigned int i = 0; i < line.pos.size(); i++) {
474+
line.pos[i] = start_state.lines[lineIdx].pos[i];
475+
line.vel[i] = start_state.lines[lineIdx].vel[i];
476+
for (unsigned int j = 0; j < N; j++) {
477+
line.pos[i] += scales[j] * derivs[j]->lines[lineIdx].vel[i];
478+
line.vel[i] += scales[j] * derivs[j]->lines[lineIdx].acc[i];
479+
}
480+
}
481+
}
482+
483+
for (unsigned int pointIdx = 0; pointIdx < out_state.points.size();
484+
pointIdx++) {
485+
auto& conn = out_state.points[pointIdx];
486+
conn.pos = start_state.points[pointIdx].pos;
487+
conn.vel = start_state.points[pointIdx].vel;
488+
for (unsigned int j = 0; j < N; j++) {
489+
conn.pos += scales[j] * derivs[j]->points[pointIdx].vel;
490+
conn.vel += scales[j] * derivs[j]->points[pointIdx].acc;
491+
}
492+
}
493+
494+
for (unsigned int rodIdx = 0; rodIdx < out_state.rods.size(); rodIdx++) {
495+
auto& rod = out_state.rods[rodIdx];
496+
rod.pos = start_state.rods[rodIdx].pos;
497+
rod.vel = start_state.rods[rodIdx].vel;
498+
for (unsigned int j = 0; j < N; j++) {
499+
rod.pos = rod.pos + derivs[j]->rods[rodIdx].vel * scales[j];
500+
rod.vel = rod.vel + scales[j] * derivs[j]->rods[rodIdx].acc;
501+
}
502+
}
503+
for (unsigned int bodyIdx = 0; bodyIdx < out_state.bodies.size();
504+
bodyIdx++) {
505+
auto& body = out_state.bodies[bodyIdx];
506+
body.pos = start_state.bodies[bodyIdx].pos;
507+
body.vel = start_state.bodies[bodyIdx].vel;
508+
for (unsigned int j = 0; j < N; j++) {
509+
body.pos = body.pos + derivs[j]->bodies[bodyIdx].vel * scales[j];
510+
body.vel = body.vel + scales[j] * derivs[j]->bodies[bodyIdx].acc;
511+
}
512+
}
513+
}
514+
445515
} // ::moordyn

source/Time.cpp

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
*/
3030

3131
#include "Time.hpp"
32+
#include "State.hpp"
3233
#include "Waves.hpp"
3334
#include <sstream>
3435

@@ -382,11 +383,13 @@ RK2Scheme::Step(real& dt)
382383
// Compute the intermediate state
383384
CalcStateDeriv(0);
384385
t += 0.5 * dt;
385-
r[1] = r[0] + rd[0] * (0.5 * dt);
386+
// r[1] = r[0] + rd[0] * (0.5 * dt);
387+
butcher_row<1>(r[1], r[0], { 0.5 * dt }, { &rd[0] });
386388
Update(0.5 * dt, 1);
387389
// And so we can compute the new derivative and apply it
388390
CalcStateDeriv(1);
389-
r[0] = r[0] + rd[1] * dt;
391+
// r[0] = r[0] + rd[1] * dt;
392+
butcher_row<1>(r[0], r[0], { dt }, { &rd[1] });
390393
t += 0.5 * dt;
391394
Update(dt, 0);
392395
TimeSchemeBase::Step(dt);
@@ -408,23 +411,35 @@ RK4Scheme::Step(real& dt)
408411

409412
// k2
410413
t += 0.5 * dt;
411-
r[1] = r[0] + rd[0] * (0.5 * dt);
414+
// r[1] = r[0] + rd[0] * (0.5 * dt);
415+
butcher_row<1>(r[1], r[0], { 0.5 * dt }, { &rd[0] });
416+
412417
Update(0.5 * dt, 1);
413418
CalcStateDeriv(1);
414419

415420
// k3
416-
r[1] = r[0] + rd[1] * (0.5 * dt);
421+
422+
// r[1] = r[0] + rd[1] * (0.5 * dt);
423+
butcher_row<1>(r[1], r[0], { 0.5 * dt }, { &rd[1] });
417424
Update(0.5 * dt, 1);
418425
CalcStateDeriv(2);
419426

420427
// k4
421428
t += 0.5 * dt;
422-
r[2] = r[0] + rd[2] * dt;
429+
430+
// r[2] = r[0] + rd[2] * dt;
431+
butcher_row<1>(r[2], r[0], { dt }, { &rd[2] });
432+
423433
Update(dt, 2);
424434
CalcStateDeriv(3);
425435

426436
// Apply
427-
r[0] = r[0] + (rd[0] + rd[3]) * (dt / 6.0) + (rd[1] + rd[2]) * (dt / 3.0);
437+
// r[0] = r[0] + (rd[0] + rd[3]) * (dt / 6.0) + (rd[1] + rd[2]) * (dt
438+
// / 3.0);
439+
butcher_row<4>(r[0],
440+
r[0],
441+
{ dt / 6.0, dt / 3.0, dt / 3.0, dt / 6.0 },
442+
{ &rd[0], &rd[1], &rd[2], &rd[3] });
428443

429444
Update(dt, 0);
430445
TimeSchemeBase::Step(dt);

0 commit comments

Comments
 (0)