Skip to content

Commit 79bfa0d

Browse files
committed
Spawn and cilk_for everything in Naive, Grid::for_each_* loops, and vectorized prepare/filter
1 parent d7c3f38 commit 79bfa0d

File tree

6 files changed

+59
-32
lines changed

6 files changed

+59
-32
lines changed

.clang-format

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ AllowShortLoopsOnASingleLine: false
88
BreakBeforeBraces: Attach
99
InsertBraces: true
1010
ColumnLimit: 100
11+
ForEachMacros: [ cilk_for ]
1112

1213
IncludeCategories:
1314
- Regex: '^<'

cpp/lib/Brackets.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@ void Brackets::bracket(DxDy<View::R_XY> const &op1, DxDy<View::R_XY> const &op2,
1414
void Brackets::derivatives(View::C_XY const &op, DxDy<View::R_XY> output) const {
1515
DxDy<Buf::C_XY> Der_K{grid.KX, grid.KY};
1616
prepareDXY(op, Der_K);
17-
tf.bfft(Der_K.DX, output.DX);
18-
tf.bfft(Der_K.DY, output.DY);
17+
cilk_scope {
18+
cilk_spawn tf.bfft(Der_K.DX, output.DX);
19+
tf.bfft(Der_K.DY, output.DY);
20+
}
1921
}
2022

2123
Brackets::Buf::C_XY Brackets::halfBracket(DxDy<View::R_XY> derOp1, DxDy<View::R_XY> derOp2) const {
@@ -30,8 +32,10 @@ Brackets::Buf::C_XY Brackets::halfBracket(DxDy<View::R_XY> derOp1, DxDy<View::R_
3032

3133
[[nodiscard]] Brackets::Buf::C_XY Brackets::fullBracket(View::C_XY op1, View::C_XY op2) const {
3234
auto derOp1 = grid.dBufXY(), derOp2 = grid.dBufXY();
33-
derivatives(op1, derOp1);
34-
derivatives(op2, derOp2);
35+
cilk_scope {
36+
cilk_spawn derivatives(op1, derOp1);
37+
derivatives(op2, derOp2);
38+
}
3539

3640
return halfBracket(derOp1, derOp2);
3741
}

cpp/lib/Filter.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ void HouLiFilterCached1DVector::operator()(Grid::View::C_XY view) const {
6868

6969
static_assert(view.stride(0) == 1); // contiguous in kx
7070

71-
for (int ky = 0; ky < grid.KY; ky += KY_TILE) {
71+
cilk_for (int ky = 0; ky < grid.KY; ky += KY_TILE) {
7272
// avoid std::vector dereference inside loop:
7373
// broadcast fy value into vector
7474
std::array<VReal, KY_TILE> vfy;

cpp/lib/Naive.cpp

Lines changed: 45 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -84,19 +84,22 @@ void Naive::run(Dim N, Dim saveInterval) {
8484
}
8585

8686
hyper = HyperCoefficients::calculate(dt, g);
87-
exp_nu.update(hyper, dt);
88-
exp_nu_g.update(hyper, dt);
89-
exp_eta.update(hyper, dt);
90-
exp_gm.update(hyper, dt);
87+
cilk_scope {
88+
cilk_spawn exp_nu.update(hyper, dt);
89+
cilk_spawn exp_nu_g.update(hyper, dt);
90+
cilk_spawn exp_eta.update(hyper, dt);
91+
exp_gm.update(hyper, dt);
92+
}
9193

9294
spdlog::debug("dt: {}", dt);
9395

9496
// store results of nonlinear operators, as well as results of predictor step
9597
auto GM_K_Star = g.cBufMXY(), GM_Nonlinear_K = g.cBufMXY();
9698

99+
// spawn a bunch of stuff here
97100
// Compute N
98-
auto bracketPhiNE_K = br.halfBracket(dPhi, Grid::sliceXY(dGM, N_E));
99-
auto bracketAParUEKPar_K = br.halfBracket(Grid::sliceXY(dGM, A_PAR), dUEKPar);
101+
auto bracketPhiNE_K = cilk_spawn br.halfBracket(dPhi, Grid::sliceXY(dGM, N_E));
102+
auto bracketAParUEKPar_K = cilk_spawn br.halfBracket(Grid::sliceXY(dGM, A_PAR), dUEKPar);
100103

101104
// Compute A
102105
auto dPhiNeG2 = g.dBufXY();
@@ -114,8 +117,11 @@ void Naive::run(Dim N, Dim saveInterval) {
114117
});
115118
}
116119

117-
auto bracketAParPhiG2Ne_K = br.halfBracket(Grid::sliceXY(dGM, A_PAR), dPhiNeG2);
120+
// TODO this one could be before the loop if the halfBracket wasn't destructive
121+
// - for some reason this only shows up on MultiRun tests
122+
auto bracketAParPhiG2Ne_K = cilk_spawn br.halfBracket(Grid::sliceXY(dGM, A_PAR), dPhiNeG2);
118123
auto bracketUEParPhi_K = br.halfBracket(dUEKPar, dPhi);
124+
cilk_sync;
119125

120126
g.for_each_kxky([&](Dim kx, Dim ky) {
121127
GM_Nonlinear_K(kx, ky, N_E) =
@@ -132,12 +138,12 @@ void Naive::run(Dim N, Dim saveInterval) {
132138

133139
if (g.M > 2) {
134140
// Compute G2
135-
auto bracketPhiG2_K = br.halfBracket(dPhi, Grid::sliceXY(dGM, G_MIN));
141+
auto bracketPhiG2_K = cilk_spawn br.halfBracket(dPhi, Grid::sliceXY(dGM, G_MIN));
136142
auto bracketAParG3_K =
137-
br.halfBracket(Grid::sliceXY(dGM, A_PAR), Grid::sliceXY(dGM, G_MIN + 1));
143+
cilk_spawn br.halfBracket(Grid::sliceXY(dGM, A_PAR), Grid::sliceXY(dGM, G_MIN + 1));
138144

139145
// Compute G_{M-1}
140-
auto bracketPhiGLast_K = br.halfBracket(dPhi, Grid::sliceXY(dGM, LAST));
146+
auto bracketPhiGLast_K = cilk_spawn br.halfBracket(dPhi, Grid::sliceXY(dGM, LAST));
141147
auto bracketAParGLast_K = br.halfBracket(Grid::sliceXY(dGM, A_PAR), Grid::sliceXY(dGM, LAST));
142148
g.for_each_kxky([&](Dim kx, Dim ky) {
143149
bracketAParGLast_K(kx, ky) *= nonlinear::GLastBracketFactor(g.M, g.kPerp2(kx, ky), hyper);
@@ -148,6 +154,7 @@ void Naive::run(Dim N, Dim saveInterval) {
148154
auto dBrLast = g.dBufXY();
149155
br.derivatives(bracketAParGLast_K, dBrLast);
150156
auto bracketTotalGLast_K = br.halfBracket(Grid::sliceXY(dGM, A_PAR), dBrLast);
157+
cilk_sync;
151158

152159
g.for_each_kxky([&](Dim kx, Dim ky) {
153160
GM_Nonlinear_K(kx, ky, G_MIN) = nonlinear::G2(
@@ -162,17 +169,19 @@ void Naive::run(Dim N, Dim saveInterval) {
162169
dt / 2.0 * (1 + exp_gm(LAST) * exp_nu_g(kx, ky)) * GM_Nonlinear_K(kx, ky, LAST);
163170
});
164171

165-
auto dGMinusPlus = g.dBufXY();
166-
for (Dim m = G_MIN + 1; m < LAST; ++m) {
172+
cilk_for (Dim m = G_MIN + 1; m < LAST; ++m) {
173+
auto dGMinusPlus = g.dBufXY();
167174
g.for_each_xy([&](Dim x, Dim y) {
168175
dGMinusPlus.DX(x, y) =
169176
std::sqrt(m) * dGM.DX(x, y, m - 1) + std::sqrt(m + 1) * dGM.DX(x, y, m + 1);
170177
dGMinusPlus.DY(x, y) =
171178
std::sqrt(m) * dGM.DY(x, y, m - 1) + std::sqrt(m + 1) * dGM.DY(x, y, m + 1);
172179
});
173180

174-
auto bracketAParGMMinusPlus_K = br.halfBracket(Grid::sliceXY(dGM, A_PAR), dGMinusPlus);
181+
auto bracketAParGMMinusPlus_K =
182+
cilk_spawn br.halfBracket(Grid::sliceXY(dGM, A_PAR), dGMinusPlus);
175183
auto bracketPhiGM_K = br.halfBracket(dPhi, Grid::sliceXY(dGM, m));
184+
cilk_sync;
176185

177186
g.for_each_kxky([&](Dim kx, Dim ky) {
178187
GM_Nonlinear_K(kx, ky, m) =
@@ -196,13 +205,14 @@ void Naive::run(Dim N, Dim saveInterval) {
196205

197206
auto dPhi_Loop = g.dBufXY(), dUEKPar_Loop = g.dBufXY();
198207
auto dGM_Loop = g.dBufMXY();
199-
br.derivatives(phi_K_New, dPhi_Loop);
200-
br.derivatives(ueKPar_K_New, dUEKPar_Loop);
208+
cilk_spawn br.derivatives(phi_K_New, dPhi_Loop);
209+
cilk_spawn br.derivatives(ueKPar_K_New, dUEKPar_Loop);
201210

202-
for (int m = 0; m < g.M; ++m) {
211+
cilk_for (int m = 0; m < g.M; ++m) {
203212
// TODO(OPT) not necessary if we bail (only up to G_MIN)
204213
br.derivatives(Grid::sliceXY(GM_K_Star, m), Grid::sliceXY(dGM_Loop, m));
205214
}
215+
cilk_sync;
206216

207217
// Corrector loop
208218
// TODO confirm that only m derivatives are needed at a time
@@ -239,8 +249,9 @@ void Naive::run(Dim N, Dim saveInterval) {
239249
});
240250

241251
auto bracketAParPhiG2Ne_K_Loop =
242-
br.halfBracket(Grid::sliceXY(dGM_Loop, A_PAR), dPhiNeG2_Loop);
252+
cilk_spawn br.halfBracket(Grid::sliceXY(dGM_Loop, A_PAR), dPhiNeG2_Loop);
243253
auto bracketUEParPhi_K_Loop = br.halfBracket(dUEKPar_Loop, dPhi_Loop);
254+
cilk_sync;
244255

245256
/// f_pred from Viriato
246257
auto GM_Nonlinear_K_Loop = g.cBufMXY();
@@ -272,11 +283,14 @@ void Naive::run(Dim N, Dim saveInterval) {
272283
spdlog::debug("sumAParRelError: {}, relative_error: {}", sumAParRelError, relative_error);
273284
// TODO(OPT) bail if relative error is large
274285

275-
DerivateNewMoment(A_PAR);
276-
br.derivatives(ueKPar_K_New, dUEKPar_Loop);
286+
auto bracketPhiNE_K_Loop = cilk_spawn br.halfBracket(dPhi_Loop, Grid::sliceXY(dGM_Loop, N_E));
277287

278-
auto bracketPhiNE_K_Loop = br.halfBracket(dPhi_Loop, Grid::sliceXY(dGM_Loop, N_E));
288+
cilk_scope {
289+
cilk_spawn DerivateNewMoment(A_PAR);
290+
br.derivatives(ueKPar_K_New, dUEKPar_Loop);
291+
}
279292
auto bracketAParUEKPar_K_Loop = br.halfBracket(Grid::sliceXY(dGM_Loop, A_PAR), dUEKPar_Loop);
293+
cilk_sync;
280294

281295
g.for_each_kxky([&](Dim kx, Dim ky) {
282296
GM_Nonlinear_K_Loop(kx, ky, N_E) =
@@ -290,13 +304,17 @@ void Naive::run(Dim N, Dim saveInterval) {
290304
(kx | ky) == 0 ? 0 : nonlinear::phi(momentsNew_K(kx, ky, N_E), g.kPerp2(kx, ky));
291305
});
292306

293-
br.derivatives(phi_K_New, dPhi_Loop);
307+
cilk_spawn br.derivatives(phi_K_New, dPhi_Loop);
294308
DerivateNewMoment(N_E);
309+
cilk_sync;
310+
295311
if (g.M > 2) {
296312
// Compute G2
297-
auto bracketPhiG2_K_Loop = br.halfBracket(dPhi_Loop, Grid::sliceXY(dGM_Loop, G_MIN));
313+
auto bracketPhiG2_K_Loop =
314+
cilk_spawn br.halfBracket(dPhi_Loop, Grid::sliceXY(dGM_Loop, G_MIN));
298315
auto bracketAParG3_K_Loop =
299316
br.halfBracket(Grid::sliceXY(dGM_Loop, A_PAR), Grid::sliceXY(dGM_Loop, G_MIN + 1));
317+
cilk_sync;
300318

301319
g.for_each_kxky([&](Dim kx, Dim ky) {
302320
GM_Nonlinear_K_Loop(kx, ky, G_MIN) =
@@ -319,8 +337,9 @@ void Naive::run(Dim N, Dim saveInterval) {
319337
});
320338

321339
auto bracketAParGMMinusPlus_K_Loop =
322-
br.halfBracket(Grid::sliceXY(dGM_Loop, A_PAR), dGMinusPlus_Loop);
340+
cilk_spawn br.halfBracket(Grid::sliceXY(dGM_Loop, A_PAR), dGMinusPlus_Loop);
323341
auto bracketPhiGM_K_Loop = br.halfBracket(dPhi_Loop, Grid::sliceXY(dGM_Loop, m));
342+
cilk_sync;
324343

325344
g.for_each_kxky([&](Dim kx, Dim ky) {
326345
GM_Nonlinear_K_Loop(kx, ky, m) = nonlinear::GM(m, bracketPhiGM_K_Loop(kx, ky),
@@ -336,7 +355,8 @@ void Naive::run(Dim N, Dim saveInterval) {
336355
}
337356

338357
// Compute G_{M-1}
339-
auto bracketPhiGLast_K_Loop = br.halfBracket(dPhi_Loop, Grid::sliceXY(dGM_Loop, LAST));
358+
auto bracketPhiGLast_K_Loop =
359+
cilk_spawn br.halfBracket(dPhi_Loop, Grid::sliceXY(dGM_Loop, LAST));
340360
auto bracketAParGLast_K_Loop =
341361
br.halfBracket(Grid::sliceXY(dGM_Loop, A_PAR), Grid::sliceXY(dGM_Loop, LAST));
342362
g.for_each_kxky([&](Dim kx, Dim ky) {
@@ -351,6 +371,7 @@ void Naive::run(Dim N, Dim saveInterval) {
351371
br.derivatives(bracketAParGLast_K_Loop, dBrLast_Loop);
352372
auto bracketTotalGLast_K_Loop =
353373
br.halfBracket(Grid::sliceXY(dGM_Loop, A_PAR), dBrLast_Loop);
374+
cilk_sync;
354375

355376
g.for_each_kxky([&](Dim kx, Dim ky) {
356377
GM_Nonlinear_K_Loop(kx, ky, LAST) =

cpp/lib/PrepareDerivatives.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ void PrepareDerivativesVector::operator()(View::C_XY const &in, DxDy<View::C_XY>
1717
VReal const kx_v_init{[](int idx, int) { return Real(idx / 2); }};
1818
VReal const norm_v{XYNorm};
1919

20-
for (Dim ky = 0; ky < grid.KY; ky += KY_TILE) {
20+
cilk_for (Dim ky = 0; ky < grid.KY; ky += KY_TILE) {
2121
// broadcast ky values
2222
using TileReal = std::array<VReal, KY_TILE>;
2323
TileReal ky_v;

cpp/lib/grid.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#pragma once
22

3+
#include "cilk.hpp"
34
#include "constants.hpp"
45
#include "typedefs.hpp"
56

@@ -103,7 +104,7 @@ struct Grid {
103104

104105
/// Iterate in real space
105106
void for_each_xy(std::invocable<Dim, Dim> auto fun) const {
106-
for (Dim y = 0; y < Y; ++y) {
107+
cilk_for (Dim y = 0; y < Y; ++y) {
107108
for (Dim x = 0; x < X; ++x) {
108109
fun(x, y);
109110
}
@@ -112,7 +113,7 @@ struct Grid {
112113

113114
/// Iterate in phase space
114115
void for_each_kxky(std::invocable<Dim, Dim> auto fun) const {
115-
for (Dim ky = 0; ky < KY; ++ky) {
116+
cilk_for (Dim ky = 0; ky < KY; ++ky) {
116117
for (Dim kx = 0; kx < KX; ++kx) {
117118
fun(kx, ky);
118119
}

0 commit comments

Comments
 (0)