Skip to content

Commit 8984a19

Browse files
committed
Spawn and cilk_for everything in Naive, Grid::for_each_* loops, and vectorized prepare/filter
1 parent d7c3f38 commit 8984a19

File tree

6 files changed

+58
-32
lines changed

6 files changed

+58
-32
lines changed

.clang-format

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ AllowShortLoopsOnASingleLine: false
88
BreakBeforeBraces: Attach
99
InsertBraces: true
1010
ColumnLimit: 100
11+
ForEachMacros: [ cilk_for ]
1112

1213
IncludeCategories:
1314
- Regex: '^<'

cpp/lib/Brackets.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@ void Brackets::bracket(DxDy<View::R_XY> const &op1, DxDy<View::R_XY> const &op2,
1414
void Brackets::derivatives(View::C_XY const &op, DxDy<View::R_XY> output) const {
1515
DxDy<Buf::C_XY> Der_K{grid.KX, grid.KY};
1616
prepareDXY(op, Der_K);
17-
tf.bfft(Der_K.DX, output.DX);
18-
tf.bfft(Der_K.DY, output.DY);
17+
cilk_scope {
18+
cilk_spawn tf.bfft(Der_K.DX, output.DX);
19+
tf.bfft(Der_K.DY, output.DY);
20+
}
1921
}
2022

2123
Brackets::Buf::C_XY Brackets::halfBracket(DxDy<View::R_XY> derOp1, DxDy<View::R_XY> derOp2) const {
@@ -30,8 +32,10 @@ Brackets::Buf::C_XY Brackets::halfBracket(DxDy<View::R_XY> derOp1, DxDy<View::R_
3032

3133
[[nodiscard]] Brackets::Buf::C_XY Brackets::fullBracket(View::C_XY op1, View::C_XY op2) const {
3234
auto derOp1 = grid.dBufXY(), derOp2 = grid.dBufXY();
33-
derivatives(op1, derOp1);
34-
derivatives(op2, derOp2);
35+
cilk_scope {
36+
cilk_spawn derivatives(op1, derOp1);
37+
derivatives(op2, derOp2);
38+
}
3539

3640
return halfBracket(derOp1, derOp2);
3741
}

cpp/lib/Filter.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ void HouLiFilterCached1DVector::operator()(Grid::View::C_XY view) const {
6868

6969
static_assert(view.stride(0) == 1); // contiguous in kx
7070

71-
for (int ky = 0; ky < grid.KY; ky += KY_TILE) {
71+
cilk_for (int ky = 0; ky < grid.KY; ky += KY_TILE) {
7272
// avoid std::vector dereference inside loop:
7373
// broadcast fy value into vector
7474
std::array<VReal, KY_TILE> vfy;

cpp/lib/Naive.cpp

Lines changed: 44 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -84,21 +84,26 @@ void Naive::run(Dim N, Dim saveInterval) {
8484
}
8585

8686
hyper = HyperCoefficients::calculate(dt, g);
87-
exp_nu.update(hyper, dt);
88-
exp_nu_g.update(hyper, dt);
89-
exp_eta.update(hyper, dt);
90-
exp_gm.update(hyper, dt);
87+
cilk_scope {
88+
cilk_spawn exp_nu.update(hyper, dt);
89+
cilk_spawn exp_nu_g.update(hyper, dt);
90+
cilk_spawn exp_eta.update(hyper, dt);
91+
exp_gm.update(hyper, dt);
92+
}
9193

9294
spdlog::debug("dt: {}", dt);
9395

9496
// store results of nonlinear operators, as well as results of predictor step
9597
auto GM_K_Star = g.cBufMXY(), GM_Nonlinear_K = g.cBufMXY();
9698

99+
// spawn a bunch of stuff here
97100
// Compute N
98-
auto bracketPhiNE_K = br.halfBracket(dPhi, Grid::sliceXY(dGM, N_E));
99-
auto bracketAParUEKPar_K = br.halfBracket(Grid::sliceXY(dGM, A_PAR), dUEKPar);
101+
auto bracketPhiNE_K = cilk_spawn br.halfBracket(dPhi, Grid::sliceXY(dGM, N_E));
102+
auto bracketAParUEKPar_K = cilk_spawn br.halfBracket(Grid::sliceXY(dGM, A_PAR), dUEKPar);
100103

101104
// Compute A
105+
auto bracketUEParPhi_K = cilk_spawn br.halfBracket(dUEKPar, dPhi);
106+
102107
auto dPhiNeG2 = g.dBufXY();
103108
if (g.M > 2) {
104109
g.for_each_xy([&](Dim x, Dim y) {
@@ -115,7 +120,7 @@ void Naive::run(Dim N, Dim saveInterval) {
115120
}
116121

117122
auto bracketAParPhiG2Ne_K = br.halfBracket(Grid::sliceXY(dGM, A_PAR), dPhiNeG2);
118-
auto bracketUEParPhi_K = br.halfBracket(dUEKPar, dPhi);
123+
cilk_sync;
119124

120125
g.for_each_kxky([&](Dim kx, Dim ky) {
121126
GM_Nonlinear_K(kx, ky, N_E) =
@@ -132,12 +137,12 @@ void Naive::run(Dim N, Dim saveInterval) {
132137

133138
if (g.M > 2) {
134139
// Compute G2
135-
auto bracketPhiG2_K = br.halfBracket(dPhi, Grid::sliceXY(dGM, G_MIN));
140+
auto bracketPhiG2_K = cilk_spawn br.halfBracket(dPhi, Grid::sliceXY(dGM, G_MIN));
136141
auto bracketAParG3_K =
137-
br.halfBracket(Grid::sliceXY(dGM, A_PAR), Grid::sliceXY(dGM, G_MIN + 1));
142+
cilk_spawn br.halfBracket(Grid::sliceXY(dGM, A_PAR), Grid::sliceXY(dGM, G_MIN + 1));
138143

139144
// Compute G_{M-1}
140-
auto bracketPhiGLast_K = br.halfBracket(dPhi, Grid::sliceXY(dGM, LAST));
145+
auto bracketPhiGLast_K = cilk_spawn br.halfBracket(dPhi, Grid::sliceXY(dGM, LAST));
141146
auto bracketAParGLast_K = br.halfBracket(Grid::sliceXY(dGM, A_PAR), Grid::sliceXY(dGM, LAST));
142147
g.for_each_kxky([&](Dim kx, Dim ky) {
143148
bracketAParGLast_K(kx, ky) *= nonlinear::GLastBracketFactor(g.M, g.kPerp2(kx, ky), hyper);
@@ -148,6 +153,7 @@ void Naive::run(Dim N, Dim saveInterval) {
148153
auto dBrLast = g.dBufXY();
149154
br.derivatives(bracketAParGLast_K, dBrLast);
150155
auto bracketTotalGLast_K = br.halfBracket(Grid::sliceXY(dGM, A_PAR), dBrLast);
156+
cilk_sync;
151157

152158
g.for_each_kxky([&](Dim kx, Dim ky) {
153159
GM_Nonlinear_K(kx, ky, G_MIN) = nonlinear::G2(
@@ -162,17 +168,19 @@ void Naive::run(Dim N, Dim saveInterval) {
162168
dt / 2.0 * (1 + exp_gm(LAST) * exp_nu_g(kx, ky)) * GM_Nonlinear_K(kx, ky, LAST);
163169
});
164170

165-
auto dGMinusPlus = g.dBufXY();
166-
for (Dim m = G_MIN + 1; m < LAST; ++m) {
171+
cilk_for (Dim m = G_MIN + 1; m < LAST; ++m) {
172+
auto dGMinusPlus = g.dBufXY();
167173
g.for_each_xy([&](Dim x, Dim y) {
168174
dGMinusPlus.DX(x, y) =
169175
std::sqrt(m) * dGM.DX(x, y, m - 1) + std::sqrt(m + 1) * dGM.DX(x, y, m + 1);
170176
dGMinusPlus.DY(x, y) =
171177
std::sqrt(m) * dGM.DY(x, y, m - 1) + std::sqrt(m + 1) * dGM.DY(x, y, m + 1);
172178
});
173179

174-
auto bracketAParGMMinusPlus_K = br.halfBracket(Grid::sliceXY(dGM, A_PAR), dGMinusPlus);
180+
auto bracketAParGMMinusPlus_K =
181+
cilk_spawn br.halfBracket(Grid::sliceXY(dGM, A_PAR), dGMinusPlus);
175182
auto bracketPhiGM_K = br.halfBracket(dPhi, Grid::sliceXY(dGM, m));
183+
cilk_sync;
176184

177185
g.for_each_kxky([&](Dim kx, Dim ky) {
178186
GM_Nonlinear_K(kx, ky, m) =
@@ -196,13 +204,14 @@ void Naive::run(Dim N, Dim saveInterval) {
196204

197205
auto dPhi_Loop = g.dBufXY(), dUEKPar_Loop = g.dBufXY();
198206
auto dGM_Loop = g.dBufMXY();
199-
br.derivatives(phi_K_New, dPhi_Loop);
200-
br.derivatives(ueKPar_K_New, dUEKPar_Loop);
207+
cilk_spawn br.derivatives(phi_K_New, dPhi_Loop);
208+
cilk_spawn br.derivatives(ueKPar_K_New, dUEKPar_Loop);
201209

202-
for (int m = 0; m < g.M; ++m) {
210+
cilk_for (int m = 0; m < g.M; ++m) {
203211
// TODO(OPT) not necessary if we bail (only up to G_MIN)
204212
br.derivatives(Grid::sliceXY(GM_K_Star, m), Grid::sliceXY(dGM_Loop, m));
205213
}
214+
cilk_sync;
206215

207216
// Corrector loop
208217
// TODO confirm that only m derivatives are needed at a time
@@ -239,8 +248,9 @@ void Naive::run(Dim N, Dim saveInterval) {
239248
});
240249

241250
auto bracketAParPhiG2Ne_K_Loop =
242-
br.halfBracket(Grid::sliceXY(dGM_Loop, A_PAR), dPhiNeG2_Loop);
251+
cilk_spawn br.halfBracket(Grid::sliceXY(dGM_Loop, A_PAR), dPhiNeG2_Loop);
243252
auto bracketUEParPhi_K_Loop = br.halfBracket(dUEKPar_Loop, dPhi_Loop);
253+
cilk_sync;
244254

245255
/// f_pred from Viriato
246256
auto GM_Nonlinear_K_Loop = g.cBufMXY();
@@ -272,11 +282,14 @@ void Naive::run(Dim N, Dim saveInterval) {
272282
spdlog::debug("sumAParRelError: {}, relative_error: {}", sumAParRelError, relative_error);
273283
// TODO(OPT) bail if relative error is large
274284

275-
DerivateNewMoment(A_PAR);
276-
br.derivatives(ueKPar_K_New, dUEKPar_Loop);
285+
auto bracketPhiNE_K_Loop = cilk_spawn br.halfBracket(dPhi_Loop, Grid::sliceXY(dGM_Loop, N_E));
277286

278-
auto bracketPhiNE_K_Loop = br.halfBracket(dPhi_Loop, Grid::sliceXY(dGM_Loop, N_E));
287+
cilk_scope {
288+
cilk_spawn DerivateNewMoment(A_PAR);
289+
br.derivatives(ueKPar_K_New, dUEKPar_Loop);
290+
}
279291
auto bracketAParUEKPar_K_Loop = br.halfBracket(Grid::sliceXY(dGM_Loop, A_PAR), dUEKPar_Loop);
292+
cilk_sync;
280293

281294
g.for_each_kxky([&](Dim kx, Dim ky) {
282295
GM_Nonlinear_K_Loop(kx, ky, N_E) =
@@ -290,13 +303,17 @@ void Naive::run(Dim N, Dim saveInterval) {
290303
(kx | ky) == 0 ? 0 : nonlinear::phi(momentsNew_K(kx, ky, N_E), g.kPerp2(kx, ky));
291304
});
292305

293-
br.derivatives(phi_K_New, dPhi_Loop);
306+
cilk_spawn br.derivatives(phi_K_New, dPhi_Loop);
294307
DerivateNewMoment(N_E);
308+
cilk_sync;
309+
295310
if (g.M > 2) {
296311
// Compute G2
297-
auto bracketPhiG2_K_Loop = br.halfBracket(dPhi_Loop, Grid::sliceXY(dGM_Loop, G_MIN));
312+
auto bracketPhiG2_K_Loop =
313+
cilk_spawn br.halfBracket(dPhi_Loop, Grid::sliceXY(dGM_Loop, G_MIN));
298314
auto bracketAParG3_K_Loop =
299315
br.halfBracket(Grid::sliceXY(dGM_Loop, A_PAR), Grid::sliceXY(dGM_Loop, G_MIN + 1));
316+
cilk_sync;
300317

301318
g.for_each_kxky([&](Dim kx, Dim ky) {
302319
GM_Nonlinear_K_Loop(kx, ky, G_MIN) =
@@ -319,8 +336,9 @@ void Naive::run(Dim N, Dim saveInterval) {
319336
});
320337

321338
auto bracketAParGMMinusPlus_K_Loop =
322-
br.halfBracket(Grid::sliceXY(dGM_Loop, A_PAR), dGMinusPlus_Loop);
339+
cilk_spawn br.halfBracket(Grid::sliceXY(dGM_Loop, A_PAR), dGMinusPlus_Loop);
323340
auto bracketPhiGM_K_Loop = br.halfBracket(dPhi_Loop, Grid::sliceXY(dGM_Loop, m));
341+
cilk_sync;
324342

325343
g.for_each_kxky([&](Dim kx, Dim ky) {
326344
GM_Nonlinear_K_Loop(kx, ky, m) = nonlinear::GM(m, bracketPhiGM_K_Loop(kx, ky),
@@ -336,7 +354,8 @@ void Naive::run(Dim N, Dim saveInterval) {
336354
}
337355

338356
// Compute G_{M-1}
339-
auto bracketPhiGLast_K_Loop = br.halfBracket(dPhi_Loop, Grid::sliceXY(dGM_Loop, LAST));
357+
auto bracketPhiGLast_K_Loop =
358+
cilk_spawn br.halfBracket(dPhi_Loop, Grid::sliceXY(dGM_Loop, LAST));
340359
auto bracketAParGLast_K_Loop =
341360
br.halfBracket(Grid::sliceXY(dGM_Loop, A_PAR), Grid::sliceXY(dGM_Loop, LAST));
342361
g.for_each_kxky([&](Dim kx, Dim ky) {
@@ -351,6 +370,7 @@ void Naive::run(Dim N, Dim saveInterval) {
351370
br.derivatives(bracketAParGLast_K_Loop, dBrLast_Loop);
352371
auto bracketTotalGLast_K_Loop =
353372
br.halfBracket(Grid::sliceXY(dGM_Loop, A_PAR), dBrLast_Loop);
373+
cilk_sync;
354374

355375
g.for_each_kxky([&](Dim kx, Dim ky) {
356376
GM_Nonlinear_K_Loop(kx, ky, LAST) =

cpp/lib/PrepareDerivatives.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ void PrepareDerivativesVector::operator()(View::C_XY const &in, DxDy<View::C_XY>
1717
VReal const kx_v_init{[](int idx, int) { return Real(idx / 2); }};
1818
VReal const norm_v{XYNorm};
1919

20-
for (Dim ky = 0; ky < grid.KY; ky += KY_TILE) {
20+
cilk_for (Dim ky = 0; ky < grid.KY; ky += KY_TILE) {
2121
// broadcast ky values
2222
using TileReal = std::array<VReal, KY_TILE>;
2323
TileReal ky_v;

cpp/lib/grid.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#pragma once
22

3+
#include "cilk.hpp"
34
#include "constants.hpp"
45
#include "typedefs.hpp"
56

@@ -103,7 +104,7 @@ struct Grid {
103104

104105
/// Iterate in real space
105106
void for_each_xy(std::invocable<Dim, Dim> auto fun) const {
106-
for (Dim y = 0; y < Y; ++y) {
107+
cilk_for (Dim y = 0; y < Y; ++y) {
107108
for (Dim x = 0; x < X; ++x) {
108109
fun(x, y);
109110
}
@@ -112,7 +113,7 @@ struct Grid {
112113

113114
/// Iterate in phase space
114115
void for_each_kxky(std::invocable<Dim, Dim> auto fun) const {
115-
for (Dim ky = 0; ky < KY; ++ky) {
116+
cilk_for (Dim ky = 0; ky < KY; ++ky) {
116117
for (Dim kx = 0; kx < KX; ++kx) {
117118
fun(kx, ky);
118119
}

0 commit comments

Comments
 (0)