Skip to content

Commit 4180ec3

Browse files
committed
GW Spectrum computation done fully on GPUs now
1 parent a0e18b4 commit 4180ec3

File tree

2 files changed

+170
-77
lines changed

2 files changed

+170
-77
lines changed

source/gravitational_waves.cpp

Lines changed: 128 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include "fft.h"
33
#include "hdf5_utils.h"
44
#include "sledgehamr_utils.h"
5+
#include <AMReX_ParallelReduce.H>
56

67
namespace sledgehamr {
78

@@ -67,6 +68,9 @@ void GravitationalWaves::ComputeSpectrum(
6768
int comps[6];
6869
modifier->SelectComponents(comps);
6970

71+
std::chrono::steady_clock::time_point start_time =
72+
std::chrono::steady_clock::now();
73+
7074
for (int i = 0; i < 6; ++i) {
7175
#ifdef OLD_FFT
7276
const amrex::BoxArray &ba = ld.boxArray();
@@ -78,6 +82,14 @@ void GravitationalWaves::ComputeSpectrum(
7882
sim->geom[lev], false, zero_padding);
7983
}
8084

85+
std::chrono::steady_clock::time_point end_time =
86+
std::chrono::steady_clock::now();
87+
double duration_ms = static_cast<double>(
88+
std::chrono::duration_cast<std::chrono::microseconds>(end_time -
89+
start_time)
90+
.count());
91+
// amrex::Print() << "FFTs: " << duration_ms << std::endl;
92+
8193
double dk = 2. * M_PI / L;
8294
double dimN6 = pow(dimN, 6);
8395

@@ -93,15 +105,105 @@ void GravitationalWaves::ComputeSpectrum(
93105
k <= std::sqrt(3.) / 2. * static_cast<double>(dimN) + .5; ++k) {
94106
ks.push_back(k * k);
95107
}
96-
NTHREADS = 256;
108+
NTHREADS = 16;
97109
}
98-
99110
int kmax = ks.size();
111+
amrex::Gpu::AsyncArray<double> async_index_to_k(sim->index_to_k.data(),
112+
sim->index_to_k.size());
113+
double *index_to_k = async_index_to_k.data();
114+
115+
start_time = std::chrono::steady_clock::now();
116+
117+
#ifdef AMREX_USE_GPU
118+
if (l_unbinned) {
119+
amrex::Abort(
120+
"Unbinned gw spectra on GPUs are currently not supported!");
121+
}
122+
123+
bool l_unbinned = unbinned;
124+
125+
unsigned long SpecLen = kmax;
126+
std::vector<double> gw_spectrum(SpecLen, 0.0);
127+
amrex::Gpu::DeviceVector<double> d_data(SpecLen, 0.0);
128+
double *const AMREX_RESTRICT dptr_data = d_data.dataPtr();
129+
130+
for (amrex::MFIter mfi(du_real[0], false); mfi.isValid(); ++mfi) {
131+
const amrex::Box &bx = mfi.tilebox();
132+
133+
const amrex::Array4<double> du_real_arr[6] = {
134+
du_real[0].array(mfi), du_real[1].array(mfi),
135+
du_real[2].array(mfi), du_real[3].array(mfi),
136+
du_real[4].array(mfi), du_real[5].array(mfi)};
137+
138+
const amrex::Array4<double> du_imag_arr[6] = {
139+
du_imag[0].array(mfi), du_imag[1].array(mfi),
140+
du_imag[2].array(mfi), du_imag[3].array(mfi),
141+
du_imag[4].array(mfi), du_imag[5].array(mfi)};
142+
143+
amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE(int a, int b,
144+
int c) noexcept {
145+
// To account of negative frequencies
146+
double multpl = (a == 0 || a == dimN / 2) ? 1. : 2.;
147+
int abc[3] = {a, b, c};
148+
double running_sum = 0;
149+
150+
int li = a >= dimN / 2 ? a - dimN : a;
151+
int lj = b >= dimN / 2 ? b - dimN : b;
152+
int lk = c >= dimN / 2 ? c - dimN : c;
153+
unsigned int sq = li * li + lj * lj + lk * lk;
154+
unsigned long index;
155+
if (l_unbinned) {
156+
// index = std::lower_bound(ks.begin(), ks.end(), sq) -
157+
// ks.begin();
158+
} else {
159+
index =
160+
static_cast<long>(std::sqrt(static_cast<double>(sq)) + .5);
161+
}
162+
163+
for (int i = 0; i < 3; ++i) {
164+
for (int j = 0; j < 3; ++j) {
165+
for (int l = 0; l < 3; ++l) {
166+
for (int m = 0; m < 3; ++m) {
167+
running_sum +=
168+
gw_GetLambda(i, j, l, m, abc, index_to_k) *
169+
multpl *
170+
(du_real_arr[mat[i][j]].operator()(a, b, c) *
171+
du_real_arr[mat[l][m]].operator()(a, b,
172+
c) +
173+
du_imag_arr[mat[i][j]].operator()(a, b, c) *
174+
du_imag_arr[mat[l][m]].operator()(a, b,
175+
c));
176+
}
177+
}
178+
}
179+
}
180+
amrex::HostDevice::Atomic::Add(&dptr_data[index],
181+
running_sum / dimN6);
182+
});
183+
}
184+
185+
end_time = std::chrono::steady_clock::now();
186+
duration_ms = static_cast<double>(
187+
std::chrono::duration_cast<std::chrono::microseconds>(end_time -
188+
start_time)
189+
.count());
190+
// amrex::Print() << "Sum: " << duration_ms << std::endl;
191+
start_time = std::chrono::steady_clock::now();
192+
193+
// blocking copy from device to host
194+
amrex::Gpu::copy(amrex::Gpu::deviceToHost, d_data.begin(), d_data.end(),
195+
gw_spectrum.begin());
196+
197+
// reduced sum over mpi ranks
198+
amrex::ParallelDescriptor::ReduceRealSum(
199+
gw_spectrum.data(), static_cast<int>(gw_spectrum.size()),
200+
amrex::ParallelDescriptor::IOProcessorNumber());
201+
202+
#else // ifdef AMREX_USE_GPU
203+
100204
unsigned long SpecLen = kmax * NTHREADS;
101-
std::unique_ptr<double[]> gw_spectrum(new double[SpecLen]);
102-
std::fill_n(gw_spectrum.get(), SpecLen, 0.0);
205+
std::vector<double> gw_spectrum(SpecLen, 0.0);
103206

104-
// Non-trivial load-balancing here. Not sure what wins.
105207
#pragma omp parallel num_threads(std::min(NTHREADS, omp_get_max_threads()))
106208
for (amrex::MFIter mfi(du_real[0], true); mfi.isValid(); ++mfi) {
107209
const amrex::Box &bx = mfi.tilebox();
@@ -156,7 +258,8 @@ void GravitationalWaves::ComputeSpectrum(
156258
for (int l = 0; l < 3; ++l) {
157259
for (int m = 0; m < 3; ++m) {
158260
running_sum +=
159-
GetLambda(i, j, l, m, abc, dimN) *
261+
gw_GetLambda(i, j, l, m, abc,
262+
index_to_k) *
160263
multpl *
161264
(du_real_arr[mat[i][j]]->operator()(
162265
a, b, c) *
@@ -175,94 +278,47 @@ void GravitationalWaves::ComputeSpectrum(
175278
}
176279
}
177280
}
281+
282+
end_time = std::chrono::steady_clock::now();
283+
duration_ms = static_cast<double>(
284+
std::chrono::duration_cast<std::chrono::microseconds>(end_time -
285+
start_time)
286+
.count());
287+
// amrex::Print() << "Sum: " << duration_ms << std::endl;
288+
start_time = std::chrono::steady_clock::now();
289+
178290
for (int a = 1; a < NTHREADS; ++a) {
179291
for (int c = 0; c < kmax; ++c) {
180292
gw_spectrum[c] += gw_spectrum[a * kmax + c];
181293
}
182294
}
183295

184296
amrex::ParallelDescriptor::ReduceRealSum(
185-
gw_spectrum.get(), kmax,
297+
&(gw_spectrum[0]), kmax,
186298
amrex::ParallelDescriptor::IOProcessorNumber());
187299

188300
#pragma omp parallel for
189301
for (int c = 0; c < kmax; ++c) {
190302
gw_spectrum[c] /= dimN6;
191303
}
192304

305+
#endif // AMREX_USE_GPU
306+
end_time = std::chrono::steady_clock::now();
307+
duration_ms = static_cast<double>(
308+
std::chrono::duration_cast<std::chrono::microseconds>(end_time -
309+
start_time)
310+
.count());
311+
// amrex::Print() << "Reduce: " << duration_ms << std::endl;
312+
193313
if (amrex::ParallelDescriptor::IOProcessor()) {
194314
const int nparams = 6;
195315
double header_data[nparams] = {
196316
ld.t, (double)dimN, (double)kmax,
197317
L, (double)zero_padding, (double)unbinned};
198318
utils::hdf5::Write(file_id, "Header", header_data, nparams);
199319
utils::hdf5::Write(file_id, "k", &(ks[0]), kmax);
200-
utils::hdf5::Write(file_id, "Spectrum", gw_spectrum.get(), kmax);
320+
utils::hdf5::Write(file_id, "Spectrum", &gw_spectrum[0], kmax);
201321
}
202322
}
203323

204-
/** @brief Converts a given index to k-space given a projection type.
205-
* @param a Index to be converted.
206-
* @param N Total index length.
207-
* @return k-value.
208-
*/
209-
inline double GravitationalWaves::IndexToK(int a, int N) {
210-
return sim->index_to_k[a];
211-
/*
212-
double n_tilde = a - N <= -N / 2 - 1 ? a : a - N;
213-
double two_pi_n_tilde = 2. * M_PI / static_cast<double>(N) *
214-
n_tilde;
215-
216-
if (projection_type == 1) {
217-
return sin(two_pi_n_tilde);
218-
} else if (projection_type == 2) {
219-
return (8. * sin(two_pi_n_tilde) - sin(2. * two_pi_n_tilde))
220-
/ 6.;
221-
}
222-
223-
return 0.;
224-
*/
225-
}
226-
227-
/** @brief Projects all indicies.
228-
* @param i i-index.
229-
* @param j j-index.
230-
* @param abc a-, b-, and c- index.
231-
* @param N Total index length.
232-
* @return Projected value.
233-
*/
234-
inline double GravitationalWaves::GetProjection(int i, int j, int abc[3],
235-
int N) {
236-
if (abc[0] == 0 && abc[1] == 0 && abc[2] == 0)
237-
return 0.;
238-
239-
double abc_d[3];
240-
abc_d[0] = IndexToK(abc[0], N);
241-
abc_d[1] = IndexToK(abc[1], N);
242-
abc_d[2] = IndexToK(abc[2], N);
243-
244-
double norm =
245-
abc_d[0] * abc_d[0] + abc_d[1] * abc_d[1] + abc_d[2] * abc_d[2];
246-
int l = amrex::min(i, j);
247-
int m = amrex::max(i, j);
248-
249-
double proj = abc_d[l] * abc_d[m];
250-
251-
return static_cast<double>(l == m) - proj / norm;
252-
}
253-
254-
/** @brief Computes lambda from projections.
255-
* @param i i-index.
256-
* @param j j-index.
257-
* @param l l-index.
258-
* @param m m-index.
259-
* @param abc a-, b-, and c-index.
260-
* @param N Total index length.
261-
* @return Lambda
262-
*/
263-
inline double GravitationalWaves::GetLambda(int i, int j, int l, int m,
264-
int abc[3], int N) {
265-
return GetProjection(i, l, abc, N) * GetProjection(j, m, abc, N) -
266-
GetProjection(i, j, abc, N) * GetProjection(l, m, abc, N) / 2.;
267-
}
268324
}; // namespace sledgehamr

source/gravitational_waves.h

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,6 @@ class GravitationalWaves {
4343
};
4444

4545
private:
46-
double IndexToK(int a, int N);
47-
double GetProjection(int i, int j, int abc[3], int N);
48-
double GetLambda(int i, int j, int l, int m, int abc[3], int N);
49-
5046
/** @brief Pointer to the simulation.
5147
*/
5248
Sledgehamr *sim;
@@ -92,9 +88,50 @@ struct GravitationalWavesSpectrumModifier {
9288

9389
virtual void FourierSpaceModifications(amrex::MultiFab du_real[6],
9490
amrex::MultiFab du_imag[6],
95-
const double dk, const int dimN){};
91+
const double dk, const int dimN) {};
9692
};
9793

94+
/** @brief Projects all indicies.
95+
* @param i i-index.
96+
* @param j j-index.
97+
* @param abc a-, b-, and c- index.
98+
* @param N Total index length.
99+
* @return Projected value.
100+
*/
101+
AMREX_GPU_DEVICE AMREX_FORCE_INLINE double gw_GetProjection(int i, int j,
102+
double abc[3]) {
103+
double norm = abc[0] * abc[0] + abc[1] * abc[1] + abc[2] * abc[2];
104+
int l = amrex::min(i, j);
105+
int m = amrex::max(i, j);
106+
107+
double proj = abc[l] * abc[m];
108+
109+
return static_cast<double>(l == m) - proj / norm;
110+
}
111+
112+
/** @brief Computes lambda from projections.
113+
* @param i i-index.
114+
* @param j j-index.
115+
* @param l l-index.
116+
* @param m m-index.
117+
* @param abc a-, b-, and c-index.
118+
* @param N Total index length.
119+
* @return Lambda
120+
*/
121+
AMREX_GPU_DEVICE AMREX_FORCE_INLINE double
122+
gw_GetLambda(int i, int j, int l, int m, int abc[3], const double *index_to_k) {
123+
if (abc[0] == 0 && abc[1] == 0 && abc[2] == 0)
124+
return 0.;
125+
126+
double abc_d[3];
127+
abc_d[0] = index_to_k[abc[0]];
128+
abc_d[1] = index_to_k[abc[1]];
129+
abc_d[2] = index_to_k[abc[2]];
130+
131+
return gw_GetProjection(i, l, abc_d) * gw_GetProjection(j, m, abc_d) -
132+
gw_GetProjection(i, j, abc_d) * gw_GetProjection(l, m, abc_d) / 2.;
133+
}
134+
98135
}; // namespace sledgehamr
99136

100137
#endif // SLEDGEHAMR_GRAVITATIONAL_WAVES_H_

0 commit comments

Comments
 (0)