Skip to content

Commit aeaf549

Browse files
committed
format
1 parent d9cf163 commit aeaf549

File tree

1 file changed

+23
-17
lines changed

1 file changed

+23
-17
lines changed

include/cufinufft/common_kernels.hpp

Lines changed: 23 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -253,25 +253,27 @@ __global__ FINUFFT_FLATTEN void interp_subprob(
253253
const auto subpidx = blockIdx.x;
254254
const auto bidx = loadReadOnly(p.subprob_to_bin + subpidx);
255255
const auto binsubp_idx = subpidx - loadReadOnly(p.subprobstartpts + bidx);
256-
const auto ptstart = loadReadOnly(p.binstartpts + bidx) + binsubp_idx * p.opts.gpu_maxsubprobsize;
257-
const auto nupts = min(p.opts.gpu_maxsubprobsize,
258-
loadReadOnly(p.binsize + bidx) - binsubp_idx * p.opts.gpu_maxsubprobsize);
256+
const auto ptstart =
257+
loadReadOnly(p.binstartpts + bidx) + binsubp_idx * p.opts.gpu_maxsubprobsize;
258+
const auto nupts =
259+
min(p.opts.gpu_maxsubprobsize,
260+
loadReadOnly(p.binsize + bidx) - binsubp_idx * p.opts.gpu_maxsubprobsize);
259261

260262
auto offset = compute_offset<ndim>(bidx, nbins, binsizes);
261263

262264
constexpr auto ns_2 = (ns + 1) / 2;
263265
constexpr auto rounded_ns = ns_2 * 2;
264266

265-
shared_mem_copy_helper<T, ndim, ns>(binsizes, offset, p.nf123,
266-
[fw, fwshared](int idx_shared, int idx_global) {
267-
fwshared[idx_shared] = loadReadOnly(fw + idx_global);
268-
});
267+
shared_mem_copy_helper<T, ndim, ns>(
268+
binsizes, offset, p.nf123, [fw, fwshared](int idx_shared, int idx_global) {
269+
fwshared[idx_shared] = loadReadOnly(fw + idx_global);
270+
});
269271
__syncthreads();
270272

271273
for (int i = threadIdx.x; i < nupts; i += blockDim.x) {
272-
const int idx = ptstart + i;
274+
const int idx = ptstart + i;
273275
const auto nuptsidx = loadReadOnly(p.idxnupts + idx);
274-
auto [ker, start] = get_kerval_and_local_start<T, KEREVALMETH, ndim, ns>(
276+
auto [ker, start] = get_kerval_and_local_start<T, KEREVALMETH, ndim, ns>(
275277
nuptsidx, p.xyz, p.nf123, offset, sigma, es_c, es_beta);
276278

277279
cuda_complex<T> cnow{0, 0};
@@ -488,9 +490,11 @@ __global__ FINUFFT_FLATTEN void spread_subprob(
488490
const auto subpidx = blockIdx.x;
489491
const auto bidx = loadReadOnly(p.subprob_to_bin + subpidx);
490492
const auto binsubp_idx = subpidx - loadReadOnly(p.subprobstartpts + bidx);
491-
const auto ptstart = loadReadOnly(p.binstartpts + bidx) + binsubp_idx * p.opts.gpu_maxsubprobsize;
492-
const auto nupts = min(p.opts.gpu_maxsubprobsize,
493-
loadReadOnly(p.binsize + bidx) - binsubp_idx * p.opts.gpu_maxsubprobsize);
493+
const auto ptstart =
494+
loadReadOnly(p.binstartpts + bidx) + binsubp_idx * p.opts.gpu_maxsubprobsize;
495+
const auto nupts =
496+
min(p.opts.gpu_maxsubprobsize,
497+
loadReadOnly(p.binsize + bidx) - binsubp_idx * p.opts.gpu_maxsubprobsize);
494498

495499
auto offset = compute_offset<ndim>(bidx, nbins, binsizes);
496500

@@ -507,9 +511,9 @@ __global__ FINUFFT_FLATTEN void spread_subprob(
507511
__syncthreads();
508512

509513
for (int i = threadIdx.x; i < nupts; i += blockDim.x) {
510-
const int idx = ptstart + i;
514+
const int idx = ptstart + i;
511515
const auto nuptsidx = loadReadOnly(p.idxnupts + idx);
512-
auto [ker, start] = get_kerval_and_local_start<T, KEREVALMETH, ndim, ns>(
516+
auto [ker, start] = get_kerval_and_local_start<T, KEREVALMETH, ndim, ns>(
513517
nuptsidx, p.xyz, p.nf123, offset, sigma, es_c, es_beta);
514518

515519
const auto cnow = loadReadOnly(c + nuptsidx);
@@ -677,9 +681,11 @@ __global__ FINUFFT_FLATTEN void spread_output_driven(
677681

678682
const int bidx = loadReadOnly(p.subprob_to_bin + blockIdx.x);
679683
const int binsubp_idx = blockIdx.x - loadReadOnly(p.subprobstartpts + bidx);
680-
const int ptstart = loadReadOnly(p.binstartpts + bidx) + binsubp_idx * p.opts.gpu_maxsubprobsize;
681-
const int nupts = min(p.opts.gpu_maxsubprobsize,
682-
loadReadOnly(p.binsize + bidx) - binsubp_idx * p.opts.gpu_maxsubprobsize);
684+
const int ptstart =
685+
loadReadOnly(p.binstartpts + bidx) + binsubp_idx * p.opts.gpu_maxsubprobsize;
686+
const int nupts =
687+
min(p.opts.gpu_maxsubprobsize,
688+
loadReadOnly(p.binsize + bidx) - binsubp_idx * p.opts.gpu_maxsubprobsize);
683689

684690
auto offset = compute_offset<ndim>(bidx, nbins, binsizes);
685691

0 commit comments

Comments
 (0)