@@ -1140,11 +1140,10 @@ int FINUFFT_PLAN_T<TF>::execute_internal(TC *cj, TC *fk, bool adjoint, int ntran
11401140 // STEP 0: pre-phase (possibly) the c_j input strengths into c'_j batch...
11411141 timer.restart ();
11421142#pragma omp parallel for num_threads(opts.nthreads) // or batchSize?
1143- for (int i = 0 ; i < thisBatchSize; i++) {
1144- BIGINT ioff = i * nj;
1145- for (BIGINT j = 0 ; j < nj; ++j) {
1146- CpBatch[ioff + j] = prephase[j] * cjb[ioff + j];
1147- }
1143+ for (BIGINT j = 0 ; j < nj; ++j) {
1144+ auto phase = prephase[j];
1145+ for (int i = 0 ; i < thisBatchSize; i++)
1146+ CpBatch[i * nj + j] = phase * cjb[i * nj + j];
11481147 }
11491148 t_phase += timer.elapsedsec ();
11501149
@@ -1164,21 +1163,17 @@ int FINUFFT_PLAN_T<TF>::execute_internal(TC *cj, TC *fk, bool adjoint, int ntran
11641163 // STEP 3: apply deconvolve (precomputed 1/phiHat(targ_k), phasing too)...
11651164 timer.restart ();
11661165#pragma omp parallel for num_threads(opts.nthreads)
1167- for (int i = 0 ; i < thisBatchSize; i++) {
1168- BIGINT ioff = i * nk;
1169- for (BIGINT k = 0 ; k < nk; ++k) fkb[ioff + k] *= deconv[k];
1170- }
1166+ for (BIGINT k = 0 ; k < nk; ++k)
1167+ for (int i = 0 ; i < thisBatchSize; i++) fkb[i * nk + k] *= deconv[k];
11711168 t_deconv += timer.elapsedsec ();
11721169 } else { // adjoint mode
11731170 // STEP 0: apply deconvolve (precomputed 1/phiHat(targ_k), conjugate phasing
11741171 // too)... write output into CpBatch
11751172 timer.restart ();
11761173#pragma omp parallel for num_threads(opts.nthreads)
1177- for (int i = 0 ; i < thisBatchSize; i++) {
1178- BIGINT ioff = i * nk;
1179- for (BIGINT k = 0 ; k < nk; ++k)
1180- CpBatch[ioff + k] = fkb[ioff + k] * conj (deconv[k]);
1181- }
1174+ for (BIGINT k = 0 ; k < nk; ++k)
1175+ for (int i = 0 ; i < thisBatchSize; i++)
1176+ CpBatch[i * nk + k] = fkb[i * nk + k] * conj (deconv[k]);
11821177 t_deconv += timer.elapsedsec ();
11831178 // STEP 1: adjoint type 2 (i.e. type 1) NUFFT from CpBatch to fwBatch...
11841179 timer.restart ();
@@ -1194,11 +1189,9 @@ int FINUFFT_PLAN_T<TF>::execute_internal(TC *cj, TC *fk, bool adjoint, int ntran
11941189 // STEP 3: post-phase (possibly) the c_j output strengths (in place) ...
11951190 timer.restart ();
11961191#pragma omp parallel for num_threads(opts.nthreads) // or batchSize?
1197- for (int i = 0 ; i < thisBatchSize; i++) {
1198- BIGINT ioff = i * nj;
1199- for (BIGINT j = 0 ; j < nj; ++j) {
1200- cjb[ioff + j] *= conj (prephase[j]); // FIXME
1201- }
1192+ for (BIGINT j = 0 ; j < nj; ++j) {
1193+ auto phase = conj (prephase[j]);
1194+ for (int i = 0 ; i < thisBatchSize; i++) cjb[i * nj + j] *= phase;
12021195 }
12031196 t_phase += timer.elapsedsec ();
12041197 }
0 commit comments