|
| 1 | +#include <finufft/defs.h> |
| 2 | +#include <finufft/spreadinterp.h> |
| 3 | +#include <finufft/utils_precindep.h> |
| 4 | + |
| 5 | +#include <cmath> |
| 6 | +#include <cstdio> |
| 7 | +#include <cstdlib> |
| 8 | +#include <vector> |
| 9 | + |
| 10 | +using namespace finufft::spreadinterp; |
| 11 | +using namespace finufft::utils; // for timer |
| 12 | + |
| 13 | +void usage() { |
| 14 | + printf("usage: spreadtestnd dims [M N [dir [sort [flags [debug [kerpad [kerevalmeth [upsampfac]]]]]]]]\n\twhere " |
| 15 | + "dims=1,2 or 3\n\tM=# nonuniform pts\n\tN=# uniform pts\n\tdir=spreader direction " |
| 16 | + "[spread/interpolate]\n\tsort=0 (don't sort NU pts), 1 (do), or 2 (maybe sort; default)\n\tflags: expert " |
| 17 | + "timing flags, 0 is default (see spreadinterp.h)\n\tdebug=0 (less text out), 1 (more), 2 (lots)\n\tkerpad=0 " |
| 18 | + "(no pad to mult of 4), 1 (do, for kerevalmeth=0 only)\n\tkerevalmeth=0 (direct), 1 (Horner " |
| 19 | + "ppval)\n\tupsampfac>1; 2 or 1.25 for Horner\n\nexample: ./spreadtestndall 1 1e6 1e6 1 2 0 1\n"); |
| 20 | +} |
| 21 | + |
| 22 | +int main(int argc, char *argv[]) |
| 23 | +/* Test executable for the 1D, 2D, or 3D C++ spreader, both directions. |
| 24 | + * It checks speed, and basic correctness via the grid sum of the result. |
| 25 | + * See usage() for usage. Note it currently tests only pirange=0, which is not |
| 26 | + * the use case in finufft, and can differ in speed (see devel/foldrescale*) |
| 27 | + * |
| 28 | + * Example: spreadtestndall 3 1e7 1e7 1 1 |
| 29 | + * |
| 30 | + * Compilation (also check ../makefile): |
| 31 | + * g++ spreadtestndall.cpp ../src/spreadinterp.o ../src/utils.o -o spreadtestndall -fPIC -Ofast -funroll-loops |
| 32 | + * -fopenmp |
| 33 | + * |
| 34 | + */ |
| 35 | +{ |
| 36 | + int d = 3; // Cmd line args & their defaults: default #dims |
| 37 | + double w; |
| 38 | + int dir = 1; // default (eg 1e-6 has nspread=7) |
| 39 | + BIGINT M = 1e6; // default # NU pts |
| 40 | + BIGINT roughNg = 1e6; // default # U pts |
| 41 | + int sort = 2; // spread_sort |
| 42 | + int flags = 0; // default |
| 43 | + int debug = 0; // default |
| 44 | + int kerpad = 0; // default |
| 45 | + int kerevalmeth = 1; // default: Horner |
| 46 | + FLT upsampfac = 2.0; // standard |
| 47 | + |
| 48 | + if (argc < 2 || argc == 3 || argc > 11) { |
| 49 | + usage(); |
| 50 | + return (argc > 1); |
| 51 | + } |
| 52 | + sscanf(argv[1], "%d", &d); |
| 53 | + if (d < 1 || d > 3) { |
| 54 | + printf("d must be 1, 2 or 3!\n"); |
| 55 | + usage(); |
| 56 | + return 1; |
| 57 | + } |
| 58 | + if (argc > 2) { |
| 59 | + sscanf(argv[2], "%lf", &w); |
| 60 | + M = (BIGINT)w; // to read "1e6" right! |
| 61 | + if (M < 1) { |
| 62 | + printf("M (# NU pts) must be positive!\n"); |
| 63 | + usage(); |
| 64 | + return 1; |
| 65 | + } |
| 66 | + sscanf(argv[3], "%lf", &w); |
| 67 | + roughNg = (BIGINT)w; |
| 68 | + if (roughNg < 1) { |
| 69 | + printf("N (# U pts) must be positive!\n"); |
| 70 | + usage(); |
| 71 | + return 1; |
| 72 | + } |
| 73 | + } |
| 74 | + if (argc > 4) |
| 75 | + sscanf(argv[4], "%d", &dir); |
| 76 | + if (argc > 5) { |
| 77 | + sscanf(argv[5], "%d", &sort); |
| 78 | + if ((sort != 0) && (sort != 1) && (sort != 2)) { |
| 79 | + printf("sort must be 0, 1 or 2!\n"); |
| 80 | + usage(); |
| 81 | + return 1; |
| 82 | + } |
| 83 | + } |
| 84 | + if (argc > 6) |
| 85 | + sscanf(argv[6], "%d", &flags); |
| 86 | + if (argc > 7) { |
| 87 | + sscanf(argv[7], "%d", &debug); |
| 88 | + if ((debug < 0) || (debug > 2)) { |
| 89 | + printf("debug must be 0, 1 or 2!\n"); |
| 90 | + usage(); |
| 91 | + return 1; |
| 92 | + } |
| 93 | + } |
| 94 | + if (argc > 8) { |
| 95 | + sscanf(argv[8], "%d", &kerpad); |
| 96 | + if ((kerpad < 0) || (kerpad > 1)) { |
| 97 | + printf("kerpad must be 0 or 1!\n"); |
| 98 | + usage(); |
| 99 | + return 1; |
| 100 | + } |
| 101 | + } |
| 102 | + if (argc > 9) { |
| 103 | + sscanf(argv[9], "%d", &kerevalmeth); |
| 104 | + if ((kerevalmeth < 0) || (kerevalmeth > 1)) { |
| 105 | + printf("kerevalmeth must be 0 or 1!\n"); |
| 106 | + usage(); |
| 107 | + return 1; |
| 108 | + } |
| 109 | + } |
| 110 | + if (argc > 10) { |
| 111 | + sscanf(argv[10], "%lf", &w); |
| 112 | + upsampfac = (FLT)w; |
| 113 | + if (upsampfac <= 1.0) { |
| 114 | + printf("upsampfac must be >1.0!\n"); |
| 115 | + usage(); |
| 116 | + return 1; |
| 117 | + } |
| 118 | + } |
| 119 | + |
| 120 | + BIGINT N = (BIGINT)round(pow(roughNg, 1.0 / d)); // Fourier grid size per dim |
| 121 | + BIGINT Ng = (BIGINT)pow(N, d); // actual total grid points |
| 122 | + BIGINT N2 = (d >= 2) ? N : 1, N3 = (d == 3) ? N : 1; // the y and z grid sizes |
| 123 | + std::vector<FLT> kx(M), ky(1), kz(1), d_nonuniform(2 * M); // NU, Re & Im |
| 124 | + if (d > 1) |
| 125 | + ky.resize(M); // only alloc needed coords |
| 126 | + if (d > 2) |
| 127 | + kz.resize(M); |
| 128 | + std::vector<FLT> d_uniform(2 * Ng); // Re and Im |
| 129 | + |
| 130 | + finufft_spread_opts opts; |
| 131 | + const auto max_digits = []() { |
| 132 | + if (std::is_same<FLT, double>::value) { |
| 133 | + return 17; |
| 134 | + } else { |
| 135 | + return 9; |
| 136 | + } |
| 137 | + }(); |
| 138 | + for (int digits = 2; digits < max_digits; digits++) { |
| 139 | + const auto tol = 10.0 * pow(10.0, -digits); |
| 140 | + printf("digits=%d, tol = %.3g\n", digits, FLT(tol)); |
| 141 | + int ier_set = setup_spreader(opts, tol, upsampfac, kerevalmeth, debug, 1, d); |
| 142 | + |
| 143 | + if (ier_set > 1) { // exit gracefully if can't set up. |
| 144 | + printf("error when setting up spreader (ier_set=%d)!\n", ier_set); |
| 145 | + return ier_set; |
| 146 | + } |
| 147 | + opts.debug = debug; // print more diagnostics? |
| 148 | + opts.sort = sort; |
| 149 | + opts.flags = flags; |
| 150 | + opts.kerpad = kerpad; |
| 151 | + opts.upsampfac = upsampfac; |
| 152 | + opts.nthreads = 0; // max # threads used, or 0 to use what's avail |
| 153 | + opts.sort_threads = 0; |
| 154 | + opts.kerpad = 0; |
| 155 | + // opts.max_subproblem_size = 1e5; |
| 156 | + FLT maxerr, ansmod; |
| 157 | + |
| 158 | + // spread a single source, only for reference accuracy check... |
| 159 | + opts.spread_direction = 1; |
| 160 | + |
| 161 | + d_nonuniform[0] = 1.0; |
| 162 | + d_nonuniform[1] = 0.0; // unit strength |
| 163 | + kx[0] = ky[0] = kz[0] = M_PI / 2.0; // at center |
| 164 | + int ier = spreadinterp(N, N2, N3, d_uniform.data(), 1, kx.data(), ky.data(), kz.data(), d_nonuniform.data(), |
| 165 | + opts); // vector::data officially C++11 but works |
| 166 | + if (ier != 0) { |
| 167 | + printf("error when spreading M=1 pt for ref acc check (ier=%d)!\n", ier); |
| 168 | + return ier; |
| 169 | + } |
| 170 | + FLT kersumre = 0.0, kersumim = 0.0; // sum kernel on uniform grid |
| 171 | + for (BIGINT i = 0; i < Ng; ++i) { |
| 172 | + kersumre += d_uniform[2 * i]; |
| 173 | + kersumim += d_uniform[2 * i + 1]; // in case the kernel isn't real! |
| 174 | + } |
| 175 | + |
| 176 | + // now do the large-scale test w/ random sources.. |
| 177 | + printf("making random data...\n"); |
| 178 | + FLT strre = 0.0, strim = 0.0; // also sum the strengths |
| 179 | +#pragma omp parallel |
| 180 | + { |
| 181 | + unsigned int se = MY_OMP_GET_THREAD_NUM(); // needed for parallel random #s |
| 182 | +#pragma omp for schedule(dynamic, 1000000) reduction(+ : strre, strim) |
| 183 | + for (BIGINT i = 0; i < M; ++i) { |
| 184 | + kx[i] = randm11r(&se) * 3 * M_PI; |
| 185 | + // kx[i]=2.0*kx[i] - 50.0; //// to test folding within +-1 period |
| 186 | + if (d > 1) |
| 187 | + ky[i] = randm11r(&se) * 3 * M_PI; // only fill needed coords |
| 188 | + if (d > 2) |
| 189 | + kz[i] = randm11r(&se) * 3 * M_PI; |
| 190 | + d_nonuniform[i * 2] = randm11r(&se); |
| 191 | + d_nonuniform[i * 2 + 1] = randm11r(&se); |
| 192 | + strre += d_nonuniform[2 * i]; |
| 193 | + strim += d_nonuniform[2 * i + 1]; |
| 194 | + } |
| 195 | + } |
| 196 | + CNTime timer{}; |
| 197 | + double t; |
| 198 | + if (dir == 1) { // test direction 1 (NU -> U spreading) ...................... |
| 199 | + printf("spreadinterp %dD, %.3g U pts, dir=%d, tol=%.3g: nspread=%d\n", d, (double)Ng, opts.spread_direction, |
| 200 | + tol, opts.nspread); |
| 201 | + timer.start(); |
| 202 | + ier = spreadinterp(N, N2, N3, d_uniform.data(), M, kx.data(), ky.data(), kz.data(), d_nonuniform.data(), |
| 203 | + opts); |
| 204 | + t = timer.elapsedsec(); |
| 205 | + if (ier != 0) { |
| 206 | + printf("error (ier=%d)!\n", ier); |
| 207 | + return ier; |
| 208 | + } else |
| 209 | + printf(" %.3g NU pts in %.3g s \t%.3g pts/s \t%.3g spread pts/s\n", (double)M, t, M / t, |
| 210 | + pow(opts.nspread, d) * M / t); |
| 211 | + |
| 212 | + FLT sumre = 0.0, sumim = 0.0; // check spreading accuracy, wrapping |
| 213 | +#pragma omp parallel for reduction(+ : sumre, sumim) |
| 214 | + for (BIGINT i = 0; i < Ng; ++i) { |
| 215 | + sumre += d_uniform[2 * i]; |
| 216 | + sumim += d_uniform[2 * i + 1]; |
| 217 | + } |
| 218 | + FLT pre = kersumre * strre - kersumim * strim; // pred ans, complex mult |
| 219 | + FLT pim = kersumim * strre + kersumre * strim; |
| 220 | + FLT maxerr = std::max(fabs(sumre - pre), fabs(sumim - pim)); |
| 221 | + FLT ansmod = sqrt(sumre * sumre + sumim * sumim); |
| 222 | + printf(" rel err in total over grid: %.3g\n", maxerr / ansmod); |
| 223 | + // note this is weaker than below dir=2 test, but is good indicator that |
| 224 | + // periodic wrapping is correct |
| 225 | + } |
| 226 | + // test direction 2 (U -> NU interpolation) .............................. |
| 227 | + if (dir == 2) { |
| 228 | + printf("making more random NU pts...\n"); |
| 229 | + for (BIGINT i = 0; i < Ng; ++i) { // unit grid data |
| 230 | + d_uniform[2 * i] = 1.0; |
| 231 | + d_uniform[2 * i + 1] = 0.0; |
| 232 | + } |
| 233 | +#pragma omp parallel |
| 234 | + { |
| 235 | + unsigned int se = MY_OMP_GET_THREAD_NUM(); // needed for parallel random #s |
| 236 | +#pragma omp for schedule(dynamic, 1000000) |
| 237 | + for (BIGINT i = 0; i < M; ++i) { // random target pts |
| 238 | + // kx[i]=10+.9*rand01r(&s)*N; // or if want to keep ns away from edges |
| 239 | + kx[i] = randm11r(&se) * 3 * M_PI; |
| 240 | + if (d > 1) |
| 241 | + ky[i] = randm11r(&se) * 3 * M_PI; |
| 242 | + if (d > 2) |
| 243 | + kz[i] = randm11r(&se) * 3 * M_PI; |
| 244 | + } |
| 245 | + } |
| 246 | + |
| 247 | + opts.spread_direction = 2; |
| 248 | + printf("spreadinterp %dD, %.3g U pts, dir=%d, tol=%.3g: nspread=%d\n", d, (double)Ng, opts.spread_direction, |
| 249 | + tol, opts.nspread); |
| 250 | + timer.restart(); |
| 251 | + ier = spreadinterp(N, N2, N3, d_uniform.data(), M, kx.data(), ky.data(), kz.data(), d_nonuniform.data(), |
| 252 | + opts); |
| 253 | + t = timer.elapsedsec(); |
| 254 | + if (ier != 0) { |
| 255 | + printf("error (ier=%d)!\n", ier); |
| 256 | + return 1; |
| 257 | + } else |
| 258 | + printf(" %.3g NU pts in %.3g s \t%.3g pts/s \t%.3g spread pts/s\n", (double)M, t, M / t, |
| 259 | + pow(opts.nspread, d) * M / t); |
| 260 | + |
| 261 | + // math test is worst-case error from pred value (kersum) on interp pts: |
| 262 | + maxerr = 0.0; |
| 263 | + for (BIGINT i = 0; i < M; ++i) { |
| 264 | + FLT err = std::max(fabs(d_nonuniform[2 * i] - kersumre), fabs(d_nonuniform[2 * i + 1] - kersumim)); |
| 265 | + if (err > maxerr) |
| 266 | + maxerr = err; |
| 267 | + } |
| 268 | + ansmod = sqrt(kersumre * kersumre + kersumim * kersumim); |
| 269 | + printf(" max rel err in values at NU pts: %.3g\n", maxerr / ansmod); |
| 270 | + // this is stronger test than for dir=1, since it tests sum of kernel for |
| 271 | + // each NU pt. However, it cannot detect reading |
| 272 | + // from wrong grid pts (they are all unity) |
| 273 | + } |
| 274 | + } |
| 275 | + return 0; |
| 276 | +} |
0 commit comments