Skip to content

Commit 9509b88

Browse files
authored
Merge pull request #451 from DiamonDinoia/spreader-bench
Spreader bench
2 parents 70fb0f5 + dc39543 commit 9509b88

File tree

3 files changed

+307
-1
lines changed

3 files changed

+307
-1
lines changed

perftest/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Each source test file is instantiated in single and double precision
2-
set(PERFTESTS guru_timing_test manysmallprobs spreadtestnd)
2+
set(PERFTESTS guru_timing_test manysmallprobs spreadtestnd spreadtestndall)
33

44
foreach(TEST ${PERFTESTS})
55
add_executable(${TEST} ${TEST}.cpp)

perftest/spreaderbench.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
fast = 'new.txt'
2+
slow = 'old.txt'
3+
4+
5+
def read_data(filename):
6+
data = [0] * 17
7+
with open(filename) as f1:
8+
nspread = 0
9+
speed = 0
10+
for line in f1:
11+
if 'nspread' in line:
12+
nspread = int(line.split('=')[-1])
13+
if 'pts/s' in line:
14+
speed = float(line.split(' ')[12])
15+
data[nspread] = speed
16+
return data
17+
18+
# compute relative increment in percentage between two numbers
19+
20+
21+
vec = read_data(fast)[2:]
22+
old = read_data(slow)[2:]
23+
24+
# 1 : slow = x : fast
25+
# x = (1 - slow/fast) * 100
26+
i = 2
27+
for vec, old in zip(vec, old):
28+
diff = (1 - old/vec)*100
29+
print(f'nspread={i:02d} delta={diff:.3f}%')
30+
i+=1

perftest/spreadtestndall.cpp

Lines changed: 276 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,276 @@
1+
#include <finufft/defs.h>
2+
#include <finufft/spreadinterp.h>
3+
#include <finufft/utils_precindep.h>
4+
5+
#include <cmath>
6+
#include <cstdio>
7+
#include <cstdlib>
8+
#include <vector>
9+
10+
using namespace finufft::spreadinterp;
11+
using namespace finufft::utils; // for timer
12+
13+
void usage() {
14+
printf("usage: spreadtestnd dims [M N [dir [sort [flags [debug [kerpad [kerevalmeth [upsampfac]]]]]]]]\n\twhere "
15+
"dims=1,2 or 3\n\tM=# nonuniform pts\n\tN=# uniform pts\n\tdir=spreader direction "
16+
"[spread/interpolate]\n\tsort=0 (don't sort NU pts), 1 (do), or 2 (maybe sort; default)\n\tflags: expert "
17+
"timing flags, 0 is default (see spreadinterp.h)\n\tdebug=0 (less text out), 1 (more), 2 (lots)\n\tkerpad=0 "
18+
"(no pad to mult of 4), 1 (do, for kerevalmeth=0 only)\n\tkerevalmeth=0 (direct), 1 (Horner "
19+
"ppval)\n\tupsampfac>1; 2 or 1.25 for Horner\n\nexample: ./spreadtestndall 1 1e6 1e6 1 2 0 1\n");
20+
}
21+
22+
int main(int argc, char *argv[])
23+
/* Test executable for the 1D, 2D, or 3D C++ spreader, both directions.
24+
* It checks speed, and basic correctness via the grid sum of the result.
25+
* See usage() for usage. Note it currently tests only pirange=0, which is not
26+
* the use case in finufft, and can differ in speed (see devel/foldrescale*)
27+
*
28+
* Example: spreadtestndall 3 1e7 1e7 1 1
29+
*
30+
* Compilation (also check ../makefile):
31+
* g++ spreadtestndall.cpp ../src/spreadinterp.o ../src/utils.o -o spreadtestndall -fPIC -Ofast -funroll-loops
32+
* -fopenmp
33+
*
34+
*/
35+
{
36+
int d = 3; // Cmd line args & their defaults: default #dims
37+
double w;
38+
int dir = 1; // default (eg 1e-6 has nspread=7)
39+
BIGINT M = 1e6; // default # NU pts
40+
BIGINT roughNg = 1e6; // default # U pts
41+
int sort = 2; // spread_sort
42+
int flags = 0; // default
43+
int debug = 0; // default
44+
int kerpad = 0; // default
45+
int kerevalmeth = 1; // default: Horner
46+
FLT upsampfac = 2.0; // standard
47+
48+
if (argc < 2 || argc == 3 || argc > 11) {
49+
usage();
50+
return (argc > 1);
51+
}
52+
sscanf(argv[1], "%d", &d);
53+
if (d < 1 || d > 3) {
54+
printf("d must be 1, 2 or 3!\n");
55+
usage();
56+
return 1;
57+
}
58+
if (argc > 2) {
59+
sscanf(argv[2], "%lf", &w);
60+
M = (BIGINT)w; // to read "1e6" right!
61+
if (M < 1) {
62+
printf("M (# NU pts) must be positive!\n");
63+
usage();
64+
return 1;
65+
}
66+
sscanf(argv[3], "%lf", &w);
67+
roughNg = (BIGINT)w;
68+
if (roughNg < 1) {
69+
printf("N (# U pts) must be positive!\n");
70+
usage();
71+
return 1;
72+
}
73+
}
74+
if (argc > 4)
75+
sscanf(argv[4], "%d", &dir);
76+
if (argc > 5) {
77+
sscanf(argv[5], "%d", &sort);
78+
if ((sort != 0) && (sort != 1) && (sort != 2)) {
79+
printf("sort must be 0, 1 or 2!\n");
80+
usage();
81+
return 1;
82+
}
83+
}
84+
if (argc > 6)
85+
sscanf(argv[6], "%d", &flags);
86+
if (argc > 7) {
87+
sscanf(argv[7], "%d", &debug);
88+
if ((debug < 0) || (debug > 2)) {
89+
printf("debug must be 0, 1 or 2!\n");
90+
usage();
91+
return 1;
92+
}
93+
}
94+
if (argc > 8) {
95+
sscanf(argv[8], "%d", &kerpad);
96+
if ((kerpad < 0) || (kerpad > 1)) {
97+
printf("kerpad must be 0 or 1!\n");
98+
usage();
99+
return 1;
100+
}
101+
}
102+
if (argc > 9) {
103+
sscanf(argv[9], "%d", &kerevalmeth);
104+
if ((kerevalmeth < 0) || (kerevalmeth > 1)) {
105+
printf("kerevalmeth must be 0 or 1!\n");
106+
usage();
107+
return 1;
108+
}
109+
}
110+
if (argc > 10) {
111+
sscanf(argv[10], "%lf", &w);
112+
upsampfac = (FLT)w;
113+
if (upsampfac <= 1.0) {
114+
printf("upsampfac must be >1.0!\n");
115+
usage();
116+
return 1;
117+
}
118+
}
119+
120+
BIGINT N = (BIGINT)round(pow(roughNg, 1.0 / d)); // Fourier grid size per dim
121+
BIGINT Ng = (BIGINT)pow(N, d); // actual total grid points
122+
BIGINT N2 = (d >= 2) ? N : 1, N3 = (d == 3) ? N : 1; // the y and z grid sizes
123+
std::vector<FLT> kx(M), ky(1), kz(1), d_nonuniform(2 * M); // NU, Re & Im
124+
if (d > 1)
125+
ky.resize(M); // only alloc needed coords
126+
if (d > 2)
127+
kz.resize(M);
128+
std::vector<FLT> d_uniform(2 * Ng); // Re and Im
129+
130+
finufft_spread_opts opts;
131+
const auto max_digits = []() {
132+
if (std::is_same<FLT, double>::value) {
133+
return 17;
134+
} else {
135+
return 9;
136+
}
137+
}();
138+
for (int digits = 2; digits < max_digits; digits++) {
139+
const auto tol = 10.0 * pow(10.0, -digits);
140+
printf("digits=%d, tol = %.3g\n", digits, FLT(tol));
141+
int ier_set = setup_spreader(opts, tol, upsampfac, kerevalmeth, debug, 1, d);
142+
143+
if (ier_set > 1) { // exit gracefully if can't set up.
144+
printf("error when setting up spreader (ier_set=%d)!\n", ier_set);
145+
return ier_set;
146+
}
147+
opts.debug = debug; // print more diagnostics?
148+
opts.sort = sort;
149+
opts.flags = flags;
150+
opts.kerpad = kerpad;
151+
opts.upsampfac = upsampfac;
152+
opts.nthreads = 0; // max # threads used, or 0 to use what's avail
153+
opts.sort_threads = 0;
154+
opts.kerpad = 0;
155+
// opts.max_subproblem_size = 1e5;
156+
FLT maxerr, ansmod;
157+
158+
// spread a single source, only for reference accuracy check...
159+
opts.spread_direction = 1;
160+
161+
d_nonuniform[0] = 1.0;
162+
d_nonuniform[1] = 0.0; // unit strength
163+
kx[0] = ky[0] = kz[0] = M_PI / 2.0; // at center
164+
int ier = spreadinterp(N, N2, N3, d_uniform.data(), 1, kx.data(), ky.data(), kz.data(), d_nonuniform.data(),
165+
opts); // vector::data officially C++11 but works
166+
if (ier != 0) {
167+
printf("error when spreading M=1 pt for ref acc check (ier=%d)!\n", ier);
168+
return ier;
169+
}
170+
FLT kersumre = 0.0, kersumim = 0.0; // sum kernel on uniform grid
171+
for (BIGINT i = 0; i < Ng; ++i) {
172+
kersumre += d_uniform[2 * i];
173+
kersumim += d_uniform[2 * i + 1]; // in case the kernel isn't real!
174+
}
175+
176+
// now do the large-scale test w/ random sources..
177+
printf("making random data...\n");
178+
FLT strre = 0.0, strim = 0.0; // also sum the strengths
179+
#pragma omp parallel
180+
{
181+
unsigned int se = MY_OMP_GET_THREAD_NUM(); // needed for parallel random #s
182+
#pragma omp for schedule(dynamic, 1000000) reduction(+ : strre, strim)
183+
for (BIGINT i = 0; i < M; ++i) {
184+
kx[i] = randm11r(&se) * 3 * M_PI;
185+
// kx[i]=2.0*kx[i] - 50.0; //// to test folding within +-1 period
186+
if (d > 1)
187+
ky[i] = randm11r(&se) * 3 * M_PI; // only fill needed coords
188+
if (d > 2)
189+
kz[i] = randm11r(&se) * 3 * M_PI;
190+
d_nonuniform[i * 2] = randm11r(&se);
191+
d_nonuniform[i * 2 + 1] = randm11r(&se);
192+
strre += d_nonuniform[2 * i];
193+
strim += d_nonuniform[2 * i + 1];
194+
}
195+
}
196+
CNTime timer{};
197+
double t;
198+
if (dir == 1) { // test direction 1 (NU -> U spreading) ......................
199+
printf("spreadinterp %dD, %.3g U pts, dir=%d, tol=%.3g: nspread=%d\n", d, (double)Ng, opts.spread_direction,
200+
tol, opts.nspread);
201+
timer.start();
202+
ier = spreadinterp(N, N2, N3, d_uniform.data(), M, kx.data(), ky.data(), kz.data(), d_nonuniform.data(),
203+
opts);
204+
t = timer.elapsedsec();
205+
if (ier != 0) {
206+
printf("error (ier=%d)!\n", ier);
207+
return ier;
208+
} else
209+
printf(" %.3g NU pts in %.3g s \t%.3g pts/s \t%.3g spread pts/s\n", (double)M, t, M / t,
210+
pow(opts.nspread, d) * M / t);
211+
212+
FLT sumre = 0.0, sumim = 0.0; // check spreading accuracy, wrapping
213+
#pragma omp parallel for reduction(+ : sumre, sumim)
214+
for (BIGINT i = 0; i < Ng; ++i) {
215+
sumre += d_uniform[2 * i];
216+
sumim += d_uniform[2 * i + 1];
217+
}
218+
FLT pre = kersumre * strre - kersumim * strim; // pred ans, complex mult
219+
FLT pim = kersumim * strre + kersumre * strim;
220+
FLT maxerr = std::max(fabs(sumre - pre), fabs(sumim - pim));
221+
FLT ansmod = sqrt(sumre * sumre + sumim * sumim);
222+
printf(" rel err in total over grid: %.3g\n", maxerr / ansmod);
223+
// note this is weaker than below dir=2 test, but is good indicator that
224+
// periodic wrapping is correct
225+
}
226+
// test direction 2 (U -> NU interpolation) ..............................
227+
if (dir == 2) {
228+
printf("making more random NU pts...\n");
229+
for (BIGINT i = 0; i < Ng; ++i) { // unit grid data
230+
d_uniform[2 * i] = 1.0;
231+
d_uniform[2 * i + 1] = 0.0;
232+
}
233+
#pragma omp parallel
234+
{
235+
unsigned int se = MY_OMP_GET_THREAD_NUM(); // needed for parallel random #s
236+
#pragma omp for schedule(dynamic, 1000000)
237+
for (BIGINT i = 0; i < M; ++i) { // random target pts
238+
// kx[i]=10+.9*rand01r(&s)*N; // or if want to keep ns away from edges
239+
kx[i] = randm11r(&se) * 3 * M_PI;
240+
if (d > 1)
241+
ky[i] = randm11r(&se) * 3 * M_PI;
242+
if (d > 2)
243+
kz[i] = randm11r(&se) * 3 * M_PI;
244+
}
245+
}
246+
247+
opts.spread_direction = 2;
248+
printf("spreadinterp %dD, %.3g U pts, dir=%d, tol=%.3g: nspread=%d\n", d, (double)Ng, opts.spread_direction,
249+
tol, opts.nspread);
250+
timer.restart();
251+
ier = spreadinterp(N, N2, N3, d_uniform.data(), M, kx.data(), ky.data(), kz.data(), d_nonuniform.data(),
252+
opts);
253+
t = timer.elapsedsec();
254+
if (ier != 0) {
255+
printf("error (ier=%d)!\n", ier);
256+
return 1;
257+
} else
258+
printf(" %.3g NU pts in %.3g s \t%.3g pts/s \t%.3g spread pts/s\n", (double)M, t, M / t,
259+
pow(opts.nspread, d) * M / t);
260+
261+
// math test is worst-case error from pred value (kersum) on interp pts:
262+
maxerr = 0.0;
263+
for (BIGINT i = 0; i < M; ++i) {
264+
FLT err = std::max(fabs(d_nonuniform[2 * i] - kersumre), fabs(d_nonuniform[2 * i + 1] - kersumim));
265+
if (err > maxerr)
266+
maxerr = err;
267+
}
268+
ansmod = sqrt(kersumre * kersumre + kersumim * kersumim);
269+
printf(" max rel err in values at NU pts: %.3g\n", maxerr / ansmod);
270+
// this is stronger test than for dir=1, since it tests sum of kernel for
271+
// each NU pt. However, it cannot detect reading
272+
// from wrong grid pts (they are all unity)
273+
}
274+
}
275+
return 0;
276+
}

0 commit comments

Comments
 (0)