Skip to content

Commit e2e88cc

Browse files
committed
Add -od FFT range overdrive
1 parent 5b54a97 commit e2e88cc

File tree

4 files changed

+26
-24
lines changed

4 files changed

+26
-24
lines changed

src/Args.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,12 @@ named "config.txt" in the prpll run directory.
134134
- a list: 256:13:1K,8M
135135
See the list of FFTs at the end.
136136
137+
-od <value> : Overdrive the FFT range (ROE, CARRY32 limits). This allows to use a lower FFT for a given
138+
exponent (thus faster), but increases the risk of errors. The presence of errors is detected,
139+
but the errors are nevertheless costly computationally and better avoided.
140+
A <value> of 1 extends the range by 0.1%% (and this would be acceptable); a value of 10
141+
extends the range by 1%% (and this would be quite too much WRT errors).
142+
137143
-block <value> : PRP block size, one of: 1000, 500, 200. Default 1000.
138144
-carry long|short : force carry type. Short carry may be faster, but requires high bits/word.
139145
-prp <exponent> : run a single PRP test and exit, ignoring worktodo.txt
@@ -277,6 +283,9 @@ void Args::parse(const string& line) {
277283
}
278284
}
279285
throw "info";
286+
} else if (key == "-od") {
287+
double od = stod(s);
288+
fftOverdrive = 1 + od / 1000;
280289
} else if (key == "-tune") {
281290
assert(s.empty());
282291
doTune = true;

src/Args.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,11 @@ class Args {
8484

8585
u32 iters = 0;
8686
u32 nSavefiles = 20;
87+
88+
// Extend the range of the FFTs beyond what's safe WRT ROE and CARRY32.
89+
// The FFT will handle up to fft.maxExp() * fftOverdrive
90+
// Values below 1 are also allowed; they lower the maximum exponent (E) handled.
91+
double fftOverdrive = 1;
8792

8893
void printHelp();
8994
};

src/FFTConfig.cpp

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// Copyright (C) Mihai Preda.
22

33
#include "FFTConfig.h"
4+
#include "Args.h"
45
#include "common.h"
56
#include "log.h"
67
#include "TuneEntry.h"
@@ -183,35 +184,25 @@ FFTConfig FFTConfig::bestFit(const Args& args, u32 E, const string& spec) {
183184
// An FFT-spec was given: use exactly that FFT, only warning when it may be too small for E
184185
if (!spec.empty()) {
185186
FFTConfig fft{spec};
186-
if (fft.maxExp() < E) {
187-
log("%s can not handle %u\n", fft.spec().c_str(), E);
188-
throw "FFT size";
187+
if (fft.maxExp() * args.fftOverdrive < E) {
188+
log("Warning: %s (max %u) may be too small for %u\n", fft.spec().c_str(), fft.maxExp(), E);
189189
}
190190
return fft;
191-
/*
192-
for (const FFTShape& shape : FFTShape::multiSpec(spec)) {
193-
for (u32 v = 0; v < N_VARIANT; ++v) {
194-
if (FFTConfig fft{shape, v}; fft.maxExp() >= E) { return fft; }
195-
}
196-
}
197-
log("%s can not handle %u\n", spec.c_str(), E);
198-
throw "FFT size";
199-
*/
200191
}
201192

202193
// No FFT-spec given, so choose from tune.txt the fastest FFT that can handle E
203194
vector<TuneEntry> tunes = TuneEntry::readTuneFile(args);
204195
for (const TuneEntry& e : tunes) {
205196
// The first acceptable is the best as they're sorted by cost
206-
if (E <= e.fft.maxExp()) { return e.fft; }
197+
if (E <= e.fft.maxExp() * args.fftOverdrive) { return e.fft; }
207198
}
208199

209200
log("No FFTs found in tune.txt that can handle %u. Consider tuning with -tune\n", E);
210201

211202
// Take the first FFT that can handle E
212203
for (const FFTShape& shape : FFTShape::allShapes()) {
213204
for (u32 v = 0; v < 4; ++v) {
214-
if (FFTConfig fft{shape, v, CARRY_AUTO}; fft.maxExp() >= E) { return fft; }
205+
if (FFTConfig fft{shape, v, CARRY_AUTO}; fft.maxExp() * args.fftOverdrive >= E) { return fft; }
215206
}
216207
}
217208

src/Gpu.cpp

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -315,19 +315,16 @@ Gpu::Gpu(Queue* q, GpuCommon shared, FFTConfig fft, u32 E, const vector<KeyVal>&
315315
statsBits{u32(args.value("STATS", 0))},
316316
timeBufVect{profile.make("proofBufVect")}
317317
{
318-
// Sometimes we do want to run a FFT beyond a reasonable BPW (e.g. during -ztune), and these situations
319-
// coincide with logFftSize == false
320-
if (logFftSize && fft.maxExp() < E) {
321-
log("Exponent %u is too large for FFT %s\n", E, fft.spec().c_str());
322-
throw "FFT too small";
323-
}
324318

325319
float bitsPerWord = E / float(N);
326-
if (logFftSize) { log("FFT: %s %s (%.2f bpw)\n", numberK(N).c_str(), fft.spec().c_str(), bitsPerWord); }
320+
if (logFftSize) {
321+
log("FFT: %s %s (%.2f bpw)\n", numberK(N).c_str(), fft.spec().c_str(), bitsPerWord);
327322

328-
if (bitsPerWord > 20) {
329-
log("FFT size too small for exponent (%.2f bits/word).\n", bitsPerWord);
330-
throw "FFT size too small";
323+
// Sometimes we do want to run an FFT beyond a reasonable BPW (e.g. during -ztune), and these situations
324+
// coincide with logFftSize == false
325+
if (fft.maxExp() < E) {
326+
log("Warning: %s (max %u) may be too small for %u\n", fft.spec().c_str(), fft.maxExp(), E);
327+
}
331328
}
332329

333330
if (bitsPerWord < FFTShape::MIN_BPW) {

0 commit comments

Comments
 (0)