Skip to content

Commit 82053b8

Browse files
committed
Fix priority of -use args vs. per-FFT config
1 parent 7deb35a commit 82053b8

File tree

2 files changed

+13
-4
lines changed

2 files changed

+13
-4
lines changed

src/Gpu.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,11 +151,19 @@ constexpr bool isInList(const string& s, initializer_list<string> list) {
151151
}
152152

153153
string clDefines(const Args& args, cl_device_id id, FFTConfig fft, const vector<KeyVal>& extraConf, u32 E, bool doLog) {
154-
map<string, string> config{args.flags};
154+
map<string, string> config;
155+
156+
// Highest priority is the requested "extra" conf
157+
config.insert(extraConf.begin(), extraConf.end());
158+
159+
// Next, args config
160+
config.insert(args.flags.begin(), args.flags.end());
161+
162+
// Lowest priority: the per-FFT config if any
155163
if (auto it = args.perFftConfig.find(fft.shape.spec()); it != args.perFftConfig.end()) {
164+
// log("Found %s\n", fft.shape.spec().c_str());
156165
config.insert(it->second.begin(), it->second.end());
157166
}
158-
config.insert(extraConf.begin(), extraConf.end());
159167

160168
for (const auto& [k, v] : config) {
161169
bool isValid = isInList(k, {

src/tune.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,8 @@ void Tune::ctune() {
225225
}
226226

227227
for (FFTShape shape : shapes) {
228-
u32 exponent = primes.prevPrime(FFTConfig{shape, 0, CARRY_AUTO}.maxExp());
228+
FFTConfig fft{shape, 0, CARRY_32};
229+
u32 exponent = primes.prevPrime(fft.maxExp());
229230
// log("tuning %10s with exponent %u\n", fft.shape.spec().c_str(), exponent);
230231

231232
vector<int> bestPos(configsVect.size());
@@ -242,7 +243,7 @@ void Tune::ctune() {
242243
for (u32 k = i + 1; k < configsVect.size(); ++k) {
243244
add(c, configsVect[k][bestPos[k]]);
244245
}
245-
auto cost = Gpu::make(q, exponent, shared, FFTConfig{shape, 0, CARRY_AUTO}, c, false)->timePRP();
246+
auto cost = Gpu::make(q, exponent, shared, fft, c, false)->timePRP();
246247

247248
bool isBest = (cost < best.cost);
248249
if (isBest) {

0 commit comments

Comments
 (0)