Skip to content

Commit a55a071

Browse files
committed
Saved a couple of F64 ops along with a tiny Z improvement by tweaking MM_CHAIN=0 case
1 parent 4dd475f commit a55a071

File tree

1 file changed

+7
-4
lines changed

1 file changed

+7
-4
lines changed

src/cl/fft-middle.cl

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -117,15 +117,18 @@ void middleMul(T2 *u, u32 s, Trig trig) {
 if (MIDDLE >= 10) {
   base = csqTrigFancy(w);
   WADDF(2, base);
+  base = ccubeTrigFancy(base, w);
+  WADDF(3, base);
   base.x += 1;
 } else {
-  base = w;
+  base = csqTrigFancy(w);
+  WADDF(2, base);
+  base = ccubeTrigFancy(base, w);
+  WADDF(3, base);
   base.x += 1;
-  base = cmulFancy(base, w);
-  WADD(2, base);
 }
 
-for (u32 k = 3; k < MIDDLE; ++k) {
+for (u32 k = 4; k < MIDDLE; ++k) {
   base = cmulFancy(base, w);
   WADD(k, base);
 }

0 commit comments

Comments
 (0)