@@ -28,31 +28,35 @@ struct sincos_accumulator
2828
2929 static this_t create (T cosA, T sinA)
3030 {
31+ assert (sinA >= T (0.0 ));
3132 this_t retval;
3233 retval.runningSum = complex_t<T>::create (cosA, sinA);
33- // retval.runningSum.real(cosA);
34- // retval.runningSum.imag(sinA);
35- retval.wraparound = 0u;
34+ retval.wraparound = hlsl::mix (T (0.0 ), T (1.0 ), cosA == T (-1.0 ));
3635 return retval;
3736 }
3837
39- void addCosine (T cosA, T sinA)
38+ // This utility expects that all input angles being added are [0,PI], i.e. runningSum.y and sinA are always positive
39+ // We make use of sine and cosine angle sum formulas to accumulate a running sum of angles
40+ // and any "overflow" (when sum goes over PI) is stored in wraparound
41+ // This way, we can accumulate the sines and cosines of a set of angles, and only have to do one acos() call to retrieve the final sum
42+ void addAngle (T cosA, T sinA)
4043 {
4144 const T a = cosA;
4245 const T cosB = runningSum.real ();
4346 const T sinB = runningSum.imag ();
44- const bool reverse = abs<T>(min <T>(a, cosB)) > max <T>(a, cosB);
47+ const bool overflow = abs<T>(min <T>(a, cosB)) > max <T>(a, cosB);
4548 const T c = a * cosB - sinA * sinB;
4649 const T d = sinA * cosB + a * sinB;
4750
48- runningSum.real (ieee754::flipSign<T>(c, reverse ));
49- runningSum.imag (ieee754::flipSign<T>(d, reverse ));
51+ runningSum.real (ieee754::flipSign<T>(c, overflow ));
52+ runningSum.imag (ieee754::flipSign<T>(d, overflow ));
5053
51- wraparound += hlsl::mix (0u, 1u, reverse);
54+ if (overflow)
55+ wraparound++;
5256 }
5357 void addCosine (T cosA)
5458 {
55- addCosine (cosA, sqrt<T>(T (1.0 ) - cosA * cosA));
59+ addAngle (cosA, sqrt<T>(T (1.0 ) - cosA * cosA));
5660 }
5761
5862 T getSumofArccos ()
@@ -61,7 +65,7 @@ struct sincos_accumulator
6165 }
6266
6367 complex_t<T> runningSum;
64- uint16_t wraparound;
68+ T wraparound; // counts in pi (half revolutions)
6569};
6670
6771}
0 commit comments