Skip to content

Commit b0416b2

Browse files
committed
Added a FastSin() method that's actually fast
1 parent 42ddd61 commit b0416b2

File tree

3 files changed

+25815
-31292
lines changed

3 files changed

+25815
-31292
lines changed

Fix64.cs

Lines changed: 58 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
using System.IO;
66
using System.Linq;
77
using System.Numerics;
8+
using System.Runtime.CompilerServices;
89
using System.Text;
910
using System.Threading.Tasks;
1011

@@ -33,11 +34,11 @@ public partial struct Fix64 : IEquatable<Fix64>, IComparable<Fix64> {
3334
const long MIN_VALUE = long.MinValue;
3435
const int NUM_BITS = 64;
3536
const int DECIMAL_PLACES = 32;
36-
const long ONE = 0x0000000100000000;
37-
const long PI_TIMES_2 = 0x00000006487ED511;
38-
const long PI = 0x00000003243F6A88;
39-
const long PI_OVER_2 = 0x00000001921FB544;
40-
const int SIN_LUT_SIZE = 250001;
37+
const long ONE = 1L << DECIMAL_PLACES;
38+
const long PI_TIMES_2 = 0x6487ED511;
39+
const long PI = 0x3243F6A88;
40+
const long PI_OVER_2 = 0x1921FB544;
41+
const int SIN_LUT_SIZE = (int)(PI_OVER_2 >> 15);
4142

4243
/// <summary>
4344
/// Returns a number indicating the sign of a Fix64 number.
@@ -259,6 +260,7 @@ public static Fix64 FastMul(Fix64 x, Fix64 y) {
259260
return new Fix64(sum);
260261
}
261262

263+
[MethodImplAttribute(MethodImplOptions.AggressiveInlining)]
262264
static int Clz(ulong x) {
263265
int result = 0;
264266
while ((x & 0xF000000000000000) == 0) { result += 4; x <<= 4; }
@@ -428,59 +430,79 @@ public static Fix64 Sqrt(Fix64 x) {
428430

429431
/// <summary>
430432
/// Returns the Sine of x.
431-
/// This function is accurate to around 3 * Fix64.Precision for small enough values of x.
433+
/// This function has about 9 decimals of accuracy for small values of x.
432434
/// It may lose accuracy as the value of x grows.
433435
/// Performance: about 25% slower than Math.Sin() in x64, and 200% slower in x86.
434436
/// </summary>
435437
public static Fix64 Sin(Fix64 x) {
436-
var xl = x.m_rawValue;
438+
bool flipHorizontal, flipVertical;
439+
var clampedL = ClampSinValue(x.m_rawValue, out flipHorizontal, out flipVertical);
440+
var clamped = new Fix64(clampedL);
441+
442+
// Find the two closest values in the LUT and perform linear interpolation
443+
// This is what kills the performance of this function on x86 - x64 is fine though
444+
var rawIndex = FastMul(clamped, SinInterval);
445+
var roundedIndex = Round(rawIndex);
446+
var indexError = FastSub(rawIndex, roundedIndex);
447+
448+
var nearestValue = new Fix64(SinLut[flipHorizontal ?
449+
SinLut.Length - 1 - (int)roundedIndex :
450+
(int)roundedIndex]);
451+
var secondNearestValue = new Fix64(SinLut[flipHorizontal ?
452+
SinLut.Length - 1 - (int)roundedIndex - Sign(indexError) :
453+
(int)roundedIndex + Sign(indexError)]);
454+
455+
var delta = FastMul(indexError, FastAbs(FastSub(nearestValue, secondNearestValue))).m_rawValue;
456+
var interpolatedValue = nearestValue.m_rawValue + (flipHorizontal ? -delta : delta);
457+
var finalValue = flipVertical ? -interpolatedValue : interpolatedValue;
458+
return new Fix64(finalValue);
459+
}
460+
461+
/// <summary>
462+
/// Returns a rough approximation of the Sine of x.
463+
/// This is at least 3 times faster than Sin() on x86 and slightly faster than Math.Sin(),
464+
/// however its accuracy is limited to 4-5 decimals, for small enough values of x.
465+
/// </summary>
466+
public static Fix64 FastSin(Fix64 x) {
467+
bool flipHorizontal, flipVertical;
468+
var clampedL = ClampSinValue(x.m_rawValue, out flipHorizontal, out flipVertical);
469+
470+
// Here we use the fact that the SinLut table has a number of entries
471+
// equal to (PI_OVER_2 >> 15) to use the angle to index directly into it
472+
var rawIndex = (uint)(clampedL >> 15);
473+
if (rawIndex == SIN_LUT_SIZE) {
474+
--rawIndex;
475+
}
476+
var nearestValue = SinLut[flipHorizontal ?
477+
SinLut.Length - 1 - (int)rawIndex :
478+
(int)rawIndex];
479+
return new Fix64(flipVertical ? -nearestValue : nearestValue);
480+
}
437481

482+
[MethodImplAttribute(MethodImplOptions.AggressiveInlining)]
483+
static long ClampSinValue(long angle, out bool flipHorizontal, out bool flipVertical) {
438484
// Clamp value to 0 - 2*PI using modulo; this is very slow but there's no better way AFAIK
439-
var clamped2Pi = xl % PI_TIMES_2;
440-
if (xl < 0) {
485+
var clamped2Pi = angle % PI_TIMES_2;
486+
if (angle < 0) {
441487
clamped2Pi += PI_TIMES_2;
442488
}
443489

444490
// The LUT contains values for 0 - PiOver2; every other value must be obtained by
445491
// vertical or horizontal mirroring
446-
var flipVertical = clamped2Pi >= PI;
492+
flipVertical = clamped2Pi >= PI;
447493
// obtain (angle % PI) from (angle % 2PI) - much faster than doing another modulo
448494
var clampedPi = clamped2Pi;
449495
while (clampedPi >= PI) {
450496
clampedPi -= PI;
451497
}
452-
var flipHorizontal = clampedPi >= PI_OVER_2;
498+
flipHorizontal = clampedPi >= PI_OVER_2;
453499
// obtain (angle % PI_OVER_2) from (angle % PI) - much faster than doing another modulo
454500
var clampedPiOver2 = clampedPi;
455501
if (clampedPiOver2 >= PI_OVER_2) {
456502
clampedPiOver2 -= PI_OVER_2;
457503
}
458-
var clamped = new Fix64(clampedPiOver2);
459-
460-
// Find the two closest values in the LUT and perform linear interpolation
461-
// This is unfortunately very expensive on x86; a "FastSin" could skip most of this
462-
// at the expense of accuracy
463-
var rawIndex = FastMul(clamped, SinInterval);
464-
var roundedIndex = Round(rawIndex);
465-
var indexError = FastSub(rawIndex, roundedIndex);
466-
467-
var nearestValue = new Fix64(SinLut[flipHorizontal ?
468-
SinLut.Length - 1 - (int)roundedIndex :
469-
(int)roundedIndex]);
470-
var secondNearestValue = new Fix64(SinLut[flipHorizontal ?
471-
SinLut.Length - 1 - (int)roundedIndex - Sign(indexError) :
472-
(int)roundedIndex + Sign(indexError)]);
473-
474-
var delta = FastMul(indexError, FastAbs(FastSub(nearestValue, secondNearestValue))).m_rawValue;
475-
var interpolatedValue = nearestValue.m_rawValue + (flipHorizontal ? -delta : delta);
476-
var finalValue = flipVertical ? -interpolatedValue : interpolatedValue;
477-
return new Fix64(finalValue);
478-
//var nearestValue = SinLut[flipHorizontal ?
479-
// SinLut.Length - 1 - (int)rawIndex :
480-
// (int)rawIndex];
481-
//return new Fix64(flipVertical ? -nearestValue : nearestValue);
504+
return clampedPiOver2;
482505
}
483-
484506

485507
public static explicit operator Fix64(long value) {
486508
return new Fix64(value * ONE);

0 commit comments

Comments
 (0)