|
5 | 5 | using System.IO; |
6 | 6 | using System.Linq; |
7 | 7 | using System.Numerics; |
| 8 | +using System.Runtime.CompilerServices; |
8 | 9 | using System.Text; |
9 | 10 | using System.Threading.Tasks; |
10 | 11 |
|
@@ -33,11 +34,11 @@ public partial struct Fix64 : IEquatable<Fix64>, IComparable<Fix64> { |
33 | 34 | const long MIN_VALUE = long.MinValue; |
34 | 35 | const int NUM_BITS = 64; |
35 | 36 | const int DECIMAL_PLACES = 32; |
36 | | - const long ONE = 0x0000000100000000; |
37 | | - const long PI_TIMES_2 = 0x00000006487ED511; |
38 | | - const long PI = 0x00000003243F6A88; |
39 | | - const long PI_OVER_2 = 0x00000001921FB544; |
40 | | - const int SIN_LUT_SIZE = 250001; |
| 37 | + const long ONE = 1L << DECIMAL_PLACES; |
| 38 | + const long PI_TIMES_2 = 0x6487ED511; |
| 39 | + const long PI = 0x3243F6A88; |
| 40 | + const long PI_OVER_2 = 0x1921FB544; |
| 41 | + const int SIN_LUT_SIZE = (int)(PI_OVER_2 >> 15); |
41 | 42 |
|
42 | 43 | /// <summary> |
43 | 44 | /// Returns a number indicating the sign of a Fix64 number. |
@@ -259,6 +260,7 @@ public static Fix64 FastMul(Fix64 x, Fix64 y) { |
259 | 260 | return new Fix64(sum); |
260 | 261 | } |
261 | 262 |
|
| 263 | + [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] |
262 | 264 | static int Clz(ulong x) { |
263 | 265 | int result = 0; |
264 | 266 | while ((x & 0xF000000000000000) == 0) { result += 4; x <<= 4; } |
@@ -428,59 +430,79 @@ public static Fix64 Sqrt(Fix64 x) { |
428 | 430 |
|
429 | 431 | /// <summary> |
430 | 432 | /// Returns the Sine of x. |
431 | | - /// This function is accurate to around 3 * Fix64.Precision for small enough values of x. |
| 433 | + /// This function has about 9 decimals of accuracy for small values of x. |
432 | 434 | /// It may lose accuracy as the value of x grows. |
433 | 435 | /// Performance: about 25% slower than Math.Sin() in x64, and 200% slower in x86. |
434 | 436 | /// </summary> |
435 | 437 | public static Fix64 Sin(Fix64 x) { |
436 | | - var xl = x.m_rawValue; |
| 438 | + bool flipHorizontal, flipVertical; |
| 439 | + var clampedL = ClampSinValue(x.m_rawValue, out flipHorizontal, out flipVertical); |
| 440 | + var clamped = new Fix64(clampedL); |
| 441 | + |
| 442 | + // Find the two closest values in the LUT and perform linear interpolation |
| 443 | + // This is what kills the performance of this function on x86 - x64 is fine though |
| 444 | + var rawIndex = FastMul(clamped, SinInterval); |
| 445 | + var roundedIndex = Round(rawIndex); |
| 446 | + var indexError = FastSub(rawIndex, roundedIndex); |
| 447 | + |
| 448 | + var nearestValue = new Fix64(SinLut[flipHorizontal ? |
| 449 | + SinLut.Length - 1 - (int)roundedIndex : |
| 450 | + (int)roundedIndex]); |
| 451 | + var secondNearestValue = new Fix64(SinLut[flipHorizontal ? |
| 452 | + SinLut.Length - 1 - (int)roundedIndex - Sign(indexError) : |
| 453 | + (int)roundedIndex + Sign(indexError)]); |
| 454 | + |
| 455 | + var delta = FastMul(indexError, FastAbs(FastSub(nearestValue, secondNearestValue))).m_rawValue; |
| 456 | + var interpolatedValue = nearestValue.m_rawValue + (flipHorizontal ? -delta : delta); |
| 457 | + var finalValue = flipVertical ? -interpolatedValue : interpolatedValue; |
| 458 | + return new Fix64(finalValue); |
| 459 | + } |
| 460 | + |
| 461 | + /// <summary> |
| 462 | + /// Returns a rough approximation of the Sine of x. |
| 463 | + /// This is at least 3 times faster than Sin() on x86 and slightly faster than Math.Sin(), |
| 464 | + /// however its accuracy is limited to 4-5 decimals, for small enough values of x. |
| 465 | + /// </summary> |
| 466 | + public static Fix64 FastSin(Fix64 x) { |
| 467 | + bool flipHorizontal, flipVertical; |
| 468 | + var clampedL = ClampSinValue(x.m_rawValue, out flipHorizontal, out flipVertical); |
| 469 | + |
| 470 | + // Here we use the fact that the SinLut table has a number of entries |
| 471 | + // equal to (PI_OVER_2 >> 15) to use the angle to index directly into it |
| 472 | + var rawIndex = (uint)(clampedL >> 15); |
| 473 | + if (rawIndex == SIN_LUT_SIZE) { |
| 474 | + --rawIndex; |
| 475 | + } |
| 476 | + var nearestValue = SinLut[flipHorizontal ? |
| 477 | + SinLut.Length - 1 - (int)rawIndex : |
| 478 | + (int)rawIndex]; |
| 479 | + return new Fix64(flipVertical ? -nearestValue : nearestValue); |
| 480 | + } |
437 | 481 |
|
| 482 | + [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] |
| 483 | + static long ClampSinValue(long angle, out bool flipHorizontal, out bool flipVertical) { |
438 | 484 | // Clamp value to 0 - 2*PI using modulo; this is very slow but there's no better way AFAIK |
439 | | - var clamped2Pi = xl % PI_TIMES_2; |
440 | | - if (xl < 0) { |
| 485 | + var clamped2Pi = angle % PI_TIMES_2; |
| 486 | + if (angle < 0) { |
441 | 487 | clamped2Pi += PI_TIMES_2; |
442 | 488 | } |
443 | 489 |
|
444 | 490 | // The LUT contains values for 0 - PiOver2; every other value must be obtained by |
445 | 491 | // vertical or horizontal mirroring |
446 | | - var flipVertical = clamped2Pi >= PI; |
| 492 | + flipVertical = clamped2Pi >= PI; |
447 | 493 | // obtain (angle % PI) from (angle % 2PI) - much faster than doing another modulo |
448 | 494 | var clampedPi = clamped2Pi; |
449 | 495 | while (clampedPi >= PI) { |
450 | 496 | clampedPi -= PI; |
451 | 497 | } |
452 | | - var flipHorizontal = clampedPi >= PI_OVER_2; |
| 498 | + flipHorizontal = clampedPi >= PI_OVER_2; |
453 | 499 | // obtain (angle % PI_OVER_2) from (angle % PI) - much faster than doing another modulo |
454 | 500 | var clampedPiOver2 = clampedPi; |
455 | 501 | if (clampedPiOver2 >= PI_OVER_2) { |
456 | 502 | clampedPiOver2 -= PI_OVER_2; |
457 | 503 | } |
458 | | - var clamped = new Fix64(clampedPiOver2); |
459 | | - |
460 | | - // Find the two closest values in the LUT and perform linear interpolation |
461 | | - // This is unfortunately very expensive on x86; a "FastSin" could skip most of this |
462 | | - // at the expense of accuracy |
463 | | - var rawIndex = FastMul(clamped, SinInterval); |
464 | | - var roundedIndex = Round(rawIndex); |
465 | | - var indexError = FastSub(rawIndex, roundedIndex); |
466 | | - |
467 | | - var nearestValue = new Fix64(SinLut[flipHorizontal ? |
468 | | - SinLut.Length - 1 - (int)roundedIndex : |
469 | | - (int)roundedIndex]); |
470 | | - var secondNearestValue = new Fix64(SinLut[flipHorizontal ? |
471 | | - SinLut.Length - 1 - (int)roundedIndex - Sign(indexError) : |
472 | | - (int)roundedIndex + Sign(indexError)]); |
473 | | - |
474 | | - var delta = FastMul(indexError, FastAbs(FastSub(nearestValue, secondNearestValue))).m_rawValue; |
475 | | - var interpolatedValue = nearestValue.m_rawValue + (flipHorizontal ? -delta : delta); |
476 | | - var finalValue = flipVertical ? -interpolatedValue : interpolatedValue; |
477 | | - return new Fix64(finalValue); |
478 | | - //var nearestValue = SinLut[flipHorizontal ? |
479 | | - // SinLut.Length - 1 - (int)rawIndex : |
480 | | - // (int)rawIndex]; |
481 | | - //return new Fix64(flipVertical ? -nearestValue : nearestValue); |
| 504 | + return clampedPiOver2; |
482 | 505 | } |
483 | | - |
484 | 506 |
|
485 | 507 | public static explicit operator Fix64(long value) { |
486 | 508 | return new Fix64(value * ONE); |
|
0 commit comments