@@ -395,6 +395,46 @@ namespace hlsl
395
395
return bit_cast<this_t>(data ^ ieee754::traits<float64_t>::signMask);
396
396
}
397
397
398
+ static this_t sqrt (this_t number)
399
+ {
400
+ // so it doesn't return NaN for -0.0
401
+ bool isZero = !(number.data & 0x7FFFFFFFFFFFFFFFull);
402
+ if (isZero)
403
+ return number;
404
+
405
+ bool isNegative = (number.data >> 63 ) > 0 ;
406
+ if (isNegative)
407
+ return bit_cast<this_t>(ieee754::traits<this_t>::quietNaN);
408
+
409
+ if (!FastMath)
410
+ {
411
+ bool isInf = cpp_compat_intrinsics_impl::isinf_uint_impl (number.data);
412
+ if (isInf)
413
+ return number;
414
+ }
415
+
416
+ // find square root initial guess using the fast inverse square root algorithm
417
+ nbl::hlsl::emulated_float64_t<true , true > invSquareRoot = number;
418
+ {
419
+ int64_t i = 0x5fe6eb50c7b537a9ull - (number.data >> 1 );
420
+ invSquareRoot.data = i;
421
+
422
+ nbl::hlsl::emulated_float64_t<true , true > threeHalfs = emulated_float64_t<true , true >::create (1.5 );
423
+ nbl::hlsl::emulated_float64_t<true , true > x2 = number * emulated_float64_t<true , true >::create (0.5 );
424
+ invSquareRoot = invSquareRoot * (threeHalfs - (x2 * invSquareRoot * invSquareRoot));
425
+ }
426
+
427
+ // find sqrt approximation using the Newton-Raphson method
428
+ nbl::hlsl::emulated_float64_t<true , true > squareRoot = nbl::hlsl::emulated_float64_t<true , true >::create (1.0 ) / invSquareRoot;
429
+ const int Iterations = 5 ;
430
+ for (int i = 0 ; i < Iterations; ++i)
431
+ {
432
+ squareRoot = nbl::hlsl::emulated_float64_t<true , true >::create (0.5 ) * (squareRoot + number / squareRoot);
433
+ }
434
+
435
+ return squareRoot;
436
+ }
437
+
398
438
NBL_CONSTEXPR_STATIC bool isFastMathSupported = FastMath;
399
439
};
400
440
0 commit comments