Skip to content

Commit d1ff526

Browse files
committed
erfinv for fp64
1 parent 0c2de65 commit d1ff526

File tree

1 file changed

+88
-3
lines changed

1 file changed

+88
-3
lines changed

include/nbl/builtin/hlsl/tgmath/impl.hlsl

Lines changed: 88 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -146,7 +146,7 @@ template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(frexpStruct_helper, fre
146146
#define ISINF_AND_ISNAN_RETURN_TYPE conditional_t<is_vector_v<T>, vector<bool, vector_traits<T>::Dimension>, bool>
147147
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(isinf_helper, isInf, (T), (T), ISINF_AND_ISNAN_RETURN_TYPE)
148148
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(isnan_helper, isNan, (T), (T), ISINF_AND_ISNAN_RETURN_TYPE)
149-
#undef ISINF_AND_ISNAN_RETURN_TYPE
149+
#undef ISINF_AND_ISNAN_RETURN_TYPE
150150

151151
#undef DECLVAL
152152
#undef DECL_ARG
@@ -596,6 +596,91 @@ struct erfInv_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPointSc
596596
}
597597
};
598598

599+
template<>
600+
struct erfInv_helper<float64_t>
601+
{
602+
static float64_t __call(NBL_CONST_REF_ARG(float64_t) _x)
603+
{
604+
float64_t x = clamp<float64_t>(_x, NBL_FP64_LITERAL(-0.99999), NBL_FP64_LITERAL(0.99999));
605+
606+
float64_t w = -log_helper<float64_t>::__call((NBL_FP64_LITERAL(1.0) - x) * (NBL_FP64_LITERAL(1.0) + x));
607+
float64_t p;
608+
if (w < 6.250000)
609+
{
610+
w -= NBL_FP64_LITERAL(3.125000);
611+
p = NBL_FP64_LITERAL(-3.6444120640178196996e-21);
612+
p = NBL_FP64_LITERAL(-1.685059138182016589e-19) + p * w;
613+
p = NBL_FP64_LITERAL(1.2858480715256400167e-18) + p * w;
614+
p = NBL_FP64_LITERAL(1.115787767802518096e-17) + p * w;
615+
p = NBL_FP64_LITERAL(-1.333171662854620906e-16) + p * w;
616+
p = NBL_FP64_LITERAL(2.0972767875968561637e-17) + p * w;
617+
p = NBL_FP64_LITERAL(6.6376381343583238325e-15) + p * w;
618+
p = NBL_FP64_LITERAL(-4.0545662729752068639e-14) + p * w;
619+
p = NBL_FP64_LITERAL(-8.1519341976054721522e-14) + p * w;
620+
p = NBL_FP64_LITERAL(2.6335093153082322977e-12) + p * w;
621+
p = NBL_FP64_LITERAL(-1.2975133253453532498e-11) + p * w;
622+
p = NBL_FP64_LITERAL(-5.4154120542946279317e-11) + p * w;
623+
p = NBL_FP64_LITERAL(1.051212273321532285e-09) + p * w;
624+
p = NBL_FP64_LITERAL(-4.1126339803469836976e-09) + p * w;
625+
p = NBL_FP64_LITERAL(-2.9070369957882005086e-08) + p * w;
626+
p = NBL_FP64_LITERAL(4.2347877827932403518e-07) + p * w;
627+
p = NBL_FP64_LITERAL(-1.3654692000834678645e-06) + p * w;
628+
p = NBL_FP64_LITERAL(-1.3882523362786468719e-05) + p * w;
629+
p = NBL_FP64_LITERAL(0.0001867342080340571352) + p * w;
630+
p = NBL_FP64_LITERAL(-0.00074070253416626697512) + p * w;
631+
p = NBL_FP64_LITERAL(-0.0060336708714301490533) + p * w;
632+
p = NBL_FP64_LITERAL(0.24015818242558961693) + p * w;
633+
p = NBL_FP64_LITERAL(1.6536545626831027356) + p * w;
634+
}
635+
else if (w < 16.000000)
636+
{
637+
w = sqrt_helper<float64_t>::__call(w) - NBL_FP64_LITERAL(3.250000);
638+
p = NBL_FP64_LITERAL(2.2137376921775787049e-09);
639+
p = NBL_FP64_LITERAL(9.0756561938885390979e-08) + p * w;
640+
p = NBL_FP64_LITERAL(-2.7517406297064545428e-07) + p * w;
641+
p = NBL_FP64_LITERAL(1.8239629214389227755e-08) + p * w;
642+
p = NBL_FP64_LITERAL(1.5027403968909827627e-06) + p * w;
643+
p = NBL_FP64_LITERAL(-4.013867526981545969e-06) + p * w;
644+
p = NBL_FP64_LITERAL(2.9234449089955446044e-06) + p * w;
645+
p = NBL_FP64_LITERAL(1.2475304481671778723e-05) + p * w;
646+
p = NBL_FP64_LITERAL(-4.7318229009055733981e-05) + p * w;
647+
p = NBL_FP64_LITERAL(6.8284851459573175448e-05) + p * w;
648+
p = NBL_FP64_LITERAL(2.4031110387097893999e-05) + p * w;
649+
p = NBL_FP64_LITERAL(-0.0003550375203628474796) + p * w;
650+
p = NBL_FP64_LITERAL(0.00095328937973738049703) + p * w;
651+
p = NBL_FP64_LITERAL(-0.0016882755560235047313) + p * w;
652+
p = NBL_FP64_LITERAL(0.0024914420961078508066) + p * w;
653+
p = NBL_FP64_LITERAL(-0.0037512085075692412107) + p * w;
654+
p = NBL_FP64_LITERAL(0.005370914553590063617) + p * w;
655+
p = NBL_FP64_LITERAL(1.0052589676941592334) + p * w;
656+
p = NBL_FP64_LITERAL(3.0838856104922207635) + p * w;
657+
}
658+
else
659+
{
660+
w = sqrt_helper<float64_t>::__call(w) - NBL_FP64_LITERAL(5.000000);
661+
p = NBL_FP64_LITERAL(-2.7109920616438573243e-11);
662+
p = NBL_FP64_LITERAL(-2.5556418169965252055e-10) + p * w;
663+
p = NBL_FP64_LITERAL(1.5076572693500548083e-09) + p * w;
664+
p = NBL_FP64_LITERAL(-3.7894654401267369937e-09) + p * w;
665+
p = NBL_FP64_LITERAL(7.6157012080783393804e-09) + p * w;
666+
p = NBL_FP64_LITERAL(-1.4960026627149240478e-08) + p * w;
667+
p = NBL_FP64_LITERAL(2.9147953450901080826e-08) + p * w;
668+
p = NBL_FP64_LITERAL(-6.7711997758452339498e-08) + p * w;
669+
p = NBL_FP64_LITERAL(2.2900482228026654717e-07) + p * w;
670+
p = NBL_FP64_LITERAL(-9.9298272942317002539e-07) + p * w;
671+
p = NBL_FP64_LITERAL(4.5260625972231537039e-06) + p * w;
672+
p = NBL_FP64_LITERAL(-1.9681778105531670567e-05) + p * w;
673+
p = NBL_FP64_LITERAL(7.5995277030017761139e-05) + p * w;
674+
p = NBL_FP64_LITERAL(-0.00021503011930044477347) + p * w;
675+
p = NBL_FP64_LITERAL(-0.00013871931833623122026) + p * w;
676+
p = NBL_FP64_LITERAL(1.0103004648645343977) + p * w;
677+
p = NBL_FP64_LITERAL(4.8499064014085844221) + p * w;
678+
}
679+
680+
return p * x;
681+
}
682+
};
683+
599684
#ifdef __HLSL_VERSION
600685
// SPIR-V already defines specializations for builtin vector types
601686
#define VECTOR_SPECIALIZATION_CONCEPT concepts::Vectorial<T> && !is_vector_v<T>
@@ -668,11 +753,11 @@ struct pow_helper<T NBL_PARTIAL_REQ_BOT(VECTOR_SPECIALIZATION_CONCEPT) >
668753
using traits = hlsl::vector_traits<T>;
669754
array_get<T, typename traits::scalar_type> getter;
670755
array_set<T, typename traits::scalar_type> setter;
671-
756+
672757
return_t output;
673758
for (uint32_t i = 0; i < traits::Dimension; ++i)
674759
setter(output, i, pow_helper<typename traits::scalar_type>::__call(getter(x, i), getter(y, i)));
675-
760+
676761
return output;
677762
}
678763
};

0 commit comments

Comments
 (0)