Skip to content

Commit 37a8b7b

Browse files
committed
reverted some changes, erf/erfInv fixes
1 parent 27de627 commit 37a8b7b

File tree

2 files changed

+89
-88
lines changed

2 files changed

+89
-88
lines changed

include/nbl/builtin/hlsl/concepts/core.hlsl

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -29,13 +29,13 @@ template<typename T>
2929
NBL_BOOL_CONCEPT UnsignedIntegral = !nbl::hlsl::is_signed_v<T> && ::nbl::hlsl::is_integral_v<T>;
3030

3131
template<typename T>
32-
NBL_BOOL_CONCEPT FloatingPoint = nbl::hlsl::is_floating_point_v<T> || nbl::hlsl::is_same_v<T, float16_t>;
32+
NBL_BOOL_CONCEPT FloatingPoint = nbl::hlsl::is_floating_point_v<T>;
3333

3434
template<typename T>
3535
NBL_BOOL_CONCEPT Boolean = nbl::hlsl::is_same_v<T, bool> || (nbl::hlsl::is_vector_v<T> && nbl::hlsl::is_same_v<typename vector_traits<T>::scalar_type, bool>);
3636

3737
template <typename T>
38-
NBL_BOOL_CONCEPT Scalar = nbl::hlsl::is_scalar_v<T> || nbl::hlsl::is_same_v<T, float16_t>;
38+
NBL_BOOL_CONCEPT Scalar = nbl::hlsl::is_scalar_v<T>;
3939

4040
template<typename T>
4141
NBL_BOOL_CONCEPT IntegralScalar = nbl::hlsl::is_integral_v<T> && nbl::hlsl::is_scalar_v<T>;
@@ -47,7 +47,7 @@ template<typename T>
4747
NBL_BOOL_CONCEPT UnsignedIntegralScalar = !nbl::hlsl::is_signed_v<T> && ::nbl::hlsl::is_integral_v<T> && nbl::hlsl::is_scalar_v<T>;
4848

4949
template<typename T>
50-
NBL_BOOL_CONCEPT FloatingPointScalar = (nbl::hlsl::is_floating_point_v<T> && nbl::hlsl::is_scalar_v<T>) || nbl::hlsl::is_same_v<T, float16_t>;
50+
NBL_BOOL_CONCEPT FloatingPointScalar = (nbl::hlsl::is_floating_point_v<T> && nbl::hlsl::is_scalar_v<T>);
5151

5252
template<typename T>
5353
NBL_BOOL_CONCEPT BooleanScalar = concepts::Boolean<T> && nbl::hlsl::is_scalar_v<T>;

include/nbl/builtin/hlsl/tgmath/impl.hlsl

Lines changed: 86 additions & 85 deletions
Original file line number | Diff line number | Diff line change
@@ -405,7 +405,7 @@ struct erf_helper<float16_t>
405405
float16_t x = abs_helper<float16_t>::__call(_x);
406406

407407
float16_t t = float16_t(1.f) / (float16_t(1.f) + p * x);
408-
float16_t y = float16_t(1.f) - (((a3 * t + a2) * t) + a1) * t * exp(-x * x);
408+
float16_t y = float16_t(1.f) - (((a3 * t + a2) * t) + a1) * t * exp_helper<float16_t>::__call(-x * x);
409409

410410
return _sign * y;
411411
}
@@ -451,90 +451,91 @@ struct erfInv_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPointSc
451451
}
452452
};
453453

454-
template<>
455-
struct erfInv_helper<float64_t>
456-
{
457-
static float64_t __call(NBL_CONST_REF_ARG(float64_t) _x)
458-
{
459-
float64_t x = clamp<float64_t>(_x, NBL_FP64_LITERAL(-0.99999), NBL_FP64_LITERAL(0.99999));
460-
461-
float64_t w = float64_t(-log_helper<float32_t>::__call((float32_t(NBL_FP64_LITERAL(1.0)) - x) * float32_t(NBL_FP64_LITERAL(1.0)) + x));
462-
float64_t p;
463-
if (w < 6.250000)
464-
{
465-
w -= NBL_FP64_LITERAL(3.125000);
466-
p = NBL_FP64_LITERAL(-3.6444120640178196996e-21);
467-
p = NBL_FP64_LITERAL(-1.685059138182016589e-19) + p * w;
468-
p = NBL_FP64_LITERAL(1.2858480715256400167e-18) + p * w;
469-
p = NBL_FP64_LITERAL(1.115787767802518096e-17) + p * w;
470-
p = NBL_FP64_LITERAL(-1.333171662854620906e-16) + p * w;
471-
p = NBL_FP64_LITERAL(2.0972767875968561637e-17) + p * w;
472-
p = NBL_FP64_LITERAL(6.6376381343583238325e-15) + p * w;
473-
p = NBL_FP64_LITERAL(-4.0545662729752068639e-14) + p * w;
474-
p = NBL_FP64_LITERAL(-8.1519341976054721522e-14) + p * w;
475-
p = NBL_FP64_LITERAL(2.6335093153082322977e-12) + p * w;
476-
p = NBL_FP64_LITERAL(-1.2975133253453532498e-11) + p * w;
477-
p = NBL_FP64_LITERAL(-5.4154120542946279317e-11) + p * w;
478-
p = NBL_FP64_LITERAL(1.051212273321532285e-09) + p * w;
479-
p = NBL_FP64_LITERAL(-4.1126339803469836976e-09) + p * w;
480-
p = NBL_FP64_LITERAL(-2.9070369957882005086e-08) + p * w;
481-
p = NBL_FP64_LITERAL(4.2347877827932403518e-07) + p * w;
482-
p = NBL_FP64_LITERAL(-1.3654692000834678645e-06) + p * w;
483-
p = NBL_FP64_LITERAL(-1.3882523362786468719e-05) + p * w;
484-
p = NBL_FP64_LITERAL(0.0001867342080340571352) + p * w;
485-
p = NBL_FP64_LITERAL(-0.00074070253416626697512) + p * w;
486-
p = NBL_FP64_LITERAL(-0.0060336708714301490533) + p * w;
487-
p = NBL_FP64_LITERAL(0.24015818242558961693) + p * w;
488-
p = NBL_FP64_LITERAL(1.6536545626831027356) + p * w;
489-
}
490-
else if (w < 16.000000)
491-
{
492-
w = sqrt_helper<float64_t>::__call(w) - NBL_FP64_LITERAL(3.250000);
493-
p = NBL_FP64_LITERAL(2.2137376921775787049e-09);
494-
p = NBL_FP64_LITERAL(9.0756561938885390979e-08) + p * w;
495-
p = NBL_FP64_LITERAL(-2.7517406297064545428e-07) + p * w;
496-
p = NBL_FP64_LITERAL(1.8239629214389227755e-08) + p * w;
497-
p = NBL_FP64_LITERAL(1.5027403968909827627e-06) + p * w;
498-
p = NBL_FP64_LITERAL(-4.013867526981545969e-06) + p * w;
499-
p = NBL_FP64_LITERAL(2.9234449089955446044e-06) + p * w;
500-
p = NBL_FP64_LITERAL(1.2475304481671778723e-05) + p * w;
501-
p = NBL_FP64_LITERAL(-4.7318229009055733981e-05) + p * w;
502-
p = NBL_FP64_LITERAL(6.8284851459573175448e-05) + p * w;
503-
p = NBL_FP64_LITERAL(2.4031110387097893999e-05) + p * w;
504-
p = NBL_FP64_LITERAL(-0.0003550375203628474796) + p * w;
505-
p = NBL_FP64_LITERAL(0.00095328937973738049703) + p * w;
506-
p = NBL_FP64_LITERAL(-0.0016882755560235047313) + p * w;
507-
p = NBL_FP64_LITERAL(0.0024914420961078508066) + p * w;
508-
p = NBL_FP64_LITERAL(-0.0037512085075692412107) + p * w;
509-
p = NBL_FP64_LITERAL(0.005370914553590063617) + p * w;
510-
p = NBL_FP64_LITERAL(1.0052589676941592334) + p * w;
511-
p = NBL_FP64_LITERAL(3.0838856104922207635) + p * w;
512-
}
513-
else
514-
{
515-
w = sqrt_helper<float64_t>::__call(w) - NBL_FP64_LITERAL(5.000000);
516-
p = NBL_FP64_LITERAL(-2.7109920616438573243e-11);
517-
p = NBL_FP64_LITERAL(-2.5556418169965252055e-10) + p * w;
518-
p = NBL_FP64_LITERAL(1.5076572693500548083e-09) + p * w;
519-
p = NBL_FP64_LITERAL(-3.7894654401267369937e-09) + p * w;
520-
p = NBL_FP64_LITERAL(7.6157012080783393804e-09) + p * w;
521-
p = NBL_FP64_LITERAL(-1.4960026627149240478e-08) + p * w;
522-
p = NBL_FP64_LITERAL(2.9147953450901080826e-08) + p * w;
523-
p = NBL_FP64_LITERAL(-6.7711997758452339498e-08) + p * w;
524-
p = NBL_FP64_LITERAL(2.2900482228026654717e-07) + p * w;
525-
p = NBL_FP64_LITERAL(-9.9298272942317002539e-07) + p * w;
526-
p = NBL_FP64_LITERAL(4.5260625972231537039e-06) + p * w;
527-
p = NBL_FP64_LITERAL(-1.9681778105531670567e-05) + p * w;
528-
p = NBL_FP64_LITERAL(7.5995277030017761139e-05) + p * w;
529-
p = NBL_FP64_LITERAL(-0.00021503011930044477347) + p * w;
530-
p = NBL_FP64_LITERAL(-0.00013871931833623122026) + p * w;
531-
p = NBL_FP64_LITERAL(1.0103004648645343977) + p * w;
532-
p = NBL_FP64_LITERAL(4.8499064014085844221) + p * w;
533-
}
534-
535-
return p * x;
536-
}
537-
};
454+
// Disabled: log doesn't accept float64_t, so the erfInv_helper<float64_t> specialization below is commented out.
455+
// template<>
456+
// struct erfInv_helper<float64_t>
457+
// {
458+
// static float64_t __call(NBL_CONST_REF_ARG(float64_t) _x)
459+
// {
460+
// float64_t x = clamp<float64_t>(_x, NBL_FP64_LITERAL(-0.99999), NBL_FP64_LITERAL(0.99999));
461+
462+
// float64_t w = -log_helper<float64_t>::__call((NBL_FP64_LITERAL(1.0) - x) * (NBL_FP64_LITERAL(1.0) + x));
463+
// float64_t p;
464+
// if (w < 6.250000)
465+
// {
466+
// w -= NBL_FP64_LITERAL(3.125000);
467+
// p = NBL_FP64_LITERAL(-3.6444120640178196996e-21);
468+
// p = NBL_FP64_LITERAL(-1.685059138182016589e-19) + p * w;
469+
// p = NBL_FP64_LITERAL(1.2858480715256400167e-18) + p * w;
470+
// p = NBL_FP64_LITERAL(1.115787767802518096e-17) + p * w;
471+
// p = NBL_FP64_LITERAL(-1.333171662854620906e-16) + p * w;
472+
// p = NBL_FP64_LITERAL(2.0972767875968561637e-17) + p * w;
473+
// p = NBL_FP64_LITERAL(6.6376381343583238325e-15) + p * w;
474+
// p = NBL_FP64_LITERAL(-4.0545662729752068639e-14) + p * w;
475+
// p = NBL_FP64_LITERAL(-8.1519341976054721522e-14) + p * w;
476+
// p = NBL_FP64_LITERAL(2.6335093153082322977e-12) + p * w;
477+
// p = NBL_FP64_LITERAL(-1.2975133253453532498e-11) + p * w;
478+
// p = NBL_FP64_LITERAL(-5.4154120542946279317e-11) + p * w;
479+
// p = NBL_FP64_LITERAL(1.051212273321532285e-09) + p * w;
480+
// p = NBL_FP64_LITERAL(-4.1126339803469836976e-09) + p * w;
481+
// p = NBL_FP64_LITERAL(-2.9070369957882005086e-08) + p * w;
482+
// p = NBL_FP64_LITERAL(4.2347877827932403518e-07) + p * w;
483+
// p = NBL_FP64_LITERAL(-1.3654692000834678645e-06) + p * w;
484+
// p = NBL_FP64_LITERAL(-1.3882523362786468719e-05) + p * w;
485+
// p = NBL_FP64_LITERAL(0.0001867342080340571352) + p * w;
486+
// p = NBL_FP64_LITERAL(-0.00074070253416626697512) + p * w;
487+
// p = NBL_FP64_LITERAL(-0.0060336708714301490533) + p * w;
488+
// p = NBL_FP64_LITERAL(0.24015818242558961693) + p * w;
489+
// p = NBL_FP64_LITERAL(1.6536545626831027356) + p * w;
490+
// }
491+
// else if (w < 16.000000)
492+
// {
493+
// w = sqrt_helper<float64_t>::__call(w) - NBL_FP64_LITERAL(3.250000);
494+
// p = NBL_FP64_LITERAL(2.2137376921775787049e-09);
495+
// p = NBL_FP64_LITERAL(9.0756561938885390979e-08) + p * w;
496+
// p = NBL_FP64_LITERAL(-2.7517406297064545428e-07) + p * w;
497+
// p = NBL_FP64_LITERAL(1.8239629214389227755e-08) + p * w;
498+
// p = NBL_FP64_LITERAL(1.5027403968909827627e-06) + p * w;
499+
// p = NBL_FP64_LITERAL(-4.013867526981545969e-06) + p * w;
500+
// p = NBL_FP64_LITERAL(2.9234449089955446044e-06) + p * w;
501+
// p = NBL_FP64_LITERAL(1.2475304481671778723e-05) + p * w;
502+
// p = NBL_FP64_LITERAL(-4.7318229009055733981e-05) + p * w;
503+
// p = NBL_FP64_LITERAL(6.8284851459573175448e-05) + p * w;
504+
// p = NBL_FP64_LITERAL(2.4031110387097893999e-05) + p * w;
505+
// p = NBL_FP64_LITERAL(-0.0003550375203628474796) + p * w;
506+
// p = NBL_FP64_LITERAL(0.00095328937973738049703) + p * w;
507+
// p = NBL_FP64_LITERAL(-0.0016882755560235047313) + p * w;
508+
// p = NBL_FP64_LITERAL(0.0024914420961078508066) + p * w;
509+
// p = NBL_FP64_LITERAL(-0.0037512085075692412107) + p * w;
510+
// p = NBL_FP64_LITERAL(0.005370914553590063617) + p * w;
511+
// p = NBL_FP64_LITERAL(1.0052589676941592334) + p * w;
512+
// p = NBL_FP64_LITERAL(3.0838856104922207635) + p * w;
513+
// }
514+
// else
515+
// {
516+
// w = sqrt_helper<float64_t>::__call(w) - NBL_FP64_LITERAL(5.000000);
517+
// p = NBL_FP64_LITERAL(-2.7109920616438573243e-11);
518+
// p = NBL_FP64_LITERAL(-2.5556418169965252055e-10) + p * w;
519+
// p = NBL_FP64_LITERAL(1.5076572693500548083e-09) + p * w;
520+
// p = NBL_FP64_LITERAL(-3.7894654401267369937e-09) + p * w;
521+
// p = NBL_FP64_LITERAL(7.6157012080783393804e-09) + p * w;
522+
// p = NBL_FP64_LITERAL(-1.4960026627149240478e-08) + p * w;
523+
// p = NBL_FP64_LITERAL(2.9147953450901080826e-08) + p * w;
524+
// p = NBL_FP64_LITERAL(-6.7711997758452339498e-08) + p * w;
525+
// p = NBL_FP64_LITERAL(2.2900482228026654717e-07) + p * w;
526+
// p = NBL_FP64_LITERAL(-9.9298272942317002539e-07) + p * w;
527+
// p = NBL_FP64_LITERAL(4.5260625972231537039e-06) + p * w;
528+
// p = NBL_FP64_LITERAL(-1.9681778105531670567e-05) + p * w;
529+
// p = NBL_FP64_LITERAL(7.5995277030017761139e-05) + p * w;
530+
// p = NBL_FP64_LITERAL(-0.00021503011930044477347) + p * w;
531+
// p = NBL_FP64_LITERAL(-0.00013871931833623122026) + p * w;
532+
// p = NBL_FP64_LITERAL(1.0103004648645343977) + p * w;
533+
// p = NBL_FP64_LITERAL(4.8499064014085844221) + p * w;
534+
// }
535+
536+
// return p * x;
537+
// }
538+
// };
538539

539540
#ifdef __HLSL_VERSION
540541
// SPIR-V already defines specializations for builtin vector types

0 commit comments

Comments
 (0)