@@ -405,7 +405,7 @@ struct erf_helper<float16_t>
405
405
float16_t x = abs_helper<float16_t>::__call (_x);
406
406
407
407
float16_t t = float16_t (1.f ) / (float16_t (1.f ) + p * x);
408
- float16_t y = float16_t (1.f ) - (((a3 * t + a2) * t) + a1) * t * exp (-x * x);
408
+ float16_t y = float16_t (1.f ) - (((a3 * t + a2) * t) + a1) * t * exp_helper<float16_t>:: __call (-x * x);
409
409
410
410
return _sign * y;
411
411
}
@@ -451,90 +451,91 @@ struct erfInv_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPointSc
451
451
}
452
452
};
453
453
454
- template<>
455
- struct erfInv_helper<float64_t>
456
- {
457
- static float64_t __call (NBL_CONST_REF_ARG (float64_t) _x)
458
- {
459
- float64_t x = clamp <float64_t>(_x, NBL_FP64_LITERAL (-0.99999 ), NBL_FP64_LITERAL (0.99999 ));
460
-
461
- float64_t w = float64_t (-log_helper<float32_t>::__call ((float32_t (NBL_FP64_LITERAL (1.0 )) - x) * float32_t (NBL_FP64_LITERAL (1.0 )) + x));
462
- float64_t p;
463
- if (w < 6.250000 )
464
- {
465
- w -= NBL_FP64_LITERAL (3.125000 );
466
- p = NBL_FP64_LITERAL (-3. 6444120640178196996e-21 );
467
- p = NBL_FP64_LITERAL (-1. 685059138182016589e-19 ) + p * w;
468
- p = NBL_FP64_LITERAL (1. 2858480715256400167e-18 ) + p * w;
469
- p = NBL_FP64_LITERAL (1. 115787767802518096e-17 ) + p * w;
470
- p = NBL_FP64_LITERAL (-1. 333171662854620906e-16 ) + p * w;
471
- p = NBL_FP64_LITERAL (2. 0972767875968561637e-17 ) + p * w;
472
- p = NBL_FP64_LITERAL (6. 6376381343583238325e-15 ) + p * w;
473
- p = NBL_FP64_LITERAL (-4. 0545662729752068639e-14 ) + p * w;
474
- p = NBL_FP64_LITERAL (-8. 1519341976054721522e-14 ) + p * w;
475
- p = NBL_FP64_LITERAL (2. 6335093153082322977e-12 ) + p * w;
476
- p = NBL_FP64_LITERAL (-1. 2975133253453532498e-11 ) + p * w;
477
- p = NBL_FP64_LITERAL (-5. 4154120542946279317e-11 ) + p * w;
478
- p = NBL_FP64_LITERAL (1. 051212273321532285e-09 ) + p * w;
479
- p = NBL_FP64_LITERAL (-4. 1126339803469836976e-09 ) + p * w;
480
- p = NBL_FP64_LITERAL (-2. 9070369957882005086e-08 ) + p * w;
481
- p = NBL_FP64_LITERAL (4. 2347877827932403518e-07 ) + p * w;
482
- p = NBL_FP64_LITERAL (-1. 3654692000834678645e-06 ) + p * w;
483
- p = NBL_FP64_LITERAL (-1. 3882523362786468719e-05 ) + p * w;
484
- p = NBL_FP64_LITERAL (0.0001867342080340571352 ) + p * w;
485
- p = NBL_FP64_LITERAL (-0.00074070253416626697512 ) + p * w;
486
- p = NBL_FP64_LITERAL (-0.0060336708714301490533 ) + p * w;
487
- p = NBL_FP64_LITERAL (0.24015818242558961693 ) + p * w;
488
- p = NBL_FP64_LITERAL (1.6536545626831027356 ) + p * w;
489
- }
490
- else if (w < 16.000000 )
491
- {
492
- w = sqrt_helper<float64_t>::__call (w) - NBL_FP64_LITERAL (3.250000 );
493
- p = NBL_FP64_LITERAL (2. 2137376921775787049e-09 );
494
- p = NBL_FP64_LITERAL (9. 0756561938885390979e-08 ) + p * w;
495
- p = NBL_FP64_LITERAL (-2. 7517406297064545428e-07 ) + p * w;
496
- p = NBL_FP64_LITERAL (1. 8239629214389227755e-08 ) + p * w;
497
- p = NBL_FP64_LITERAL (1. 5027403968909827627e-06 ) + p * w;
498
- p = NBL_FP64_LITERAL (-4. 013867526981545969e-06 ) + p * w;
499
- p = NBL_FP64_LITERAL (2. 9234449089955446044e-06 ) + p * w;
500
- p = NBL_FP64_LITERAL (1. 2475304481671778723e-05 ) + p * w;
501
- p = NBL_FP64_LITERAL (-4. 7318229009055733981e-05 ) + p * w;
502
- p = NBL_FP64_LITERAL (6. 8284851459573175448e-05 ) + p * w;
503
- p = NBL_FP64_LITERAL (2. 4031110387097893999e-05 ) + p * w;
504
- p = NBL_FP64_LITERAL (-0.0003550375203628474796 ) + p * w;
505
- p = NBL_FP64_LITERAL (0.00095328937973738049703 ) + p * w;
506
- p = NBL_FP64_LITERAL (-0.0016882755560235047313 ) + p * w;
507
- p = NBL_FP64_LITERAL (0.0024914420961078508066 ) + p * w;
508
- p = NBL_FP64_LITERAL (-0.0037512085075692412107 ) + p * w;
509
- p = NBL_FP64_LITERAL (0.005370914553590063617 ) + p * w;
510
- p = NBL_FP64_LITERAL (1.0052589676941592334 ) + p * w;
511
- p = NBL_FP64_LITERAL (3.0838856104922207635 ) + p * w;
512
- }
513
- else
514
- {
515
- w = sqrt_helper<float64_t>::__call (w) - NBL_FP64_LITERAL (5.000000 );
516
- p = NBL_FP64_LITERAL (-2. 7109920616438573243e-11 );
517
- p = NBL_FP64_LITERAL (-2. 5556418169965252055e-10 ) + p * w;
518
- p = NBL_FP64_LITERAL (1. 5076572693500548083e-09 ) + p * w;
519
- p = NBL_FP64_LITERAL (-3. 7894654401267369937e-09 ) + p * w;
520
- p = NBL_FP64_LITERAL (7. 6157012080783393804e-09 ) + p * w;
521
- p = NBL_FP64_LITERAL (-1. 4960026627149240478e-08 ) + p * w;
522
- p = NBL_FP64_LITERAL (2. 9147953450901080826e-08 ) + p * w;
523
- p = NBL_FP64_LITERAL (-6. 7711997758452339498e-08 ) + p * w;
524
- p = NBL_FP64_LITERAL (2. 2900482228026654717e-07 ) + p * w;
525
- p = NBL_FP64_LITERAL (-9. 9298272942317002539e-07 ) + p * w;
526
- p = NBL_FP64_LITERAL (4. 5260625972231537039e-06 ) + p * w;
527
- p = NBL_FP64_LITERAL (-1. 9681778105531670567e-05 ) + p * w;
528
- p = NBL_FP64_LITERAL (7. 5995277030017761139e-05 ) + p * w;
529
- p = NBL_FP64_LITERAL (-0.00021503011930044477347 ) + p * w;
530
- p = NBL_FP64_LITERAL (-0.00013871931833623122026 ) + p * w;
531
- p = NBL_FP64_LITERAL (1.0103004648645343977 ) + p * w;
532
- p = NBL_FP64_LITERAL (4.8499064014085844221 ) + p * w;
533
- }
534
-
535
- return p * x;
536
- }
537
- };
454
+ // Disabled: log doesn't accept float64_t, so the float64_t specialization below is commented out.
455
+ // template<>
456
+ // struct erfInv_helper<float64_t>
457
+ // {
458
+ // static float64_t __call(NBL_CONST_REF_ARG(float64_t) _x)
459
+ // {
460
+ // float64_t x = clamp<float64_t>(_x, NBL_FP64_LITERAL(-0.99999), NBL_FP64_LITERAL(0.99999));
461
+
462
+ // float64_t w = -log_helper<float64_t>::__call((NBL_FP64_LITERAL(1.0) - x) * (NBL_FP64_LITERAL(1.0) + x));
463
+ // float64_t p;
464
+ // if (w < 6.250000)
465
+ // {
466
+ // w -= NBL_FP64_LITERAL(3.125000);
467
+ // p = NBL_FP64_LITERAL(-3.6444120640178196996e-21);
468
+ // p = NBL_FP64_LITERAL(-1.685059138182016589e-19) + p * w;
469
+ // p = NBL_FP64_LITERAL(1.2858480715256400167e-18) + p * w;
470
+ // p = NBL_FP64_LITERAL(1.115787767802518096e-17) + p * w;
471
+ // p = NBL_FP64_LITERAL(-1.333171662854620906e-16) + p * w;
472
+ // p = NBL_FP64_LITERAL(2.0972767875968561637e-17) + p * w;
473
+ // p = NBL_FP64_LITERAL(6.6376381343583238325e-15) + p * w;
474
+ // p = NBL_FP64_LITERAL(-4.0545662729752068639e-14) + p * w;
475
+ // p = NBL_FP64_LITERAL(-8.1519341976054721522e-14) + p * w;
476
+ // p = NBL_FP64_LITERAL(2.6335093153082322977e-12) + p * w;
477
+ // p = NBL_FP64_LITERAL(-1.2975133253453532498e-11) + p * w;
478
+ // p = NBL_FP64_LITERAL(-5.4154120542946279317e-11) + p * w;
479
+ // p = NBL_FP64_LITERAL(1.051212273321532285e-09) + p * w;
480
+ // p = NBL_FP64_LITERAL(-4.1126339803469836976e-09) + p * w;
481
+ // p = NBL_FP64_LITERAL(-2.9070369957882005086e-08) + p * w;
482
+ // p = NBL_FP64_LITERAL(4.2347877827932403518e-07) + p * w;
483
+ // p = NBL_FP64_LITERAL(-1.3654692000834678645e-06) + p * w;
484
+ // p = NBL_FP64_LITERAL(-1.3882523362786468719e-05) + p * w;
485
+ // p = NBL_FP64_LITERAL(0.0001867342080340571352) + p * w;
486
+ // p = NBL_FP64_LITERAL(-0.00074070253416626697512) + p * w;
487
+ // p = NBL_FP64_LITERAL(-0.0060336708714301490533) + p * w;
488
+ // p = NBL_FP64_LITERAL(0.24015818242558961693) + p * w;
489
+ // p = NBL_FP64_LITERAL(1.6536545626831027356) + p * w;
490
+ // }
491
+ // else if (w < 16.000000)
492
+ // {
493
+ // w = sqrt_helper<float64_t>::__call(w) - NBL_FP64_LITERAL(3.250000);
494
+ // p = NBL_FP64_LITERAL(2.2137376921775787049e-09);
495
+ // p = NBL_FP64_LITERAL(9.0756561938885390979e-08) + p * w;
496
+ // p = NBL_FP64_LITERAL(-2.7517406297064545428e-07) + p * w;
497
+ // p = NBL_FP64_LITERAL(1.8239629214389227755e-08) + p * w;
498
+ // p = NBL_FP64_LITERAL(1.5027403968909827627e-06) + p * w;
499
+ // p = NBL_FP64_LITERAL(-4.013867526981545969e-06) + p * w;
500
+ // p = NBL_FP64_LITERAL(2.9234449089955446044e-06) + p * w;
501
+ // p = NBL_FP64_LITERAL(1.2475304481671778723e-05) + p * w;
502
+ // p = NBL_FP64_LITERAL(-4.7318229009055733981e-05) + p * w;
503
+ // p = NBL_FP64_LITERAL(6.8284851459573175448e-05) + p * w;
504
+ // p = NBL_FP64_LITERAL(2.4031110387097893999e-05) + p * w;
505
+ // p = NBL_FP64_LITERAL(-0.0003550375203628474796) + p * w;
506
+ // p = NBL_FP64_LITERAL(0.00095328937973738049703) + p * w;
507
+ // p = NBL_FP64_LITERAL(-0.0016882755560235047313) + p * w;
508
+ // p = NBL_FP64_LITERAL(0.0024914420961078508066) + p * w;
509
+ // p = NBL_FP64_LITERAL(-0.0037512085075692412107) + p * w;
510
+ // p = NBL_FP64_LITERAL(0.005370914553590063617) + p * w;
511
+ // p = NBL_FP64_LITERAL(1.0052589676941592334) + p * w;
512
+ // p = NBL_FP64_LITERAL(3.0838856104922207635) + p * w;
513
+ // }
514
+ // else
515
+ // {
516
+ // w = sqrt_helper<float64_t>::__call(w) - NBL_FP64_LITERAL(5.000000);
517
+ // p = NBL_FP64_LITERAL(-2.7109920616438573243e-11);
518
+ // p = NBL_FP64_LITERAL(-2.5556418169965252055e-10) + p * w;
519
+ // p = NBL_FP64_LITERAL(1.5076572693500548083e-09) + p * w;
520
+ // p = NBL_FP64_LITERAL(-3.7894654401267369937e-09) + p * w;
521
+ // p = NBL_FP64_LITERAL(7.6157012080783393804e-09) + p * w;
522
+ // p = NBL_FP64_LITERAL(-1.4960026627149240478e-08) + p * w;
523
+ // p = NBL_FP64_LITERAL(2.9147953450901080826e-08) + p * w;
524
+ // p = NBL_FP64_LITERAL(-6.7711997758452339498e-08) + p * w;
525
+ // p = NBL_FP64_LITERAL(2.2900482228026654717e-07) + p * w;
526
+ // p = NBL_FP64_LITERAL(-9.9298272942317002539e-07) + p * w;
527
+ // p = NBL_FP64_LITERAL(4.5260625972231537039e-06) + p * w;
528
+ // p = NBL_FP64_LITERAL(-1.9681778105531670567e-05) + p * w;
529
+ // p = NBL_FP64_LITERAL(7.5995277030017761139e-05) + p * w;
530
+ // p = NBL_FP64_LITERAL(-0.00021503011930044477347) + p * w;
531
+ // p = NBL_FP64_LITERAL(-0.00013871931833623122026) + p * w;
532
+ // p = NBL_FP64_LITERAL(1.0103004648645343977) + p * w;
533
+ // p = NBL_FP64_LITERAL(4.8499064014085844221) + p * w;
534
+ // }
535
+
536
+ // return p * x;
537
+ // }
538
+ // };
538
539
539
540
#ifdef __HLSL_VERSION
540
541
// SPIR-V already defines specializations for builtin vector types
0 commit comments