@@ -406,91 +406,9 @@ inline bool isnanFloat16(uint16_t val) {
406
406
(val & FLOAT16_BIT_MANTISSA) != 0 ;
407
407
}
408
408
409
- inline uint16_t ConvertFloat32ToFloat16 (float val) {
410
- union Bits {
411
- uint32_t u_bits;
412
- float f_bits;
413
- };
414
-
415
- static const uint32_t SignMask = 0x8000 ;
416
-
417
- // Minimum f32 value representable in f16 format without denormalizing
418
- static const uint32_t Min16in32 = 0x38800000 ;
419
-
420
- // Maximum f32 value (next to infinity)
421
- static const uint32_t Max32 = 0x7f7FFFFF ;
422
-
423
- // Mask for f32 mantissa
424
- static const uint32_t Fraction32Mask = 0x007FFFFF ;
425
-
426
- // pow(2,24)
427
- static const uint32_t DenormalRatio = 0x4B800000 ;
428
-
429
- static const uint32_t NormalDelta = 0x38000000 ;
430
-
431
- Bits bits;
432
- bits.f_bits = val;
433
- uint32_t sign = bits.u_bits & (SignMask << 16 );
434
- Bits Abs;
435
- Abs.u_bits = bits.u_bits ^ sign;
436
-
437
- bool isLessThanNormal = Abs.f_bits < *(const float *)&Min16in32;
438
- bool isInfOrNaN = Abs.u_bits > Max32;
439
-
440
- if (isLessThanNormal) {
441
- // Compute Denormal result
442
- return (uint16_t )(Abs.f_bits * *(const float *)(&DenormalRatio)) | (uint16_t )(sign >> 16 );
443
- }
444
- else if (isInfOrNaN) {
445
- // Compute Inf or Nan result
446
- uint32_t Fraction = Abs.u_bits & Fraction32Mask;
447
- uint16_t IsNaN = Fraction == 0 ? 0 : 0xffff ;
448
- return (IsNaN & FLOAT16_BIT_MANTISSA) | FLOAT16_BIT_EXP | (uint16_t )(sign >> 16 );
449
- }
450
- else {
451
- // Compute Normal result
452
- return (uint16_t )((Abs.u_bits - NormalDelta) >> 13 ) | (uint16_t )(sign >> 16 );
453
- }
454
- }
455
-
456
- inline float ConvertFloat16ToFloat32 (uint16_t x) {
457
- union Bits {
458
- float f_bits;
459
- uint32_t u_bits;
460
- };
461
-
462
- uint32_t Sign = (x & FLOAT16_BIT_SIGN) << 16 ;
463
-
464
- // nan -> exponent all set and mantisa is non zero
465
- // +/-inf -> exponent all set and mantissa is zero
466
- // denorm -> exponent zero and significand nonzero
467
- uint32_t Abs = (x & 0x7fff );
468
- uint32_t IsNormal = Abs > FLOAT16_BIGGEST_DENORM;
469
- uint32_t IsInfOrNaN = Abs > FLOAT16_BIGGEST_NORMAL;
470
-
471
- // Signless Result for normals
472
- uint32_t DenormRatio = 0x33800000 ;
473
- float DenormResult = Abs * (*(float *)&DenormRatio);
474
-
475
- uint32_t AbsShifted = Abs << 13 ;
476
- // Signless Result for normals
477
- uint32_t NormalResult = AbsShifted + 0x38000000 ;
478
- // Signless Result for int & nans
479
- uint32_t InfResult = AbsShifted + 0x70000000 ;
480
-
481
- Bits bits;
482
- bits.u_bits = 0 ;
483
- if (IsInfOrNaN)
484
- bits.u_bits |= InfResult;
485
- else if (IsNormal)
486
- bits.u_bits |= NormalResult;
487
- else
488
- bits.f_bits = DenormResult;
489
- bits.u_bits |= Sign;
490
- return bits.f_bits ;
491
- }
492
- uint16_t ConvertFloat32ToFloat16 (float val);
493
- float ConvertFloat16ToFloat32 (uint16_t val);
409
+ // These are defined in ShaderOpTest.cpp using DirectXPackedVector functions.
// Only the prototypes are given here; both are declared non-throwing via the
// empty dynamic exception specification `throw()`.
410
// Takes a 32-bit float and returns a 16-bit value.
// NOTE(review): presumably the IEEE half-precision bit pattern - confirm
// against the definition.
+ uint16_t ConvertFloat32ToFloat16 (float val) throw();
411
// Takes a 16-bit value and returns a 32-bit float.
// NOTE(review): presumably `val` is an IEEE half-precision bit pattern -
// confirm against the definition.
+ float ConvertFloat16ToFloat32 (uint16_t val) throw();
494
412
495
413
inline bool CompareFloatULP (const float &fsrc, const float &fref, int ULPTolerance,
496
414
hlsl::DXIL::Float32DenormMode mode = hlsl::DXIL::Float32DenormMode::Any) {
0 commit comments