Skip to content

Commit d0bbddc

Browse files
authored
Fixed float denorm conversion handling for XMConvertFloatToHalf (#114)
1 parent 196104d commit d0bbddc

File tree

2 files changed

+27
-36
lines changed

2 files changed

+27
-36
lines changed

Inc/DirectXPackedVector.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ namespace DirectX
7171
//------------------------------------------------------------------------------
7272
// 16 bit floating point number consisting of a sign bit, a 5 bit biased
7373
// exponent, and a 10 bit mantissa
74-
typedef uint16_t HALF;
74+
using HALF = uint16_t;
7575

7676
//------------------------------------------------------------------------------
7777
// 2D Vector; 16 bit floating point components

Inc/DirectXPackedVector.inl

Lines changed: 26 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -387,8 +387,8 @@ inline HALF XMConvertFloatToHalf(float Value) noexcept
387387
{
388388
#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
389389
__m128 V1 = _mm_set_ss(Value);
390-
__m128i V2 = _mm_cvtps_ph(V1, 0);
391-
return static_cast<HALF>(_mm_cvtsi128_si32(V2));
390+
__m128i V2 = _mm_cvtps_ph(V1, _MM_FROUND_TO_NEAREST_INT);
391+
return static_cast<HALF>(_mm_extract_epi16(V2, 0));
392392
#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__) && !defined(_XM_NO_INTRINSICS_)
393393
float32x4_t vFloat = vdupq_n_f32(Value);
394394
float16x4_t vHalf = vcvt_f16_f32(vFloat);
@@ -399,38 +399,29 @@ inline HALF XMConvertFloatToHalf(float Value) noexcept
399399
auto IValue = reinterpret_cast<uint32_t*>(&Value)[0];
400400
uint32_t Sign = (IValue & 0x80000000U) >> 16U;
401401
IValue = IValue & 0x7FFFFFFFU; // Hack off the sign
402-
403-
if (IValue > 0x477FE000U)
402+
if (IValue >= 0x47800000 /*e+16*/)
404403
{
405-
// The number is too large to be represented as a half. Saturate to infinity.
406-
if (((IValue & 0x7F800000) == 0x7F800000) && ((IValue & 0x7FFFFF) != 0))
407-
{
408-
Result = 0x7FFF; // NAN
409-
}
410-
else
411-
{
412-
Result = 0x7C00U; // INF
413-
}
404+
// The number is too large to be represented as a half. Return infinity or NaN
405+
Result = 0x7C00U | ((IValue > 0x7F800000) ? (0x200 | ((IValue >> 13U) & 0x3FFU)) : 0U);
414406
}
415-
else if (!IValue)
407+
else if (IValue <= 0x33000000U /*e-25*/)
416408
{
417409
Result = 0;
418410
}
411+
else if (IValue < 0x38800000U /*e-14*/)
412+
{
413+
// The number is too small to be represented as a normalized half.
414+
// Convert it to a denormalized value.
415+
uint32_t Shift = 125U - (IValue >> 23U);
416+
IValue = 0x800000U | (IValue & 0x7FFFFFU);
417+
Result = IValue >> (Shift + 1);
418+
uint32_t s = (IValue & ((1U << Shift) - 1)) != 0;
419+
Result += (Result | s) & ((IValue >> Shift) & 1U);
420+
}
419421
else
420422
{
421-
if (IValue < 0x38800000U)
422-
{
423-
// The number is too small to be represented as a normalized half.
424-
// Convert it to a denormalized value.
425-
uint32_t Shift = 113U - (IValue >> 23U);
426-
IValue = (0x800000U | (IValue & 0x7FFFFFU)) >> Shift;
427-
}
428-
else
429-
{
430-
// Rebias the exponent to represent the value as a normalized half.
431-
IValue += 0xC8000000U;
432-
}
433-
423+
// Rebias the exponent to represent the value as a normalized half.
424+
IValue += 0xC8000000U;
434425
Result = ((IValue + 0x0FFFU + ((IValue >> 13U) & 1U)) >> 13U) & 0x7FFFU;
435426
}
436427
return static_cast<HALF>(Result | Sign);
@@ -477,7 +468,7 @@ inline HALF* XMConvertFloatToHalfStream
477468
__m128 FV = _mm_load_ps(reinterpret_cast<const float*>(pFloat));
478469
pFloat += InputStride * 4;
479470

480-
__m128i HV = _mm_cvtps_ph(FV, 0);
471+
__m128i HV = _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT);
481472

482473
_mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV);
483474
pHalf += OutputStride * 4;
@@ -492,7 +483,7 @@ inline HALF* XMConvertFloatToHalfStream
492483
__m128 FV = _mm_loadu_ps(reinterpret_cast<const float*>(pFloat));
493484
pFloat += InputStride * 4;
494485

495-
__m128i HV = _mm_cvtps_ph(FV, 0);
486+
__m128i HV = _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT);
496487

497488
_mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV);
498489
pHalf += OutputStride * 4;
@@ -510,7 +501,7 @@ inline HALF* XMConvertFloatToHalfStream
510501
__m128 FV = _mm_load_ps(reinterpret_cast<const float*>(pFloat));
511502
pFloat += InputStride * 4;
512503

513-
__m128i HV = _mm_cvtps_ph(FV, 0);
504+
__m128i HV = _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT);
514505

515506
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 0));
516507
pHalf += OutputStride;
@@ -531,7 +522,7 @@ inline HALF* XMConvertFloatToHalfStream
531522
__m128 FV = _mm_loadu_ps(reinterpret_cast<const float*>(pFloat));
532523
pFloat += InputStride * 4;
533524

534-
__m128i HV = _mm_cvtps_ph(FV, 0);
525+
__m128i HV = _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT);
535526

536527
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 0));
537528
pHalf += OutputStride;
@@ -567,7 +558,7 @@ inline HALF* XMConvertFloatToHalfStream
567558
__m128 FT = _mm_blend_ps(FV3, FV4, 0x8);
568559
FV = _mm_blend_ps(FV, FT, 0xC);
569560

570-
__m128i HV = _mm_cvtps_ph(FV, 0);
561+
__m128i HV = _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT);
571562

572563
_mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV);
573564
pHalf += OutputStride * 4;
@@ -595,7 +586,7 @@ inline HALF* XMConvertFloatToHalfStream
595586
__m128 FT = _mm_blend_ps(FV3, FV4, 0x8);
596587
FV = _mm_blend_ps(FV, FT, 0xC);
597588

598-
__m128i HV = _mm_cvtps_ph(FV, 0);
589+
__m128i HV = _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT);
599590

600591
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 0));
601592
pHalf += OutputStride;
@@ -2099,7 +2090,7 @@ inline void XM_CALLCONV XMStoreHalf2
20992090
{
21002091
assert(pDestination);
21012092
#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
2102-
__m128i V1 = _mm_cvtps_ph(V, 0);
2093+
__m128i V1 = _mm_cvtps_ph(V, _MM_FROUND_TO_NEAREST_INT);
21032094
_mm_store_ss(reinterpret_cast<float*>(pDestination), _mm_castsi128_ps(V1));
21042095
#else
21052096
pDestination->x = XMConvertFloatToHalf(XMVectorGetX(V));
@@ -2655,7 +2646,7 @@ inline void XM_CALLCONV XMStoreHalf4
26552646
{
26562647
assert(pDestination);
26572648
#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
2658-
__m128i V1 = _mm_cvtps_ph(V, 0);
2649+
__m128i V1 = _mm_cvtps_ph(V, _MM_FROUND_TO_NEAREST_INT);
26592650
_mm_storel_epi64(reinterpret_cast<__m128i*>(pDestination), V1);
26602651
#else
26612652
XMFLOAT4A t;

0 commit comments

Comments
 (0)