@@ -387,8 +387,8 @@ inline HALF XMConvertFloatToHalf(float Value) noexcept
387387{
388388#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
389389 __m128 V1 = _mm_set_ss (Value);
390- __m128i V2 = _mm_cvtps_ph (V1, 0 );
391- return static_cast <HALF>(_mm_cvtsi128_si32 (V2));
390+ __m128i V2 = _mm_cvtps_ph (V1, _MM_FROUND_TO_NEAREST_INT );
391+ return static_cast <HALF>(_mm_extract_epi16 (V2, 0 ));
392392#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__) && !defined(_XM_NO_INTRINSICS_)
393393 float32x4_t vFloat = vdupq_n_f32 (Value);
394394 float16x4_t vHalf = vcvt_f16_f32 (vFloat);
@@ -399,38 +399,29 @@ inline HALF XMConvertFloatToHalf(float Value) noexcept
399399 auto IValue = reinterpret_cast <uint32_t *>(&Value)[0 ];
400400 uint32_t Sign = (IValue & 0x80000000U ) >> 16U ;
401401 IValue = IValue & 0x7FFFFFFFU ; // Hack off the sign
402-
403- if (IValue > 0x477FE000U )
402+ if (IValue >= 0x47800000 /* e+16*/ )
404403 {
405- // The number is too large to be represented as a half. Saturate to infinity.
406- if (((IValue & 0x7F800000 ) == 0x7F800000 ) && ((IValue & 0x7FFFFF ) != 0 ))
407- {
408- Result = 0x7FFF ; // NAN
409- }
410- else
411- {
412- Result = 0x7C00U ; // INF
413- }
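404+ // The number is too large to be represented as a half. Return infinity, or for a NaN input return a NaN with the top mantissa bit forced on and the input's upper ten mantissa bits carried over.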
405+ Result = 0x7C00U | ((IValue > 0x7F800000 ) ? (0x200 | ((IValue >> 13U ) & 0x3FFU )) : 0U );
414406 }
415- else if (! IValue)
407+ else if (IValue <= 0x33000000U /* e-25 */ )
416408 {
417409 Result = 0 ;
418410 }
411+ else if (IValue < 0x38800000U /* e-14*/ )
412+ {
413+ // The number is too small to be represented as a normalized half.
414+ // Convert it to a denormalized value.
415+ uint32_t Shift = 125U - (IValue >> 23U );
416+ IValue = 0x800000U | (IValue & 0x7FFFFFU );
417+ Result = IValue >> (Shift + 1 );
418+ uint32_t s = (IValue & ((1U << Shift) - 1 )) != 0 ;
419+ Result += (Result | s) & ((IValue >> Shift) & 1U );
420+ }
419421 else
420422 {
421- if (IValue < 0x38800000U )
422- {
423- // The number is too small to be represented as a normalized half.
424- // Convert it to a denormalized value.
425- uint32_t Shift = 113U - (IValue >> 23U );
426- IValue = (0x800000U | (IValue & 0x7FFFFFU )) >> Shift;
427- }
428- else
429- {
430- // Rebias the exponent to represent the value as a normalized half.
431- IValue += 0xC8000000U ;
432- }
433-
423+ // Rebias the exponent to represent the value as a normalized half.
424+ IValue += 0xC8000000U ;
434425 Result = ((IValue + 0x0FFFU + ((IValue >> 13U ) & 1U )) >> 13U ) & 0x7FFFU ;
435426 }
436427 return static_cast <HALF>(Result | Sign);
@@ -477,7 +468,7 @@ inline HALF* XMConvertFloatToHalfStream
477468 __m128 FV = _mm_load_ps (reinterpret_cast <const float *>(pFloat));
478469 pFloat += InputStride * 4 ;
479470
480- __m128i HV = _mm_cvtps_ph (FV, 0 );
471+ __m128i HV = _mm_cvtps_ph (FV, _MM_FROUND_TO_NEAREST_INT );
481472
482473 _mm_storel_epi64 (reinterpret_cast <__m128i*>(pHalf), HV);
483474 pHalf += OutputStride * 4 ;
@@ -492,7 +483,7 @@ inline HALF* XMConvertFloatToHalfStream
492483 __m128 FV = _mm_loadu_ps (reinterpret_cast <const float *>(pFloat));
493484 pFloat += InputStride * 4 ;
494485
495- __m128i HV = _mm_cvtps_ph (FV, 0 );
486+ __m128i HV = _mm_cvtps_ph (FV, _MM_FROUND_TO_NEAREST_INT );
496487
497488 _mm_storel_epi64 (reinterpret_cast <__m128i*>(pHalf), HV);
498489 pHalf += OutputStride * 4 ;
@@ -510,7 +501,7 @@ inline HALF* XMConvertFloatToHalfStream
510501 __m128 FV = _mm_load_ps (reinterpret_cast <const float *>(pFloat));
511502 pFloat += InputStride * 4 ;
512503
513- __m128i HV = _mm_cvtps_ph (FV, 0 );
504+ __m128i HV = _mm_cvtps_ph (FV, _MM_FROUND_TO_NEAREST_INT );
514505
515506 *reinterpret_cast <HALF*>(pHalf) = static_cast <HALF>(_mm_extract_epi16 (HV, 0 ));
516507 pHalf += OutputStride;
@@ -531,7 +522,7 @@ inline HALF* XMConvertFloatToHalfStream
531522 __m128 FV = _mm_loadu_ps (reinterpret_cast <const float *>(pFloat));
532523 pFloat += InputStride * 4 ;
533524
534- __m128i HV = _mm_cvtps_ph (FV, 0 );
525+ __m128i HV = _mm_cvtps_ph (FV, _MM_FROUND_TO_NEAREST_INT );
535526
536527 *reinterpret_cast <HALF*>(pHalf) = static_cast <HALF>(_mm_extract_epi16 (HV, 0 ));
537528 pHalf += OutputStride;
@@ -567,7 +558,7 @@ inline HALF* XMConvertFloatToHalfStream
567558 __m128 FT = _mm_blend_ps (FV3, FV4, 0x8 );
568559 FV = _mm_blend_ps (FV, FT, 0xC );
569560
570- __m128i HV = _mm_cvtps_ph (FV, 0 );
561+ __m128i HV = _mm_cvtps_ph (FV, _MM_FROUND_TO_NEAREST_INT );
571562
572563 _mm_storel_epi64 (reinterpret_cast <__m128i*>(pHalf), HV);
573564 pHalf += OutputStride * 4 ;
@@ -595,7 +586,7 @@ inline HALF* XMConvertFloatToHalfStream
595586 __m128 FT = _mm_blend_ps (FV3, FV4, 0x8 );
596587 FV = _mm_blend_ps (FV, FT, 0xC );
597588
598- __m128i HV = _mm_cvtps_ph (FV, 0 );
589+ __m128i HV = _mm_cvtps_ph (FV, _MM_FROUND_TO_NEAREST_INT );
599590
600591 *reinterpret_cast <HALF*>(pHalf) = static_cast <HALF>(_mm_extract_epi16 (HV, 0 ));
601592 pHalf += OutputStride;
@@ -2099,7 +2090,7 @@ inline void XM_CALLCONV XMStoreHalf2
20992090{
21002091 assert (pDestination);
21012092#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
2102- __m128i V1 = _mm_cvtps_ph (V, 0 );
2093+ __m128i V1 = _mm_cvtps_ph (V, _MM_FROUND_TO_NEAREST_INT );
21032094 _mm_store_ss (reinterpret_cast <float *>(pDestination), _mm_castsi128_ps (V1));
21042095#else
21052096 pDestination->x = XMConvertFloatToHalf (XMVectorGetX (V));
@@ -2655,7 +2646,7 @@ inline void XM_CALLCONV XMStoreHalf4
26552646{
26562647 assert (pDestination);
26572648#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
2658- __m128i V1 = _mm_cvtps_ph (V, 0 );
2649+ __m128i V1 = _mm_cvtps_ph (V, _MM_FROUND_TO_NEAREST_INT );
26592650 _mm_storel_epi64 (reinterpret_cast <__m128i*>(pDestination), V1);
26602651#else
26612652 XMFLOAT4A t;
0 commit comments