@@ -650,7 +650,95 @@ void SpirvShaderTranslator::ExportToMemory(uint8_t export_eM) {
650650 add_format_case (fixed16_packed, 3 );
651651 }
652652
653- // TODO(Triang3l): Use the extended range float16 conversion.
653+ // Xbox 360 float16 uses extended range: exponent 31 is NOT Inf/NaN
654+ // but a valid large value (up to +/-131008). Standard PackHalf2x16
655+ // clamps to +/-65504 and produces Inf for larger values. This helper
656+ // detects where standard conversion overflowed to Inf/NaN and
657+ // re-encodes those values using the extended range representation:
658+ // halve the value, convert to standard f16 (giving exponent <= 30),
659+ // then increment the exponent by 1 (placing it in the exponent 31
660+ // slot that Xbox 360 treats as a normal value, not Inf/NaN).
661+ // Operates on a float2 packed via PackHalf2x16 into a uint32, with
662+ // per-lane overflow detection and selection.
663+ auto pack_half_2x16_extended_range = [&](spv::Id float2_value) -> spv::Id {
664+ // Standard f32 to f16 conversion (handles +/-0..65504 correctly).
665+ spv::Id standard =
666+ builder_->createUnaryBuiltinCall (type_uint_, ext_inst_glsl_std_450_,
667+ GLSLstd450PackHalf2x16, float2_value);
668+
669+ // Detect where standard conversion produced Inf or NaN
670+ // (exponent field = 31, i.e. bits [14:10] all set = 0x7C00)
671+ // in each 16-bit lane of the packed result.
672+ spv::Id const_0x7C00 = builder_->makeUintConstant (0x7C00 );
673+ spv::Id lower_exp = builder_->createBinOp (spv::OpBitwiseAnd, type_uint_,
674+ standard, const_0x7C00);
675+ spv::Id lower_overflow = builder_->createBinOp (spv::OpIEqual, type_bool_,
676+ lower_exp, const_0x7C00);
677+ spv::Id upper_bits =
678+ builder_->createBinOp (spv::OpShiftRightLogical, type_uint_, standard,
679+ builder_->makeUintConstant (16 ));
680+ spv::Id upper_exp = builder_->createBinOp (spv::OpBitwiseAnd, type_uint_,
681+ upper_bits, const_0x7C00);
682+ spv::Id upper_overflow = builder_->createBinOp (spv::OpIEqual, type_bool_,
683+ upper_exp, const_0x7C00);
684+
685+ // For values that overflowed, compute extended range encoding:
686+ // 1. Clamp to +/-131008.0 (max extended float16 can represent).
687+ spv::Id const_131008 = builder_->makeFloatConstant (131008 .0f );
688+ spv::Id const_neg_131008 = builder_->makeFloatConstant (-131008 .0f );
689+ id_vector_temp_.clear ();
690+ id_vector_temp_.push_back (const_neg_131008);
691+ id_vector_temp_.push_back (const_neg_131008);
692+ spv::Id const_neg_131008_vec2 =
693+ builder_->makeCompositeConstant (type_float2_, id_vector_temp_);
694+ id_vector_temp_.clear ();
695+ id_vector_temp_.push_back (const_131008);
696+ id_vector_temp_.push_back (const_131008);
697+ spv::Id const_131008_vec2 =
698+ builder_->makeCompositeConstant (type_float2_, id_vector_temp_);
699+ spv::Id clamped = builder_->createTriBuiltinCall (
700+ type_float2_, ext_inst_glsl_std_450_, GLSLstd450FClamp, float2_value,
701+ const_neg_131008_vec2, const_131008_vec2);
702+
703+ // 2. Halve to bring into standard float16 range (max 65504).
704+ spv::Id const_half = builder_->makeFloatConstant (0 .5f );
705+ id_vector_temp_.clear ();
706+ id_vector_temp_.push_back (const_half);
707+ id_vector_temp_.push_back (const_half);
708+ spv::Id const_half_vec2 =
709+ builder_->makeCompositeConstant (type_float2_, id_vector_temp_);
710+ spv::Id halved = builder_->createBinOp (spv::OpFMul, type_float2_, clamped,
711+ const_half_vec2);
712+
713+ // 3. Convert the halved value (will have exponent <= 30).
714+ spv::Id halved_packed = builder_->createUnaryBuiltinCall (
715+ type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16, halved);
716+
717+ // 4. Increment exponent by 1 in both lanes (add 0x0400 to each
718+ // 16-bit half = 0x04000400) to compensate for the halving.
719+ spv::Id extended =
720+ builder_->createBinOp (spv::OpIAdd, type_uint_, halved_packed,
721+ builder_->makeUintConstant (0x04000400 ));
722+
723+ // Select: use extended result where standard gave Inf/NaN,
724+ // keep standard result otherwise. Per-lane selection via masking.
725+ spv::Id const_0xFFFF = builder_->makeUintConstant (0xFFFF );
726+ spv::Id const_0xFFFF0000 = builder_->makeUintConstant (0xFFFF0000 );
727+ spv::Id result_lower = builder_->createTriOp (
728+ spv::OpSelect, type_uint_, lower_overflow,
729+ builder_->createBinOp (spv::OpBitwiseAnd, type_uint_, extended,
730+ const_0xFFFF),
731+ builder_->createBinOp (spv::OpBitwiseAnd, type_uint_, standard,
732+ const_0xFFFF));
733+ spv::Id result_upper = builder_->createTriOp (
734+ spv::OpSelect, type_uint_, upper_overflow,
735+ builder_->createBinOp (spv::OpBitwiseAnd, type_uint_, extended,
736+ const_0xFFFF0000),
737+ builder_->createBinOp (spv::OpBitwiseAnd, type_uint_, standard,
738+ const_0xFFFF0000));
739+ return builder_->createBinOp (spv::OpBitwiseOr, type_uint_, result_lower,
740+ result_upper);
741+ };
654742
655743 // k_16_FLOAT
656744 format_switch.makeBeginCase (
@@ -662,8 +750,7 @@ void SpirvShaderTranslator::ExportToMemory(uint8_t export_eM) {
662750 id_vector_temp_.push_back (builder_->createCompositeExtract (
663751 eM_swapped[eM_index], type_float_, 0 ));
664752 id_vector_temp_.push_back (const_float_0_);
665- spv::Id format_packed_16_float_x = builder_->createUnaryBuiltinCall (
666- type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16,
753+ spv::Id format_packed_16_float_x = pack_half_2x16_extended_range (
667754 builder_->createCompositeConstruct (type_float2_, id_vector_temp_));
668755 id_vector_temp_.clear ();
669756 id_vector_temp_.resize (4 , const_uint_0_);
@@ -683,11 +770,10 @@ void SpirvShaderTranslator::ExportToMemory(uint8_t export_eM) {
683770 uint_vector_temp_.clear ();
684771 uint_vector_temp_.push_back (0 );
685772 uint_vector_temp_.push_back (1 );
686- spv::Id format_packed_16_16_float_xy = builder_->createUnaryBuiltinCall (
687- type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16,
688- builder_->createRvalueSwizzle (spv::NoPrecision, type_float2_,
689- eM_swapped[eM_index],
690- uint_vector_temp_));
773+ spv::Id format_packed_16_16_float_xy =
774+ pack_half_2x16_extended_range (builder_->createRvalueSwizzle (
775+ spv::NoPrecision, type_float2_, eM_swapped[eM_index],
776+ uint_vector_temp_));
691777 id_vector_temp_.clear ();
692778 id_vector_temp_.resize (4 , const_uint_0_);
693779 id_vector_temp_.front () = format_packed_16_16_float_xy;
@@ -710,11 +796,9 @@ void SpirvShaderTranslator::ExportToMemory(uint8_t export_eM) {
710796 uint_vector_temp_.push_back (2 * component_index);
711797 uint_vector_temp_.push_back (2 * component_index + 1 );
712798 format_packed_16_16_16_16_float_xy_zw[component_index] =
713- builder_->createUnaryBuiltinCall (
714- type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16,
715- builder_->createRvalueSwizzle (spv::NoPrecision, type_float2_,
716- eM_swapped[eM_index],
717- uint_vector_temp_));
799+ pack_half_2x16_extended_range (builder_->createRvalueSwizzle (
800+ spv::NoPrecision, type_float2_, eM_swapped[eM_index],
801+ uint_vector_temp_));
718802 }
719803 id_vector_temp_.clear ();
720804 id_vector_temp_.push_back (format_packed_16_16_16_16_float_xy_zw[0 ]);
0 commit comments