Skip to content

Commit 707052a

Browse files
committed
[Vulkan] Implement extended range for f32 to f16 conversion in memexport
1 parent 7cb6194 commit 707052a

File tree

1 file changed

+97
-13
lines changed

1 file changed

+97
-13
lines changed

src/xenia/gpu/spirv_shader_translator_memexport.cc

Lines changed: 97 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -650,7 +650,95 @@ void SpirvShaderTranslator::ExportToMemory(uint8_t export_eM) {
650650
add_format_case(fixed16_packed, 3);
651651
}
652652

653-
// TODO(Triang3l): Use the extended range float16 conversion.
653+
// Xbox 360 float16 uses extended range: exponent 31 is NOT Inf/NaN
654+
// but a valid large value (up to +/-131008). Standard PackHalf2x16
655+
// clamps to +/-65504 and produces Inf for larger values. This helper
656+
// detects where standard conversion overflowed to Inf/NaN and
657+
// re-encodes those values using the extended range representation:
658+
// halve the value, convert to standard f16 (giving exponent <= 30),
659+
// then increment the exponent by 1 (placing it in the exponent 31
660+
// slot that Xbox 360 treats as a normal value, not Inf/NaN).
661+
// Operates on a float2 packed via PackHalf2x16 into a uint32, with
662+
// per-lane overflow detection and selection.
663+
auto pack_half_2x16_extended_range = [&](spv::Id float2_value) -> spv::Id {
664+
// Standard f32 to f16 conversion (handles +/-0..65504 correctly).
665+
spv::Id standard =
666+
builder_->createUnaryBuiltinCall(type_uint_, ext_inst_glsl_std_450_,
667+
GLSLstd450PackHalf2x16, float2_value);
668+
669+
// Detect where standard conversion produced Inf or NaN
670+
// (exponent field = 31, i.e. bits [14:10] all set = 0x7C00)
671+
// in each 16-bit lane of the packed result.
672+
spv::Id const_0x7C00 = builder_->makeUintConstant(0x7C00);
673+
spv::Id lower_exp = builder_->createBinOp(spv::OpBitwiseAnd, type_uint_,
674+
standard, const_0x7C00);
675+
spv::Id lower_overflow = builder_->createBinOp(spv::OpIEqual, type_bool_,
676+
lower_exp, const_0x7C00);
677+
spv::Id upper_bits =
678+
builder_->createBinOp(spv::OpShiftRightLogical, type_uint_, standard,
679+
builder_->makeUintConstant(16));
680+
spv::Id upper_exp = builder_->createBinOp(spv::OpBitwiseAnd, type_uint_,
681+
upper_bits, const_0x7C00);
682+
spv::Id upper_overflow = builder_->createBinOp(spv::OpIEqual, type_bool_,
683+
upper_exp, const_0x7C00);
684+
685+
// For values that overflowed, compute extended range encoding:
686+
// 1. Clamp to +/-131008.0 (max extended float16 can represent).
687+
spv::Id const_131008 = builder_->makeFloatConstant(131008.0f);
688+
spv::Id const_neg_131008 = builder_->makeFloatConstant(-131008.0f);
689+
id_vector_temp_.clear();
690+
id_vector_temp_.push_back(const_neg_131008);
691+
id_vector_temp_.push_back(const_neg_131008);
692+
spv::Id const_neg_131008_vec2 =
693+
builder_->makeCompositeConstant(type_float2_, id_vector_temp_);
694+
id_vector_temp_.clear();
695+
id_vector_temp_.push_back(const_131008);
696+
id_vector_temp_.push_back(const_131008);
697+
spv::Id const_131008_vec2 =
698+
builder_->makeCompositeConstant(type_float2_, id_vector_temp_);
699+
spv::Id clamped = builder_->createTriBuiltinCall(
700+
type_float2_, ext_inst_glsl_std_450_, GLSLstd450FClamp, float2_value,
701+
const_neg_131008_vec2, const_131008_vec2);
702+
703+
// 2. Halve to bring into standard float16 range (max 65504).
704+
spv::Id const_half = builder_->makeFloatConstant(0.5f);
705+
id_vector_temp_.clear();
706+
id_vector_temp_.push_back(const_half);
707+
id_vector_temp_.push_back(const_half);
708+
spv::Id const_half_vec2 =
709+
builder_->makeCompositeConstant(type_float2_, id_vector_temp_);
710+
spv::Id halved = builder_->createBinOp(spv::OpFMul, type_float2_, clamped,
711+
const_half_vec2);
712+
713+
// 3. Convert the halved value (will have exponent <= 30).
714+
spv::Id halved_packed = builder_->createUnaryBuiltinCall(
715+
type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16, halved);
716+
717+
// 4. Increment exponent by 1 in both lanes (add 0x0400 to each
718+
// 16-bit half = 0x04000400) to compensate for the halving.
719+
spv::Id extended =
720+
builder_->createBinOp(spv::OpIAdd, type_uint_, halved_packed,
721+
builder_->makeUintConstant(0x04000400));
722+
723+
// Select: use extended result where standard gave Inf/NaN,
724+
// keep standard result otherwise. Per-lane selection via masking.
725+
spv::Id const_0xFFFF = builder_->makeUintConstant(0xFFFF);
726+
spv::Id const_0xFFFF0000 = builder_->makeUintConstant(0xFFFF0000);
727+
spv::Id result_lower = builder_->createTriOp(
728+
spv::OpSelect, type_uint_, lower_overflow,
729+
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, extended,
730+
const_0xFFFF),
731+
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, standard,
732+
const_0xFFFF));
733+
spv::Id result_upper = builder_->createTriOp(
734+
spv::OpSelect, type_uint_, upper_overflow,
735+
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, extended,
736+
const_0xFFFF0000),
737+
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, standard,
738+
const_0xFFFF0000));
739+
return builder_->createBinOp(spv::OpBitwiseOr, type_uint_, result_lower,
740+
result_upper);
741+
};
654742

655743
// k_16_FLOAT
656744
format_switch.makeBeginCase(
@@ -662,8 +750,7 @@ void SpirvShaderTranslator::ExportToMemory(uint8_t export_eM) {
662750
id_vector_temp_.push_back(builder_->createCompositeExtract(
663751
eM_swapped[eM_index], type_float_, 0));
664752
id_vector_temp_.push_back(const_float_0_);
665-
spv::Id format_packed_16_float_x = builder_->createUnaryBuiltinCall(
666-
type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16,
753+
spv::Id format_packed_16_float_x = pack_half_2x16_extended_range(
667754
builder_->createCompositeConstruct(type_float2_, id_vector_temp_));
668755
id_vector_temp_.clear();
669756
id_vector_temp_.resize(4, const_uint_0_);
@@ -683,11 +770,10 @@ void SpirvShaderTranslator::ExportToMemory(uint8_t export_eM) {
683770
uint_vector_temp_.clear();
684771
uint_vector_temp_.push_back(0);
685772
uint_vector_temp_.push_back(1);
686-
spv::Id format_packed_16_16_float_xy = builder_->createUnaryBuiltinCall(
687-
type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16,
688-
builder_->createRvalueSwizzle(spv::NoPrecision, type_float2_,
689-
eM_swapped[eM_index],
690-
uint_vector_temp_));
773+
spv::Id format_packed_16_16_float_xy =
774+
pack_half_2x16_extended_range(builder_->createRvalueSwizzle(
775+
spv::NoPrecision, type_float2_, eM_swapped[eM_index],
776+
uint_vector_temp_));
691777
id_vector_temp_.clear();
692778
id_vector_temp_.resize(4, const_uint_0_);
693779
id_vector_temp_.front() = format_packed_16_16_float_xy;
@@ -710,11 +796,9 @@ void SpirvShaderTranslator::ExportToMemory(uint8_t export_eM) {
710796
uint_vector_temp_.push_back(2 * component_index);
711797
uint_vector_temp_.push_back(2 * component_index + 1);
712798
format_packed_16_16_16_16_float_xy_zw[component_index] =
713-
builder_->createUnaryBuiltinCall(
714-
type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16,
715-
builder_->createRvalueSwizzle(spv::NoPrecision, type_float2_,
716-
eM_swapped[eM_index],
717-
uint_vector_temp_));
799+
pack_half_2x16_extended_range(builder_->createRvalueSwizzle(
800+
spv::NoPrecision, type_float2_, eM_swapped[eM_index],
801+
uint_vector_temp_));
718802
}
719803
id_vector_temp_.clear();
720804
id_vector_temp_.push_back(format_packed_16_16_16_16_float_xy_zw[0]);

0 commit comments

Comments
 (0)