Skip to content

Commit b83eff4

Browse files
saukacodebot
authored and committed
ofh: fix error in avx512 bfp compression when short MTU is used
1 parent 878d1b8 commit b83eff4

File tree

1 file changed

+7
-12
lines changed

1 file changed

+7
-12
lines changed

lib/ofh/compression/iq_compression_bfp_avx512.cpp

Lines changed: 7 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -33,8 +33,10 @@ static inline __m512i loadu_epi16_avx512(const void* mem_address)
3333
static void
3434
compress_prb_avx512(compressed_prb& c_prb, const int16_t* uncompr_samples, uint8_t exponent, unsigned data_width)
3535
{
36+
const __mmask32 load_mask = 0x00ffffff;
37+
3638
// Load from memory.
37-
__m512i rb_epi16 = loadu_epi16_avx512(uncompr_samples);
39+
__m512i rb_epi16 = _mm512_maskz_loadu_epi16(load_mask, uncompr_samples);
3840

3941
// Apply exponent (compress).
4042
__m512i rb_shifted_epi16 = _mm512_srai_epi16(rb_epi16, exponent);
@@ -102,10 +104,10 @@ void iq_compression_bfp_avx512::compress(span<compressed_prb> output,
102104
}
103105

104106
// Process the remaining PRBs (one PRB at a time),
105-
// except the last one - to avoid reading behind the input data memory.
106-
for (size_t rb_index_end = output.size() - 1; rb != rb_index_end; ++rb) {
107-
const __m512i AVX512_ZERO = _mm512_set1_epi16(0);
108-
__m512i rb_epi16 = loadu_epi16_avx512(&input_quantized[sample_idx]);
107+
for (size_t rb_index_end = output.size(); rb != rb_index_end; ++rb) {
108+
const __m512i AVX512_ZERO = _mm512_set1_epi16(0);
109+
const __mmask32 load_mask = 0x00ffffff;
110+
__m512i rb_epi16 = _mm512_maskz_loadu_epi16(load_mask, &input_quantized[sample_idx]);
109111

110112
// Determine BFP exponent and extract it from the first byte of the first 128bit lane.
111113
__m512i exp_epu32 = mm512::determine_bfp_exponent(rb_epi16, AVX512_ZERO, AVX512_ZERO, params.data_width);
@@ -118,13 +120,6 @@ void iq_compression_bfp_avx512::compress(span<compressed_prb> output,
118120

119121
sample_idx += NOF_SAMPLES_PER_PRB;
120122
}
121-
122-
// Use generic implementation for the remaining resource blocks.
123-
for (size_t rb_index_end = output.size(); rb != rb_index_end; ++rb) {
124-
const auto* start_it = input_quantized.begin() + sample_idx;
125-
compress_prb_generic(output[rb], {start_it, NOF_SAMPLES_PER_PRB}, params.data_width);
126-
sample_idx += NOF_SAMPLES_PER_PRB;
127-
}
128123
}
129124

130125
void iq_compression_bfp_avx512::decompress(span<cf_t> output,

0 commit comments

Comments
 (0)