Skip to content

Commit 386b5c4

Browse files
saukaPavel Harbanau
authored andcommitted
ofh: workaround in NEON vshrq_n_s16 intrinsic when compiling with clang with PARANOID assertion level (clang allows using only compile-time constants in vshr instruction)
1 parent b2efe31 commit 386b5c4

File tree

1 file changed

+52
-14
lines changed

1 file changed

+52
-14
lines changed

lib/ofh/compression/iq_compression_bfp_neon.cpp

Lines changed: 52 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,44 @@ using namespace ofh;
2121
(dst).val[1] = vshrq_n_s16((src).val[1], (exponent)); \
2222
(dst).val[2] = vshrq_n_s16((src).val[2], (exponent));
2323

24+
/// Workaround for \c vshrq_n_s16 NEON intrinsic which only accepts an immediate value known at compile time. Note that
25+
/// we are checking values up to 8 because minimum output bit width used in BFP compression is 8 bits.
26+
static inline void shift_right_x3vector_s16(int16x8x3_t src, int16x8x3_t& dst, uint8_t exponent)
27+
{
28+
switch (exponent) {
29+
case 1:
30+
shift_right_x3vector(src, dst, 1);
31+
break;
32+
case 2:
33+
shift_right_x3vector(src, dst, 2);
34+
break;
35+
case 3:
36+
shift_right_x3vector(src, dst, 3);
37+
break;
38+
case 4:
39+
shift_right_x3vector(src, dst, 4);
40+
break;
41+
case 5:
42+
shift_right_x3vector(src, dst, 5);
43+
break;
44+
case 6:
45+
shift_right_x3vector(src, dst, 6);
46+
break;
47+
case 7:
48+
shift_right_x3vector(src, dst, 7);
49+
break;
50+
case 8:
51+
shift_right_x3vector(src, dst, 8);
52+
break;
53+
default:
54+
// Copy source to destination when exponent is 0 or incorrect value is passed.
55+
dst.val[0] = src.val[0];
56+
dst.val[1] = src.val[1];
57+
dst.val[2] = src.val[2];
58+
break;
59+
}
60+
}
61+
2462
void iq_compression_bfp_neon::compress(span<srsran::ofh::compressed_prb> output,
2563
span<const srsran::cf_t> input,
2664
const srsran::ofh::ru_compression_params& params)
@@ -52,17 +90,17 @@ void iq_compression_bfp_neon::compress(span<srsran::ofh::compressed_prb>
5290
int16x8x3_t vec_s16x3_3 = vld1q_s16_x3(&input_quantized[sample_idx + NOF_SAMPLES_PER_PRB * 3]);
5391

5492
// Determine exponents.
55-
uint8_t exponent_0 = neon::determine_bfp_exponent(vec_s16x3_0, params.data_width);
56-
uint8_t exponent_1 = neon::determine_bfp_exponent(vec_s16x3_1, params.data_width);
57-
uint8_t exponent_2 = neon::determine_bfp_exponent(vec_s16x3_2, params.data_width);
58-
uint8_t exponent_3 = neon::determine_bfp_exponent(vec_s16x3_3, params.data_width);
93+
const uint8_t exponent_0 = neon::determine_bfp_exponent(vec_s16x3_0, params.data_width);
94+
const uint8_t exponent_1 = neon::determine_bfp_exponent(vec_s16x3_1, params.data_width);
95+
const uint8_t exponent_2 = neon::determine_bfp_exponent(vec_s16x3_2, params.data_width);
96+
const uint8_t exponent_3 = neon::determine_bfp_exponent(vec_s16x3_3, params.data_width);
5997

6098
// Shift original IQ samples right.
6199
int16x8x3_t shifted_data_0, shifted_data_1, shifted_data_2, shifted_data_3;
62-
shift_right_x3vector(vec_s16x3_0, shifted_data_0, exponent_0);
63-
shift_right_x3vector(vec_s16x3_1, shifted_data_1, exponent_1);
64-
shift_right_x3vector(vec_s16x3_2, shifted_data_2, exponent_2);
65-
shift_right_x3vector(vec_s16x3_3, shifted_data_3, exponent_3);
100+
shift_right_x3vector_s16(vec_s16x3_0, shifted_data_0, exponent_0);
101+
shift_right_x3vector_s16(vec_s16x3_1, shifted_data_1, exponent_1);
102+
shift_right_x3vector_s16(vec_s16x3_2, shifted_data_2, exponent_2);
103+
shift_right_x3vector_s16(vec_s16x3_3, shifted_data_3, exponent_3);
66104

67105
// Pack compressed samples of the PRB using utility function and save the exponent.
68106
neon::pack_prb_big_endian(output[rb], shifted_data_0, params.data_width);
@@ -83,13 +121,13 @@ void iq_compression_bfp_neon::compress(span<srsran::ofh::compressed_prb>
83121
int16x8x3_t vec_s16x3_1 = vld1q_s16_x3(&input_quantized[sample_idx + NOF_SAMPLES_PER_PRB]);
84122

85123
// Determine exponents.
86-
uint8_t exponent_0 = neon::determine_bfp_exponent(vec_s16x3_0, params.data_width);
87-
uint8_t exponent_1 = neon::determine_bfp_exponent(vec_s16x3_1, params.data_width);
124+
const uint8_t exponent_0 = neon::determine_bfp_exponent(vec_s16x3_0, params.data_width);
125+
const uint8_t exponent_1 = neon::determine_bfp_exponent(vec_s16x3_1, params.data_width);
88126

89127
// Shift original IQ samples right.
90128
int16x8x3_t shifted_data_0, shifted_data_1;
91-
shift_right_x3vector(vec_s16x3_0, shifted_data_0, exponent_0);
92-
shift_right_x3vector(vec_s16x3_1, shifted_data_1, exponent_1);
129+
shift_right_x3vector_s16(vec_s16x3_0, shifted_data_0, exponent_0);
130+
shift_right_x3vector_s16(vec_s16x3_1, shifted_data_1, exponent_1);
93131

94132
// Pack compressed samples of the PRB using utility function.
95133
neon::pack_prb_big_endian(output[rb], shifted_data_0, params.data_width);
@@ -107,11 +145,11 @@ void iq_compression_bfp_neon::compress(span<srsran::ofh::compressed_prb>
107145
int16x8x3_t vec_s16x3 = vld1q_s16_x3(&input_quantized[sample_idx]);
108146

109147
// Determine exponent.
110-
uint8_t exponent = neon::determine_bfp_exponent(vec_s16x3, params.data_width);
148+
const uint8_t exponent = neon::determine_bfp_exponent(vec_s16x3, params.data_width);
111149

112150
// Shift original IQ samples right.
113151
int16x8x3_t shifted_data;
114-
shift_right_x3vector(vec_s16x3, shifted_data, exponent);
152+
shift_right_x3vector_s16(vec_s16x3, shifted_data, exponent);
115153

116154
// Pack compressed samples of the PRB using utility function.
117155
neon::pack_prb_big_endian(output[rb], shifted_data, params.data_width);

0 commit comments

Comments
 (0)