@@ -21,6 +21,44 @@ using namespace ofh;
2121 (dst).val[1 ] = vshrq_n_s16((src).val[1 ], (exponent)); \
2222 (dst).val[2 ] = vshrq_n_s16((src).val[2 ], (exponent));
2323
24+ // / Workaround for \c vshrq_n_s16 NEON intrinsic which only accepts an immediate value known at compile time. Note that
25+ // / we are checking values up to 8 because minimum output bit width used in BFP compression is 8 bits.
26+ static inline void shift_right_x3vector_s16 (int16x8x3_t src, int16x8x3_t & dst, uint8_t exponent)
27+ {
28+ switch (exponent) {
29+ case 1 :
30+ shift_right_x3vector (src, dst, 1 );
31+ break ;
32+ case 2 :
33+ shift_right_x3vector (src, dst, 2 );
34+ break ;
35+ case 3 :
36+ shift_right_x3vector (src, dst, 3 );
37+ break ;
38+ case 4 :
39+ shift_right_x3vector (src, dst, 4 );
40+ break ;
41+ case 5 :
42+ shift_right_x3vector (src, dst, 5 );
43+ break ;
44+ case 6 :
45+ shift_right_x3vector (src, dst, 6 );
46+ break ;
47+ case 7 :
48+ shift_right_x3vector (src, dst, 7 );
49+ break ;
50+ case 8 :
51+ shift_right_x3vector (src, dst, 8 );
52+ break ;
53+ default :
54+ // Copy source to destination when exponent is 0 or incorrect value is passed.
55+ dst.val [0 ] = src.val [0 ];
56+ dst.val [1 ] = src.val [1 ];
57+ dst.val [2 ] = src.val [2 ];
58+ break ;
59+ }
60+ }
61+
2462void iq_compression_bfp_neon::compress (span<srsran::ofh::compressed_prb> output,
2563 span<const srsran::cf_t > input,
2664 const srsran::ofh::ru_compression_params& params)
@@ -52,17 +90,17 @@ void iq_compression_bfp_neon::compress(span<srsran::ofh::compressed_prb>
5290 int16x8x3_t vec_s16x3_3 = vld1q_s16_x3 (&input_quantized[sample_idx + NOF_SAMPLES_PER_PRB * 3 ]);
5391
5492 // Determine exponents.
55- uint8_t exponent_0 = neon::determine_bfp_exponent (vec_s16x3_0, params.data_width );
56- uint8_t exponent_1 = neon::determine_bfp_exponent (vec_s16x3_1, params.data_width );
57- uint8_t exponent_2 = neon::determine_bfp_exponent (vec_s16x3_2, params.data_width );
58- uint8_t exponent_3 = neon::determine_bfp_exponent (vec_s16x3_3, params.data_width );
93+ const uint8_t exponent_0 = neon::determine_bfp_exponent (vec_s16x3_0, params.data_width );
94+ const uint8_t exponent_1 = neon::determine_bfp_exponent (vec_s16x3_1, params.data_width );
95+ const uint8_t exponent_2 = neon::determine_bfp_exponent (vec_s16x3_2, params.data_width );
96+ const uint8_t exponent_3 = neon::determine_bfp_exponent (vec_s16x3_3, params.data_width );
5997
6098 // Shift original IQ samples right.
6199 int16x8x3_t shifted_data_0, shifted_data_1, shifted_data_2, shifted_data_3;
62- shift_right_x3vector (vec_s16x3_0, shifted_data_0, exponent_0);
63- shift_right_x3vector (vec_s16x3_1, shifted_data_1, exponent_1);
64- shift_right_x3vector (vec_s16x3_2, shifted_data_2, exponent_2);
65- shift_right_x3vector (vec_s16x3_3, shifted_data_3, exponent_3);
100+ shift_right_x3vector_s16 (vec_s16x3_0, shifted_data_0, exponent_0);
101+ shift_right_x3vector_s16 (vec_s16x3_1, shifted_data_1, exponent_1);
102+ shift_right_x3vector_s16 (vec_s16x3_2, shifted_data_2, exponent_2);
103+ shift_right_x3vector_s16 (vec_s16x3_3, shifted_data_3, exponent_3);
66104
67105 // Pack compressed samples of the PRB using utility function and save the exponent.
68106 neon::pack_prb_big_endian (output[rb], shifted_data_0, params.data_width );
@@ -83,13 +121,13 @@ void iq_compression_bfp_neon::compress(span<srsran::ofh::compressed_prb>
83121 int16x8x3_t vec_s16x3_1 = vld1q_s16_x3 (&input_quantized[sample_idx + NOF_SAMPLES_PER_PRB]);
84122
85123 // Determine exponents.
86- uint8_t exponent_0 = neon::determine_bfp_exponent (vec_s16x3_0, params.data_width );
87- uint8_t exponent_1 = neon::determine_bfp_exponent (vec_s16x3_1, params.data_width );
124+ const uint8_t exponent_0 = neon::determine_bfp_exponent (vec_s16x3_0, params.data_width );
125+ const uint8_t exponent_1 = neon::determine_bfp_exponent (vec_s16x3_1, params.data_width );
88126
89127 // Shift original IQ samples right.
90128 int16x8x3_t shifted_data_0, shifted_data_1;
91- shift_right_x3vector (vec_s16x3_0, shifted_data_0, exponent_0);
92- shift_right_x3vector (vec_s16x3_1, shifted_data_1, exponent_1);
129+ shift_right_x3vector_s16 (vec_s16x3_0, shifted_data_0, exponent_0);
130+ shift_right_x3vector_s16 (vec_s16x3_1, shifted_data_1, exponent_1);
93131
94132 // Pack compressed samples of the PRB using utility function.
95133 neon::pack_prb_big_endian (output[rb], shifted_data_0, params.data_width );
@@ -107,11 +145,11 @@ void iq_compression_bfp_neon::compress(span<srsran::ofh::compressed_prb>
107145 int16x8x3_t vec_s16x3 = vld1q_s16_x3 (&input_quantized[sample_idx]);
108146
109147 // Determine exponent.
110- uint8_t exponent = neon::determine_bfp_exponent (vec_s16x3, params.data_width );
148+ const uint8_t exponent = neon::determine_bfp_exponent (vec_s16x3, params.data_width );
111149
112150 // Shift original IQ samples right.
113151 int16x8x3_t shifted_data;
114- shift_right_x3vector (vec_s16x3, shifted_data, exponent);
152+ shift_right_x3vector_s16 (vec_s16x3, shifted_data, exponent);
115153
116154 // Pack compressed samples of the PRB using utility function.
117155 neon::pack_prb_big_endian (output[rb], shifted_data, params.data_width );
0 commit comments