srsran
diff --git a/include/srsran/phy/generic_functions/precoding/channel_precoder.h b/include/srsran/phy/generic_functions/precoding/channel_precoder.h
Lines changed: 1 addition & 1 deletion
diff --git a/include/srsran/phy/support/resource_grid_writer.h b/include/srsran/phy/support/resource_grid_writer.h
Lines changed: 1 addition & 1 deletion
diff --git a/lib/phy/generic_functions/precoding/channel_precoder_avx2.cpp b/lib/phy/generic_functions/precoding/channel_precoder_avx2.cpp
Lines changed: 22 additions & 21 deletions
diff --git a/lib/phy/generic_functions/precoding/channel_precoder_avx2.h b/lib/phy/generic_functions/precoding/channel_precoder_avx2.h
Lines changed: 1 addition & 1 deletion
diff --git a/lib/phy/generic_functions/precoding/channel_precoder_avx512.cpp b/lib/phy/generic_functions/precoding/channel_precoder_avx512.cpp
Lines changed: 26 additions & 26 deletions
diff --git a/lib/phy/generic_functions/precoding/channel_precoder_avx512.h b/lib/phy/generic_functions/precoding/channel_precoder_avx512.h
Lines changed: 1 addition & 1 deletion
diff --git a/lib/phy/generic_functions/precoding/channel_precoder_generic.cpp b/lib/phy/generic_functions/precoding/channel_precoder_generic.cpp
Lines changed: 4 additions & 3 deletions
diff --git a/lib/phy/generic_functions/precoding/channel_precoder_generic.h b/lib/phy/generic_functions/precoding/channel_precoder_generic.h
Lines changed: 1 addition & 1 deletion
diff --git a/lib/phy/generic_functions/precoding/channel_precoder_impl.cpp b/lib/phy/generic_functions/precoding/channel_precoder_impl.cpp
Lines changed: 2 additions & 2 deletions
diff --git a/lib/phy/generic_functions/precoding/channel_precoder_impl.h b/lib/phy/generic_functions/precoding/channel_precoder_impl.h
Lines changed: 4 additions & 3 deletions
@@ -34,7 +34,7 @@ class channel_precoder
   /// of RE per layer of the input buffer.
   /// \remark An assertion is triggered if the precoding matrix dimensions do not match the number of layers of the
   /// input buffer and the number of antenna ports of the output buffer.
-  virtual void apply_precoding(re_buffer_writer<>&            output,
+  virtual void apply_precoding(re_buffer_writer<cbf16_t>&     output,
                                const re_buffer_reader<>&      input,
                                const precoding_weight_matrix& precoding) const = 0;
 
 
@@ -85,7 +85,7 @@ class resource_grid_writer : public resource_grid_base
   /// \param[in] symbols Symbols to be written into the resource grid.
   /// \note The RE positions given \c k_init, the number of elements in \c symbols and the \c stride shall be within the
   /// resource grid number of subcarriers.
-  virtual void put(unsigned port, unsigned l, unsigned k_init, unsigned stride, span<const cf_t> symbols) = 0;
+  virtual void put(unsigned port, unsigned l, unsigned k_init, unsigned stride, span<const cbf16_t> symbols) = 0;
 
   /// \brief Gets a read-write view of an OFDM symbol for a given port.
   ///
 
@@ -43,9 +43,26 @@ simd_cf_interleaved operator*(const simd_cf_interleaved& re, const simd_cf_t& we
   return _mm256_fmaddsub_ps(re, weight.re, _mm256_mul_ps(_mm256_shuffle_ps(re, re, 0xb1), weight.im));
 }
 
+inline __m128i ps_to_cbf16(simd_cf_interleaved in)
+{
+  const __m256i bias = _mm256_set1_epi32(0x7fff);
+  const __m256i one  = _mm256_set1_epi32(0x1);
+
+  __m256i a_i32 = _mm256_castps_si256(in);
+
+  // Round to nearest even: add 0x7fff plus the LSB of the upper 16-bit half (NaN is not special-cased).
+  a_i32 = _mm256_add_epi32(a_i32, _mm256_add_epi32(bias, _mm256_and_si256(_mm256_srli_epi32(a_i32, 16), one)));
+
+  // Arithmetic shift right by 16 bits, leaving a sign-extended 16-bit value in each 32-bit lane.
+  a_i32 = _mm256_srai_epi32(a_i32, 16);
+
+  // Narrow both 128-bit halves to 16-bit lanes; the signed-saturating pack is lossless after the shift above.
+  return _mm_packs_epi32(_mm256_extractf128_si256(a_i32, 0), _mm256_extractf128_si256(a_i32, 1));
+}
+
 } // namespace
 
-void channel_precoder_avx2::apply_precoding_port(span<cf_t>                port_re,
+void channel_precoder_avx2::apply_precoding_port(span<cbf16_t>             port_re,
                                                  const re_buffer_reader<>& input_re,
                                                  span<const cf_t>          port_weights) const
 {
@@ -84,15 +101,16 @@ void channel_precoder_avx2::apply_precoding_port(span<cf_t>                port_
     }
 
     // Store.
-    _mm256_storeu_ps(reinterpret_cast<float*>(&port_re[i_re]), re_out);
+    _mm_storeu_si128(reinterpret_cast<__m128i*>(&port_re[i_re]), ps_to_cbf16(re_out));
   }
 
   for (; i_re != nof_re; ++i_re) {
-    port_re[i_re] = layer_re_view_list[0][i_re] * port_weights[0];
+    cf_t sum = layer_re_view_list[0][i_re] * port_weights[0];
 
     for (unsigned i_layer = 1; i_layer != nof_layers; ++i_layer) {
-      port_re[i_re] += layer_re_view_list[i_layer][i_re] * port_weights[i_layer];
+      sum += layer_re_view_list[i_layer][i_re] * port_weights[i_layer];
     }
+    port_re[i_re] = sum;
   }
 }
 
@@ -181,23 +199,6 @@ static inline void layer4_map_and_ci8_to_cf(simd_cf_interleaved& out_l0,
   from_ci8_to_cf(out_l0, out_l1, out_l2, out_l3, tmp);
 }
 
-inline __m128i ps_to_cbf16(simd_cf_interleaved in)
-{
-  const __m256i bias = _mm256_set1_epi32(0x7fff);
-  const __m256i one  = _mm256_set1_epi32(0x1);
-
-  __m256i a_i32 = _mm256_castps_si256(in);
-
-  // Round to nearest even.
-  a_i32 = _mm256_add_epi32(a_i32, _mm256_add_epi32(bias, _mm256_and_si256(_mm256_srli_epi32(a_i32, 16), one)));
-
-  // Shift right 16 bits.
-  a_i32 = _mm256_srai_epi32(a_i32, 16);
-
-  // Pack both parts in 32-bit registers.
-  return _mm_packs_epi32(_mm256_extractf128_si256(a_i32, 0), _mm256_extractf128_si256(a_i32, 1));
-}
-
 void channel_precoder_avx2::apply_layer_map_and_precoding(re_buffer_writer<cbf16_t>&     output,
                                                           span<const ci8_t>              input,
                                                           const precoding_weight_matrix& precoding) const
 
@@ -22,7 +22,7 @@ class channel_precoder_avx2 : public channel_precoder_impl
 {
 public:
   // See interface for documentation.
-  void apply_precoding_port(span<cf_t>                port_re,
+  void apply_precoding_port(span<cbf16_t>             port_re,
                             const re_buffer_reader<>& input_re,
                             span<const cf_t>          port_weights) const override;
 
 
@@ -37,6 +37,27 @@ struct simd_cf_t {
 // Type to hold a set of complex numbers using an AVX512 register, with interleaved real and imaginary parts.
 using simd_cf_interleaved = __m512;
 
+inline __m256i ps_to_cbf16(simd_cf_interleaved in)
+{
+#if __AVX512BF16__
+  return (__m256i)_mm512_cvtneps_pbh(in);
+#else  // __AVX512BF16__
+  const __m512i bias = _mm512_set1_epi32(0x7fff);
+  const __m512i one  = _mm512_set1_epi32(0x1);
+
+  __m512i a_i32 = _mm512_castps_si512(in);
+
+  // Round to nearest even: add 0x7fff plus the LSB of the upper 16-bit half (NaN is not special-cased).
+  a_i32 = _mm512_add_epi32(a_i32, _mm512_add_epi32(bias, _mm512_and_si512(_mm512_srli_epi32(a_i32, 16), one)));
+
+  // Logical shift right by 16 bits, moving the rounded upper half into the low 16 bits of each lane.
+  a_i32 = _mm512_srli_epi32(a_i32, 16);
+
+  // Truncate each 32-bit lane to its low 16 bits, producing sixteen 16-bit values.
+  return _mm512_cvtepi32_epi16(a_i32);
+#endif // __AVX512BF16__
+}
+
 } // namespace
 
 // Multiplication operator for the precoding weights.
@@ -203,28 +224,7 @@ static inline void layer4_map_and_ci8_to_cf(simd_cf_interleaved& out0,
   from_ci8_to_cf(out0, out1, out2, out3, tmp);
 }
 
-inline __m256i ps_to_cbf16(simd_cf_interleaved in)
-{
-#if __AVX512BF16__
-  return (__m256i)_mm512_cvtneps_pbh(in);
-#else  // __AVX512BF16__
-  const __m512i bias = _mm512_set1_epi32(0x7fff);
-  const __m512i one  = _mm512_set1_epi32(0x1);
-
-  __m512i a_i32 = _mm512_castps_si512(in);
-
-  // Round to nearest even.
-  a_i32 = _mm512_add_epi32(a_i32, _mm512_add_epi32(bias, _mm512_and_si512(_mm512_srli_epi32(a_i32, 16), one)));
-
-  // Shift right 16 bits.
-  a_i32 = _mm512_srli_epi32(a_i32, 16);
-
-  // Pack both parts in 32-bit registers.
-  return _mm512_cvtepi32_epi16(a_i32);
-#endif // __AVX512BF16__
-}
-
-void channel_precoder_avx512::apply_precoding_port(span<cf_t>                port_re,
+void channel_precoder_avx512::apply_precoding_port(span<cbf16_t>             port_re,
                                                    const re_buffer_reader<>& input_re,
                                                    span<const cf_t>          port_weights) const
 {
@@ -263,15 +263,15 @@ void channel_precoder_avx512::apply_precoding_port(span<cf_t>                por
     }
 
     // Store.
-    _mm512_storeu_ps(reinterpret_cast<float*>(&port_re[i_re]), re_out);
+    _mm256_storeu_si256(reinterpret_cast<__m256i*>(&port_re[i_re]), ps_to_cbf16(re_out));
   }
 
   for (; i_re != nof_re; ++i_re) {
-    port_re[i_re] = layer_re_view_list[0][i_re] * port_weights[0];
-
+    cf_t sum = layer_re_view_list[0][i_re] * port_weights[0];
     for (unsigned i_layer = 1; i_layer != nof_layers; ++i_layer) {
-      port_re[i_re] += layer_re_view_list[i_layer][i_re] * port_weights[i_layer];
+      sum += layer_re_view_list[i_layer][i_re] * port_weights[i_layer];
     }
+    port_re[i_re] = sum;
   }
 }
 
 
@@ -21,7 +21,7 @@ namespace srsran {
 class channel_precoder_avx512 : public channel_precoder_impl
 {
   // See interface for documentation.
-  void apply_precoding_port(span<cf_t>                port_re,
+  void apply_precoding_port(span<cbf16_t>             port_re,
                             const re_buffer_reader<>& input_re,
                             span<const cf_t>          port_weights) const override;
 
 
@@ -12,7 +12,7 @@
 
 using namespace srsran;
 
-void channel_precoder_generic::apply_precoding_port(span<cf_t>                port_re,
+void channel_precoder_generic::apply_precoding_port(span<cbf16_t>             port_re,
                                                     const re_buffer_reader<>& input_re,
                                                     span<const cf_t>          port_weights) const
 {
@@ -26,12 +26,13 @@ void channel_precoder_generic::apply_precoding_port(span<cf_t>                po
 
   for (unsigned i_re = 0; i_re != nof_re; ++i_re) {
     // Set the port RE to the contribution of the first layer.
-    port_re[i_re] = layer_re_view_list[0][i_re] * port_weights[0];
+    cf_t sum = layer_re_view_list[0][i_re] * port_weights[0];
 
     for (unsigned i_layer = 1; i_layer != nof_layers; ++i_layer) {
       // Accumulate the contributions of all other layers.
-      port_re[i_re] += layer_re_view_list[i_layer][i_re] * port_weights[i_layer];
+      sum += layer_re_view_list[i_layer][i_re] * port_weights[i_layer];
     }
+    port_re[i_re] = sum;
   }
 }
 
 
@@ -21,7 +21,7 @@ namespace srsran {
 class channel_precoder_generic : public channel_precoder_impl
 {
   // See interface for documentation.
-  void apply_precoding_port(span<cf_t>                port_re,
+  void apply_precoding_port(span<cbf16_t>             port_re,
                             const re_buffer_reader<>& input_re,
                             span<const cf_t>          port_weights) const override;
 
 
@@ -12,7 +12,7 @@
 
 using namespace srsran;
 
-void channel_precoder_impl::apply_precoding(re_buffer_writer<>&            output,
+void channel_precoder_impl::apply_precoding(re_buffer_writer<cbf16_t>&     output,
                                             const re_buffer_reader<>&      input,
                                             const precoding_weight_matrix& precoding) const
 {
@@ -46,7 +46,7 @@ void channel_precoder_impl::apply_precoding(re_buffer_writer<>&            outpu
 
   for (unsigned i_port = 0; i_port != nof_tx_ports; ++i_port) {
     // View of the output RE for a single antenna port.
-    span<cf_t> port_re_view = output.get_slice(i_port);
+    span<cbf16_t> port_re_view = output.get_slice(i_port);
 
     // View of the precoding weights applicable to a single antenna port, i.e., the coefficients applied to each
     // layer for the antenna port.
 
@@ -25,7 +25,7 @@ class channel_precoder_impl : public channel_precoder
   explicit channel_precoder_impl() = default;
 
   // See interface for documentation.
-  void apply_precoding(re_buffer_writer<>&            output,
+  void apply_precoding(re_buffer_writer<cbf16_t>&     output,
                        const re_buffer_reader<>&      input,
                        const precoding_weight_matrix& precoding) const override;
 
@@ -35,8 +35,9 @@ class channel_precoder_impl : public channel_precoder
   /// \param[out] port_re   View over the RE of a single antenna port.
   /// \param[in] input     Input symbols, indexed by RE and transmit layer.
   /// \param[in] precoding Precoding coefficients, indexed by layer.
-  virtual void
-  apply_precoding_port(span<cf_t> port_re, const re_buffer_reader<>& input_re, span<const cf_t> port_weights) const = 0;
+  virtual void apply_precoding_port(span<cbf16_t>             port_re,
+                                    const re_buffer_reader<>& input_re,
+                                    span<const cf_t>          port_weights) const = 0;
 };
 
 } // namespace srsran
Original file line number	Diff line number	Diff line change
`@@ -22,7 +22,7 @@ class channel_precoder_avx2 : public channel_precoder_impl`
`22`	`22`	`{`
`23`	`23`	`public:`
`24`	`24`	`// See interface for documentation.`
`25`		`- void apply_precoding_port(span<cf_t> port_re,`
	`25`	`+ void apply_precoding_port(span<cbf16_t> port_re,`
`26`	`26`	`const re_buffer_reader<>& input_re,`
`27`	`27`	`span<const cf_t> port_weights) const override;`
`28`	`28`
Original file line number	Diff line number	Diff line change
`@@ -21,7 +21,7 @@ namespace srsran {`
`21`	`21`	`class channel_precoder_avx512 : public channel_precoder_impl`
`22`	`22`	`{`
`23`	`23`	`// See interface for documentation.`
`24`		`- void apply_precoding_port(span<cf_t> port_re,`
	`24`	`+ void apply_precoding_port(span<cbf16_t> port_re,`
`25`	`25`	`const re_buffer_reader<>& input_re,`
`26`	`26`	`span<const cf_t> port_weights) const override;`
`27`	`27`
Original file line number	Diff line number	Diff line change
`@@ -12,7 +12,7 @@`
`12`	`12`
`13`	`13`	`using namespace srsran;`
`14`	`14`
`15`		`-void channel_precoder_generic::apply_precoding_port(span<cf_t> port_re,`
	`15`	`+void channel_precoder_generic::apply_precoding_port(span<cbf16_t> port_re,`
`16`	`16`	`const re_buffer_reader<>& input_re,`
`17`	`17`	`span<const cf_t> port_weights) const`
`18`	`18`	`{`
`@@ -26,12 +26,13 @@ void channel_precoder_generic::apply_precoding_port(span<cf_t> po`
`26`	`26`
`27`	`27`	`for (unsigned i_re = 0; i_re != nof_re; ++i_re) {`
`28`	`28`	`// Set the port RE to the contribution of the first layer.`
`29`		`- port_re[i_re] = layer_re_view_list[0][i_re] * port_weights[0];`
	`29`	`+ cf_t sum = layer_re_view_list[0][i_re] * port_weights[0];`
`30`	`30`
`31`	`31`	`for (unsigned i_layer = 1; i_layer != nof_layers; ++i_layer) {`
`32`	`32`	`// Accumulate the contributions of all other layers.`
`33`		`- port_re[i_re] += layer_re_view_list[i_layer][i_re] * port_weights[i_layer];`
	`33`	`+ sum += layer_re_view_list[i_layer][i_re] * port_weights[i_layer];`
`34`	`34`	`}`
	`35`	`+ port_re[i_re] = sum;`
`35`	`36`	`}`
`36`	`37`	`}`
`37`	`38`