Fix stable softmax strategy in Quartus

vloncar · vloncar · commit 6fd7f5649345 · 2022-11-07T19:28:46.000+01:00
diff --git a/hls4ml/templates/quartus/firmware/nnet_utils/nnet_activation.h b/hls4ml/templates/quartus/firmware/nnet_utils/nnet_activation.h
@@ -131,10 +131,12 @@ enum class softmax_implementation {latency=0, legacy=1, stable=2, argmax=3};
 template<class data_T, typename CONFIG_T>
 inline unsigned softmax_stable_idx_from_real_val(const data_T x){
     // Number of address bits for table
-    static constexpr int N = ceillog2(CONFIG_T::table_size);    
+    static constexpr int N = ceillog2(CONFIG_T::table_size);
 
     // Slice the top N bits of the input
-    hls_register ac_int<N, false> y = x.template slc<N>(x.width-N-1);             
+    hls_register ac_int<N, false> y = x.template slc<N>(x.width-N-1);
+    // If x is non-zero but its sliced bits are all 0 (e.g. the most negative value), the index would alias the x == 0 entry, so set bit 0
+    if (x != 0 && y == 0) y[0] = 1;
     return y.to_uint();
 }
 
@@ -158,11 +160,18 @@ void softmax_stable(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]){
     Op_max<data_T> op_max;
     hls_register data_T x_max = reduce<data_T, CONFIG_T::n_in, Op_max<data_T>>(data, op_max);
 
+    // For the diffs, use the same type as the input but force rounding and saturation
+    hls_register ac_fixed<data_T::width, data_T::i_width, true, AC_RND, AC_SAT> d_xi_xmax[CONFIG_T::n_in];
+    #pragma unroll
+    for(unsigned i = 0; i < CONFIG_T::n_in; i++){ // unroll pragma precedes the loop, same form as the exp loop below
+        d_xi_xmax[i] = data[i] - x_max; // stored in the AC_RND/AC_SAT diff type declared above
+    }
+
     // Calculate all the e^x's
     hls_register typename CONFIG_T::exp_table_t exp_res[CONFIG_T::n_in];
     #pragma unroll
     for(unsigned i = 0; i < CONFIG_T::n_in; i++) {
-        exp_res[i] = exp_table[softmax_stable_idx_from_real_val<data_T, CONFIG_T>(data[i] - x_max)];
+        exp_res[i] = exp_table[softmax_stable_idx_from_real_val<data_T, CONFIG_T>(d_xi_xmax[i])];
     }
 
     // Explicitly sum previously calculated exponentials with an adder tree