Commit 4019192
Argmax & Skipped Softmax
1 parent 62046d7, commit 4019192

9 files changed: +165 -54 lines changed
Lines changed: 12 additions & 0 deletions

@@ -0,0 +1,12 @@
+from hls4ml.model.layers import Softmax
+from hls4ml.model.optimizer.optimizer import OptimizerPass
+
+class SkipSoftmax(OptimizerPass):
+    def match(self, node):
+        is_softmax = isinstance(node, Softmax)
+        remove_softmax = node.get_attr('skip', False)
+        return is_softmax and remove_softmax
+
+    def transform(self, model, node):
+        model.remove_node(node, rewire=True)
+        return True
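The pass fires only for Softmax nodes whose 'skip' attribute is set; removing the node with rewire=True connects the softmax's input directly to whatever consumed its output. A minimal usage sketch, mirroring the new test_softmax_skipped test further down (the layer name 'softmax' is simply the Keras layer name used there):

    import tensorflow as tf
    import hls4ml

    # Small Keras model ending in a softmax, as in the new test below.
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(14, input_shape=(10,), name='dense'),
        tf.keras.layers.Activation('softmax', name='softmax'),
    ])

    cfg = hls4ml.utils.config_from_keras_model(model, granularity='name')
    cfg['LayerName']['softmax']['skip'] = True  # makes SkipSoftmax.match() return True

    hls_model = hls4ml.converters.convert_from_keras_model(
        model, hls_config=cfg, output_dir='hls4mlprj_skip_softmax', backend='Vivado')
    hls_model.compile()
    # The compiled model now returns the raw Dense outputs; no softmax remains on the data path.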

hls4ml/backends/quartus/quartus_backend.py

Lines changed: 7 additions & 2 deletions

@@ -43,6 +43,11 @@ def _register_flows(self):
         ]
         quantization_flow = register_flow('quantization', quantization_passes, requires=[init_flow], backend=self.name)

+        optimization_passes = [
+            'quartus:skip_softmax',
+        ]
+        optimization_flow = register_flow('optimize', optimization_passes, requires=[init_flow], backend=self.name)
+
         templates = self._get_layer_templates()
         template_flow = register_flow('apply_templates', templates, requires=[init_flow], backend=self.name)

@@ -57,15 +62,15 @@ def _register_flows(self):

         extras = [
             # Ideally this should be empty
-            opt_pass for opt_pass in all_passes if opt_pass not in initializers + quartus_types + templates + writer_passes
+            opt_pass for opt_pass in all_passes if opt_pass not in initializers + quartus_types + optimization_passes + templates + writer_passes
         ]

         if len(extras) > 0:
             extras_flow = register_flow('extras', extras, requires=[init_flow], backend=self.name)
         else:
             extras_flow = None

-        ip_flow_requirements = ['optimize', init_flow, streaming_flow, quantization_flow, quartus_types_flow, extras_flow, template_flow]
+        ip_flow_requirements = ['optimize', init_flow, streaming_flow, quantization_flow, optimization_flow, quartus_types_flow, extras_flow, template_flow]
         ip_flow_requirements = list(filter(None, ip_flow_requirements))

         self._default_flow = register_flow('ip', None, requires=ip_flow_requirements, backend=self.name)

hls4ml/backends/vivado/vivado_backend.py

Lines changed: 1 addition & 0 deletions

@@ -52,6 +52,7 @@ def _register_flows(self):

         optimization_passes = [
             'vivado:optimize_pointwise_conv',
+            'vivado:skip_softmax'
         ]
         optimization_flow = register_flow('optimize', optimization_passes, requires=[init_flow], backend=self.name)

hls4ml/model/layers.py

Lines changed: 2 additions & 1 deletion

@@ -698,7 +698,8 @@ def initialize(self):

 class Softmax(Activation):
     _expected_attributes = [
-        ChoiceAttribute('implementation', ['latency', 'stable', 'legacy'], default='stable')
+        ChoiceAttribute('implementation', ['latency', 'stable', 'argmax', 'legacy'], default='stable'),
+        Attribute('skip', value_type=bool, default=False),
     ]

     def initialize(self):
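Two attributes change here: 'argmax' becomes a fourth choice for the Softmax 'implementation' attribute, and a boolean 'skip' (default False) is added, which is exactly what the SkipSoftmax pass reads via node.get_attr('skip', False). A sketch of selecting the argmax variant from a name-granularity config follows; the config key name is an assumption (the existing pytest parametrizes it as 'strategy', and the line that applies it is outside this diff), so treat it as hypothetical:

    import hls4ml

    # 'model' is any Keras model with a layer named 'softmax', e.g. the one in the sketch above.
    cfg = hls4ml.utils.config_from_keras_model(model, granularity='name')
    # Hypothetical key name: intended to map onto the new 'argmax' choice of the
    # Softmax 'implementation' ChoiceAttribute; the actual key may differ.
    cfg['LayerName']['softmax']['implementation'] = 'argmax'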

hls4ml/templates/quartus/firmware/nnet_utils/nnet_activation.h

Lines changed: 25 additions & 2 deletions

@@ -20,7 +20,6 @@
 #ifndef NNET_ACTIVATION_H_
 #define NNET_ACTIVATION_H_

-//#include <cmath>
 #include "nnet_common.h"

 namespace nnet {

@@ -127,7 +126,7 @@ void sigmoid(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in])
 // Softmax Activation
 // *************************************************

-enum class softmax_implementation {latency=0, legacy=1, stable=2};
+enum class softmax_implementation {latency=0, legacy=1, stable=2, argmax=3};

 template<class data_T, typename CONFIG_T>
 inline unsigned softmax_idx_from_real_val(const data_T x){

@@ -248,6 +247,27 @@ void softmax_legacy(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) {
     }
 }

+template<class data_T, class res_T, typename CONFIG_T>
+void softmax_argmax(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) {
+    #pragma unroll
+    for (int i = 0; i < CONFIG_T::n_in; i++) {
+        res[i] = (res_T) 0;
+    }
+
+    hls_register data_T maximum = data[0];
+    hls_register int idx = 0;
+
+    #pragma ii 1
+    for (int i = 1; i < CONFIG_T::n_in; i++) {
+        if (data[i] > maximum) {
+            maximum = data[i];
+            idx = i;
+        }
+    }
+
+    res[idx] = (res_T) 1;
+}
+
 template<class data_T, class res_T, typename CONFIG_T>
 inline void softmax(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]){
     switch(CONFIG_T::implementation) {

@@ -263,6 +283,9 @@ inline void softmax(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]){
     default:
         softmax_stable<data_T, res_T, CONFIG_T>(data, res);
         break;
+    case softmax_implementation::argmax:
+        softmax_argmax<data_T, res_T, CONFIG_T>(data, res);
+        break;
     }
 }
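Functionally, softmax_argmax writes a one-hot vector with a 1 at the index of the largest input and 0 elsewhere, skipping the exponential and lookup-table logic of the other implementations. Because softmax is monotonic, the position of the maximum is unchanged, so the top-1 prediction is preserved. A small numpy sketch of the reference behaviour (not part of the commit):

    import numpy as np

    def softmax_argmax_ref(x):
        # One-hot vector at the position of the largest element.
        out = np.zeros_like(x)
        out[np.argmax(x)] = 1
        return out

    logits = np.array([0.3, 2.1, -1.0, 0.7])
    softmax = np.exp(logits) / np.exp(logits).sum()
    # Both select index 1, so the predicted class is identical.
    assert np.argmax(softmax) == np.argmax(softmax_argmax_ref(logits)) == 1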

hls4ml/templates/quartus/firmware/nnet_utils/nnet_activation_stream.h

Lines changed: 31 additions & 0 deletions

@@ -405,6 +405,34 @@ void softmax_legacy(stream<data_T> &data, stream<res_T> &res) {
     }
 }

+template<class data_T, class res_T, typename CONFIG_T>
+void softmax_argmax(stream<data_T> &data, stream<res_T> &res) {
+    #pragma ii 1
+    for (int i = 0; i < CONFIG_T::n_in / res_T::size; i++) {
+        data_T in_data = data.read();
+        res_T out_data;
+
+        #pragma unroll
+        for (int i = 0; i < CONFIG_T::n_in; i++) {
+            out_data[i] = (typename res_T::value_type) 0;
+        }
+
+        hls_register typename data_T::value_type maximum = in_data[0];
+        hls_register int idx = 0;
+
+        #pragma ii 1
+        for (int i = 1; i < CONFIG_T::n_in; i++) {
+            if (in_data[i] > maximum) {
+                maximum = in_data[i];
+                idx = i;
+            }
+        }
+
+        out_data[idx] = (typename res_T::value_type) 1;
+        res.write(out_data);
+    }
+}
+
 template<class data_T, class res_T, typename CONFIG_T>
 void softmax(stream<data_T> &data, stream<res_T> &res) {
     switch(CONFIG_T::implementation) {

@@ -417,6 +445,9 @@ void softmax(stream<data_T> &data, stream<res_T> &res) {
     case softmax_implementation::legacy:
         softmax_legacy<data_T, res_T, CONFIG_T>(data, res);
         break;
+    case softmax_implementation::argmax:
+        softmax_argmax<data_T, res_T, CONFIG_T>(data, res);
+        break;
     default:
         softmax_stable<data_T, res_T, CONFIG_T>(data, res);
         break;

hls4ml/templates/vivado/nnet_utils/nnet_activation.h

Lines changed: 26 additions & 2 deletions

@@ -155,7 +155,7 @@ void sigmoid(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in])
 // Softmax Activation
 // *************************************************

-enum class softmax_implementation {latency=0, legacy=1, stable=2};
+enum class softmax_implementation {latency=0, legacy=1, stable=2, argmax=3};

 inline float exp_fcn_float(float input) {
     return std::exp(input);

@@ -382,6 +382,27 @@ void softmax_legacy(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in])

 }

+template<class data_T, class res_T, typename CONFIG_T>
+void softmax_argmax(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) {
+    for (int i = 0; i < CONFIG_T::n_in; i++) {
+        #pragma HLS UNROLL
+        res[i] = (res_T) 0;
+    }
+
+    data_T maximum = data[0];
+    int idx = 0;
+
+    for (int i = 1; i < CONFIG_T::n_in; i++) {
+        #pragma HLS PIPELINE
+        if (data[i] > maximum) {
+            maximum = data[i];
+            idx = i;
+        }
+    }
+
+    res[idx] = (res_T) 1;
+}
+
 template<class data_T, class res_T, typename CONFIG_T>
 void softmax(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]){
     #pragma HLS inline

@@ -395,6 +416,9 @@ void softmax(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]){
     case softmax_implementation::legacy:
         softmax_legacy<data_T, res_T, CONFIG_T>(data, res);
         break;
+    case softmax_implementation::argmax:
+        softmax_argmax<data_T, res_T, CONFIG_T>(data, res);
+        break;
     }
 }

@@ -776,4 +800,4 @@ void ternary_tanh(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in])

 }

-#endif
+#endif

hls4ml/templates/vivado/nnet_utils/nnet_activation_stream.h

Lines changed: 34 additions & 46 deletions

@@ -314,6 +314,35 @@ void softmax_legacy(hls::stream<data_T> &data, hls::stream<res_T> &res) {
     }
 }

+template<class data_T, class res_T, typename CONFIG_T>
+void softmax_argmax(hls::stream<data_T> &data, hls::stream<res_T> &res) {
+    for (int i = 0; i < CONFIG_T::n_in / res_T::size; i++) {
+        #pragma HLS PIPELINE
+        data_T in_data = data.read();
+        res_T out_data;
+
+        for (int i = 0; i < CONFIG_T::n_in; i++) {
+            #pragma HLS UNROLL
+            out_data[i] = (typename res_T::value_type) 0;
+        }
+
+        typename data_T::value_type maximum = in_data[0];
+        int idx = 0;
+
+        for (int i = 1; i < CONFIG_T::n_in; i++) {
+            #pragma HLS PIPELINE
+            if (in_data[i] > maximum) {
+                maximum = in_data[i];
+                idx = i;
+            }
+        }
+
+        out_data[idx] = (typename res_T::value_type) 1;
+        res.write(out_data);
+    }
+}
+
+
 template<class data_T, class res_T, typename CONFIG_T>
 void softmax(hls::stream<data_T> &data, hls::stream<res_T> &res){
     assert(CONFIG_T::axis == -1);

@@ -328,7 +357,10 @@ void softmax(hls::stream<data_T> &data, hls::stream<res_T> &res){
     case softmax_implementation::legacy:
         softmax_legacy<data_T, res_T, CONFIG_T>(data, res);
         break;
-    }
+    case softmax_implementation::argmax:
+        softmax_argmax<data_T, res_T, CONFIG_T>(data, res);
+        break;
+    }
 }

@@ -637,51 +669,7 @@ void prelu(hls::stream<data_T> &data, typename data_T::value_type alpha[CONFIG_T
     }
 }

-// *************************************************
-// Binary TanH Activation
-// *************************************************
-template<class data_T, class res_T, typename CONFIG_T>
-void binary_tanh(hls::stream<data_T> &data, hls::stream<res_T> &res) {
-    PReLUActLoop: for (int i = 0; i < CONFIG_T::n_in / res_T::size; i++) {
-        #pragma HLS PIPELINE
-
-        data_T in_data = data.read();
-        res_T out_data;
-        #pragma HLS DATA_PACK variable=out_data
-
-        PReLUPackLoop: for (int j = 0; j < res_T::size; j++) {
-            #pragma HLS UNROLL
-            if(in_data[j] > 0) out_data[j] = (typename res_T::value_type) 1;
-            else out_data[j] = (typename res_T::value_type) -1;
-        }
-        res.write(out_data);
-    }
-}
-
-// *************************************************
-// Ternary TanH Activation
-// *************************************************
-template<class data_T, class res_T, typename CONFIG_T>
-void ternary_tanh(hls::stream<data_T> &data, hls::stream<res_T> &res) {
-    PReLUActLoop: for (int i = 0; i < CONFIG_T::n_in / res_T::size; i++) {
-        #pragma HLS PIPELINE
-
-        data_T in_data = data.read();
-        res_T out_data;
-        #pragma HLS DATA_PACK variable=out_data
-
-        PReLUPackLoop: for (int j = 0; j < res_T::size; j++) {
-            #pragma HLS UNROLL
-            if(in_data[j] > 1) out_data[j] = (typename res_T::value_type) 1;
-            else if (in_data[j] <= -1) out_data[j] = (typename res_T::value_type) -1;
-            else out_data[j] = (typename res_T::value_type) 0;
-        }
-        res.write(out_data);
-    }
-}
-
-

 }

-#endif
+#endif

test/pytest/test_softmax.py

Lines changed: 27 additions & 1 deletion

@@ -24,7 +24,7 @@ def generate_data(function, input_shape):
     return function((1000, *input_shape))

 @pytest.mark.parametrize('backend', ['Vivado', 'Quartus'])
-@pytest.mark.parametrize('strategy', ['stable'])
+@pytest.mark.parametrize('strategy', ['stable', 'argmax'])
 @pytest.mark.parametrize('function,input_shape,io_type', [
     (flat_distribution, (8,), 'io_parallel'),
     (high_accuracy_distribution, (8,), 'io_parallel'),

@@ -57,3 +57,29 @@ def test_softmax(backend, strategy, generate_data, input_shape, io_type, function):
     print('Accuracy hls4ml relative to keras: {}'.format(acc_hls4ml))

     assert acc_hls4ml >= 0.98
+
+@pytest.mark.parametrize('backend', ['Vivado', 'Quartus'])
+@pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream'])
+def test_softmax_skipped(backend, io_type):
+    X = np.random.rand(100, 10)
+    model = tf.keras.models.Sequential()
+    model.add(tf.keras.layers.Dense(14, input_shape=(10,), name='dense'))
+    model.add(tf.keras.layers.Activation(activation='softmax', name='softmax'))
+    model.compile()
+
+    cfg = hls4ml.utils.config_from_keras_model(model, granularity='name')
+    cfg['LayerName']['softmax']['skip'] = True
+
+    odir = str(test_root_path / 'hls4mlprj_softmax_skipped_{}_{}').format(backend, io_type)
+    hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=cfg, io_type=io_type, output_dir=odir, backend=backend)
+    hls_model.compile()
+
+    # Verify Softmax was removed
+    hls_layers = list(hls_model.get_layers())  # 0 is Input, 1 is Dense, 2 is Softmax (if not removed)
+    assert len(hls_layers) == 2
+
+    # Verify hls4ml output is equal to Dense output
+    y_keras = model.predict(X)
+    y_hls4ml = hls_model.predict(X).reshape(y_keras.shape)
+    keras_trace = hls4ml.model.profiling.get_ymodel_keras(model, X)
+    np.testing.assert_allclose(y_hls4ml, keras_trace['dense'], rtol=0, atol=2e-2)
