# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+import math
import numpy as np
import os
from qonnx.core.datatype import DataType
@@ -47,6 +48,84 @@ def get_nodeattr_types(self):
        my_attrs.update(HLSBackend.get_nodeattr_types(self))
        return my_attrs

+    def lut_estimation(self):
+        """Calculates resource estimations for LUTs based on:
+        - FINN-R: An End-to-End Deep-Learning Framework for Fast
+        Exploration of Quantized Neural Networks
+        - M. Blott, T. B. Preusser, N. J. Fraser, G. Gambardella, K. O'Brien,
+        Y. Umuroglu, M. Leeser and K. Vissers
+        - 12. Sep 2018
+        """
+        # TODO add in/out FIFO contributions
+        P = self.get_nodeattr("PE")
+        Q = self.get_nodeattr("SIMD")
+        wdt = self.get_weight_datatype()
+        W = wdt.bitwidth()
+        # determine tdt with input and weight data types
+        idt = self.get_input_datatype()
+        A = idt.bitwidth()
+        # parameters from experiments in paper mentioned above
+        c0 = 300
+        c1 = 1.1
+        c2 = 0
+        mmode = self.get_nodeattr("mem_mode")
+        mstyle = self.get_nodeattr("ram_style")
+        if (mmode == "internal_decoupled" and mstyle == "distributed") or (
+            mmode == "internal_embedded" and self.calc_wmem() <= 128
+        ):
+            c2 = (P * Q * W) * math.ceil(self.calc_wmem() / 64)
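+            # Illustrative reading of the term above (an assumption, not stated in
+            # the paper): roughly one LUT per bit of the P*Q*W-wide weight stream
+            # per 64 memory entries, e.g. for P = Q = 3, W = 4 and wmem = 128 this
+            # would add 36 * ceil(128 / 64) = 72 LUTs.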
+
+        # multiplication
+        res_type = self.get_nodeattr("resType")
+        if res_type == "dsp":
+            mult_luts = 0
+        else:
+            mult_luts = Q * (2 * math.ceil((W + A) / 6) - 1) * (W + A)
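+            # Worked example (hypothetical values): SIMD Q = 3 and W = A = 4 gives
+            # 3 * (2 * ceil(8 / 6) - 1) * 8 = 72 LUTs for the LUT-based multipliers.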
+        # adder tree
+        addertree_luts = (W + A) * (2 * Q - 1)
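+        # With the same hypothetical Q = 3 and W = A = 4: 8 * (2 * 3 - 1) = 40 LUTs.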
+        # accumulator
+        acc_datatype = self.get_accumulator_datatype()
+        acc_bits = acc_datatype.bitwidth()
+        k_h, k_w = self.get_nodeattr("Kernel")
+        # if accDataType is not set, then it will default to INT32, which would
+        # be a large overestimate in most (if not all) cases. In this scenario,
+        # we would use the minimum accumulator as determined by the data types
+        # bound, derived in https://arxiv.org/abs/2301.13376
+        alpha = math.log(k_h * k_w, 2) + W + A - 1 - int(idt.signed())
+        acc_bits = min(
+            acc_datatype.bitwidth(),
+            np.ceil(alpha + math.log(1 + pow(2, -alpha), 2) + 1),
+        )
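+        # Worked example (hypothetical values): a 3x3 kernel with W = A = 4 and a
+        # signed input type gives alpha = log2(9) + 4 + 4 - 1 - 1 ~= 9.17, so the
+        # bound evaluates to ceil(9.17 + log2(1 + 2**-9.17) + 1) = 11 bits instead
+        # of the 32-bit default.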
+        acc_luts = acc_bits
+        # thresholds and threshold comparators
+        thr_luts = 0
+        comp_luts = 0
+        noact = self.get_nodeattr("noActivation")
+        # TODO - add 'ram_style_threshold' node attribute
+        if noact == 0:
+            odt = self.get_output_datatype()
+            B = odt.bitwidth()
+            thr_luts = (2**B - 1) * acc_bits * self.calc_tmem() / 64
+            comp_luts = (2**B - 1) * acc_bits
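+            # Worked example (hypothetical values): a 4-bit output type needs
+            # 2**4 - 1 = 15 thresholds per channel; with acc_bits = 11 and
+            # calc_tmem() = 1 this gives thr_luts ~= 2.6 and comp_luts = 165.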
+
+        return int(
+            c0 + c1 * (P * (mult_luts + addertree_luts + acc_luts + thr_luts + comp_luts)) + c2
+        )
+
+    def dsp_estimation(self):
+        # multiplication
+        P = self.get_nodeattr("PE")
+        res_type = self.get_nodeattr("resType")
+        wdt = self.get_weight_datatype()
+        W = wdt.bitwidth()
+        idt = self.get_input_datatype()
+        A = idt.bitwidth()
+        if res_type == "dsp":
+            mult_dsp = P * np.ceil((W + A) / 48)  # TODO: more accurate modelling
+        else:
+            mult_dsp = 0
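+        # Crude sanity check (hypothetical values): for W = A = 4 this is
+        # ceil(8 / 48) = 1 DSP per PE, i.e. P DSPs in total.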
+        return int(mult_dsp)
+
    def execute_node(self, context, graph):
        mode = self.get_nodeattr("exec_mode")
        mem_mode = self.get_nodeattr("mem_mode")