@@ -386,84 +386,6 @@ def uram_efficiency_estimation(self):
386386 uram_est_capacity = uram_est * 72 * 4096
387387 return wbits / uram_est_capacity
388388
def lut_estimation(self):
    """Calculates resource estimations for LUTs based on:
    - FINN-R: An End-to-End Deep-Learning Framework for Fast
    Exploration of Quantized Neural Networks
    - M. Blott, T. B. Preusser, N. J. Fraser, G. Gambardella, K. O'Brien,
    Y. Umuroglu, M. Leeser and K. Vissers
    - 12. Sep 2018

    The estimate has the form::

        c0 + c1 * PE * (mult + adder_tree + acc + thresholds + comparators) + c2

    where ``c0``/``c1`` are the fitted constants from the paper and ``c2`` is
    only non-zero for the memory configurations checked below.

    Returns:
        int: the estimated LUT count, truncated to an integer.
    """
    # TODO add in/out FIFO contributions
    P = self.get_nodeattr("PE")
    Q = self.get_nodeattr("SIMD")
    W = self.get_weight_datatype().bitwidth()
    # the input data type is also needed further down for its signedness
    idt = self.get_input_datatype()
    A = idt.bitwidth()

    # parameters from experiments in paper mentioned above
    c0 = 300
    c1 = 1.1
    c2 = 0
    mmode = self.get_nodeattr("mem_mode")
    mstyle = self.get_nodeattr("ram_style")
    if (mmode == "internal_decoupled" and mstyle == "distributed") or (
        mmode == "internal_embedded" and self.calc_wmem() <= 128
    ):
        # presumably the weight memory mapped onto LUTs, counted in whole
        # 64-deep units -- TODO confirm against the paper
        c2 = (P * Q * W) * math.ceil(self.calc_wmem() / 64)

    # multiplication: no LUTs are counted when multipliers are mapped to DSPs
    if self.get_nodeattr("resType") == "dsp":
        mult_luts = 0
    else:
        mult_luts = Q * (2 * math.ceil((W + A) / 6) - 1) * (W + A)

    # adder tree
    addertree_luts = (W + A) * (2 * Q - 1)

    # accumulator
    acc_datatype = self.get_accumulator_datatype()
    k_h, k_w = self.get_nodeattr("Kernel")
    # if accDataType is not set, then it will default to INT32, which would
    # be a large overestimate in most (if not all) cases. In this scenario,
    # we would use the minimum accumulator as determined by the data types
    # bound, derived in https://arxiv.org/abs/2301.13376
    alpha = math.log2(k_h * k_w) + W + A - 1 - int(idt.signed())
    acc_bits = min(
        acc_datatype.bitwidth(),
        np.ceil(alpha + math.log2(1 + pow(2, -alpha)) + 1),
    )
    acc_luts = acc_bits

    # thresholds and threshold comparators
    thr_luts = 0
    comp_luts = 0
    # TODO - add 'ram_style_threshold' node attribute
    if self.get_nodeattr("noActivation") == 0:
        B = self.get_output_datatype().bitwidth()
        # NOTE(review): unlike c2 above, the depth is not rounded up to a
        # whole multiple of 64 here, so this term can be fractional --
        # confirm whether that is intended.
        thr_luts = (2**B - 1) * acc_bits * self.calc_tmem() / 64
        comp_luts = (2**B - 1) * acc_bits

    per_pe_luts = mult_luts + addertree_luts + acc_luts + thr_luts + comp_luts
    return int(c0 + c1 * (P * per_pe_luts) + c2)
452-
def dsp_estimation(self):
    """Estimate the number of DSP blocks used for the multiplications.

    When the ``resType`` node attribute is ``"dsp"`` the estimate is
    ``PE * ceil((W + A) / 48)``, where ``W`` and ``A`` are the bit widths of
    the weight and input data types. For any other ``resType`` the result
    is 0.

    Returns:
        int: the estimated DSP count.
    """
    pe = self.get_nodeattr("PE")
    uses_dsp = self.get_nodeattr("resType") == "dsp"
    weight_bits = self.get_weight_datatype().bitwidth()
    input_bits = self.get_input_datatype().bitwidth()

    if not uses_dsp:
        return 0

    # TODO: more accurate modelling
    operand_bits = weight_bits + input_bits
    return int(pe * np.ceil(operand_bits / 48))
466-
467389 def get_exp_cycles (self ):
468390 pe = self .get_nodeattr ("PE" )
469391 simd = self .get_nodeattr ("SIMD" )
0 commit comments