@@ -430,6 +430,26 @@ def _compute_conv1d_im2col(self, input_shape, kernel=3, stride=1, pad=(0,0), dil
430430
431431
432432 def generate_conv1d_line_buffer_fn (self , layer_idx , n_partitions , in_W , in_C , kernel = 3 , stride = 1 , pad = 0 , dilation = 1 ):
433+ """Generate a C++ function that mimics the im2col algorithm. This function works for 1D convolution.
434+
435+ The HLS compiler produces suboptimal designs for an im2col algorithm implementation, so a trick we use is
436+ to generate the result of the im2col transformation explicitly, instead of relying on loops. Since
437+ the result depends on the parameters of the convolution layer (the input size, the kernel size, stride, etc.),
438+ we need to do this for every convolution layer.
439+
440+ Args:
441+ layer_idx (int): Index of layer ('index' attribute).
442+ n_partitions (int): Number of partitions to divide the input into. The pixels in each partition will be processed in parallel.
443+ in_W (int): Width of input.
444+ in_C (int): Number of channels.
445+ kernel (int, optional): Size of the kernel. Defaults to 3.
446+ stride (int, optional): Stride length. Defaults to 1.
447+ pad (int or Iterable, optional): Padding to apply. Specified as either a number or a list [left_pad, right_pad]. Defaults to 0.
448+ dilation (int, optional): Dilation rate. Defaults to 1.
449+
450+ Returns:
451+ str: Generated C++ function
452+ """
433453 if isinstance (pad , Iterable ):
434454 pad_left = pad [0 ]
435455 pad_right = pad [1 ]
@@ -510,6 +530,28 @@ def _compute_conv2d_im2col(self, input_shape, kernel=(3, 3), stride=(1, 1), pad=
510530
511531
512532 def generate_conv2d_line_buffer_fn (self , layer_idx , n_partitions , in_H , in_W , in_C , kernel = (3 , 3 ), stride = (1 , 1 ), pad = (0 , 0 , 0 , 0 ), dilation = (1 , 1 )):
533+ """Generate a C++ function that mimics the im2col algorithm. This function works for 2D convolution.
534+
535+ The HLS compiler produces suboptimal designs for an im2col algorithm implementation, so a trick we use is
536+ to generate the result of the im2col transformation explicitly, instead of relying on loops. Since
537+ the result depends on the parameters of the convolution layer (the input size, the kernel size, stride, etc.),
538+ we need to do this for every convolution layer.
539+
540+ Args:
541+ layer_idx (int): Index of layer ('index' attribute).
542+ n_partitions (int): Number of partitions to divide the input into. The pixels in each partition will be processed in parallel.
543+ in_H (int): Height of input.
544+ in_W (int): Width of input.
545+ in_C (int): Number of channels.
546+ kernel (int or Iterable, optional): Size of the kernel. Defaults to (3,3).
547+ stride (int or Iterable, optional): Stride length. Defaults to (1,1).
548+ pad (int or Iterable, optional): Padding to apply. Specified as either a number or a list [top_pad, bottom_pad, left_pad, right_pad]. Defaults to (0, 0, 0, 0).
549+ dilation (int or Iterable, optional): Dilation rate. Defaults to (1,1).
550+
551+ Returns:
552+ str: Generated C++ function
553+ """
554+
513555 if isinstance (kernel , Iterable ):
514556 kernel_height = kernel [0 ]
515557 kernel_width = kernel [1 ]
0 commit comments