@@ -107,6 +107,8 @@ template<typename T>
107107T* Unidirectional_broadcast (const T* original_data, const std::vector<size_t > original_shape, const std::vector<size_t > target_shape);
108108std::string Clean_name (std::string input_tensor_name);
109109
110+ // / compute stride of a tensor given its shape (assume layout is row-major)
111+ std::vector<size_t > ComputeStrideFromShape (const std::vector<size_t > & shape);
110112
111113// / function to check if a >= 0 and a < MAX using a single comparison
112114// // uses the trick of casting to unsigned values so it becomes a single comparison
@@ -115,23 +117,23 @@ inline bool is_a_ge_zero_and_a_lt_b(int a, int b) {
115117}
116118
117119
118- // / im2col : efficient function to re-arrange input data of convolution to a matrix
120+ // / im2col : efficient function to re-arrange input data of convolution to a matrix
119121// / that can be used by BLAS
120122// / Use trick to loop on each element of filtered region first and follow input data layout
121123// / By doing this reads and writes are of consecutive data in memory and one gains in efficiency
122- // / The resulting matrix will be already transposed and can be used directly in BLAS
124+ // / The resulting matrix will be already transposed and can be used directly in BLAS
123125// / since output will be a matrix : (channels*kernel_h*kernel_w , output_h*output_w)
124- // / Example: with an input matrix
125- // / a1 a2 a3
126- // / b1 b2 b3 and a 2x2 kernel (k1,k2,k3,k4) and padding 1 :
127- // / c1 c2 c3
128- // / outpout will be a matrix (4 x 16)
129- // / the routine will follow output order :
126+ // / Example: with an input matrix
127+ // / a1 a2 a3
128+ // / b1 b2 b3 and a 2x2 kernel (k1,k2,k3,k4) and padding 1 :
129+ // / c1 c2 c3
130+ // / output will be a matrix (4 x 16)
131+ // / the routine will follow output order :
130132// first all elements which will be operated on by k1, then k2, then k3, then k4
131133// / -> ( 0 0 0 0 0 a1 a2 a3 0 b1 b2 b3 0 c1 c2 c3 ) all elements for k1
132- // / ( 0 0 0 0 a1 a2 a3 0 b1 b2 b3 0 c1 c2 c3 0 ) for k2
133- // / ( 0 a1 a2 a3 0 b1 b2 b3 0 c1 c2 c3 0 0 0 0 ) for k3
134- // / ( a1 a2 a3 0 b1 b2 b3 0 c1 c2 c3 0 0 0 0 0 ) for k4
134+ // / ( 0 0 0 0 a1 a2 a3 0 b1 b2 b3 0 c1 c2 c3 0 ) for k2
135+ // / ( 0 a1 a2 a3 0 b1 b2 b3 0 c1 c2 c3 0 0 0 0 ) for k3
136+ // / ( a1 a2 a3 0 b1 b2 b3 0 c1 c2 c3 0 0 0 0 0 ) for k4
135137// /
136138
137139template <typename T>
@@ -171,11 +173,11 @@ void Im2col(const T *data_im, const int channels, const int height, const int wi
171173
172174// / 3d implementation
173175template <typename T>
174- void Im2col_3d (const T *data_im, const int channels,
175- const int depth, const int height, const int width,
176- const int kernel_d, const int kernel_h, const int kernel_w,
177- const int pad_d, const int pad_h, const int pad_w,
178- const int stride_d, const int stride_h, const int stride_w,
176+ void Im2col_3d (const T *data_im, const int channels,
177+ const int depth, const int height, const int width,
178+ const int kernel_d, const int kernel_h, const int kernel_w,
179+ const int pad_d, const int pad_h, const int pad_w,
180+ const int stride_d, const int stride_h, const int stride_w,
179181 const int dilation_d, const int dilation_h, const int dilation_w, T *data_col)
180182{
181183 const int output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1 ) + 1 )) / stride_h + 1 ;
@@ -201,7 +203,7 @@ void Im2col_3d(const T *data_im, const int channels,
201203 if (!is_a_ge_zero_and_a_lt_b (input_row, height)) {
202204 for (int output_cols = output_w; output_cols; output_cols--) {
203205 *(data_col++) = 0 ;
204- }
206+ }
205207 } else {
206208 int input_col = -pad_w + kernel_col * dilation_w;
207209 for (int output_col = output_w; output_col; output_col--) {
0 commit comments