@@ -63,22 +63,6 @@ void ConvolutionalLayer::run(const Tensor& input, Tensor& output) {
6363 size_t kernel_in_channels = kernel_.get_shape ()[2 ];
6464 size_t kernel_out_channels = kernel_.get_shape ()[3 ];
6565
66- size_t out_height =
67- (in_height + 2 * pads_ - (dilations_ * (kernel_height - 1 ) + 1 )) /
68- stride_ +
69- 1 ;
70- size_t out_width =
71- (in_width + 2 * pads_ - (dilations_ * (kernel_width - 1 ) + 1 )) /
72- stride_ +
73- 1 ;
74-
75- std::vector<std::vector<std::vector<std::vector<int >>>> output_tensor (
76- batch_size,
77- std::vector<std::vector<std::vector<int >>>(
78- out_height,
79- std::vector<std::vector<int >>(
80- out_width, std::vector<int >(kernel_out_channels, 0 ))));
81-
8266 std::vector<int > t = *input.as <int >();
8367 std::vector<std::vector<std::vector<std::vector<int >>>> input_tensor (
8468 batch_size,
@@ -111,6 +95,8 @@ void ConvolutionalLayer::run(const Tensor& input, Tensor& output) {
11195 kernel[n_index][h_index][w_index][c_index] = t1[index];
11296 }
11397
98+ pads_ = (kernel_height * (1 + 2 * dilations_) - 1 ) / 2 ;
99+
114100 std::vector<std::vector<std::vector<std::vector<int >>>> padded_input =
115101 input_tensor;
116102 if (pads_ > 0 ) {
@@ -134,38 +120,93 @@ void ConvolutionalLayer::run(const Tensor& input, Tensor& output) {
134120 }
135121 }
136122
123+ std::vector<std::vector<std::vector<std::vector<int >>>> dil_kernel =
124+ kernel;
125+ if (dilations_ > 0 ) {
126+ dil_kernel = std::vector<std::vector<std::vector<std::vector<int >>>>(
127+ kernel_height * (1 + 2 * dilations_),
128+ std::vector<std::vector<std::vector<int >>>(
129+ kernel_width * (1 + 2 * dilations_),
130+ std::vector<std::vector<int >>(
131+ kernel_in_channels,
132+ std::vector<int >(kernel_out_channels, 0 ))));
133+
134+ for (size_t b = 0 ; b < kernel_out_channels; ++b) {
135+ for (size_t h = 0 ; h < kernel_height; ++h) {
136+ for (size_t w = 0 ; w < kernel_width; ++w) {
137+ for (size_t c = 0 ; c < kernel_in_channels; ++c) {
138+ dil_kernel[(h * (1 + 2 * dilations_)) + dilations_]
139+ [(w * (1 + 2 * dilations_)) + dilations_][c][b] =
140+ kernel[h][w][c][b];
141+ }
142+ }
143+ }
144+ }
145+ }
146+
147+ size_t crat = 0 ;
148+ if ((in_height + 2 * pads_ -
149+ ((kernel_height * (1 + 2 * dilations_)) - 1 )) %
150+ stride_ !=
151+ 0 )
152+ crat = 1 ;
153+
154+ size_t out_height = (in_height + 2 * pads_ -
155+ ((kernel_height * (1 + 2 * dilations_)) - 1 )) /
156+ stride_ +
157+ crat;
158+
159+ crat = 0 ;
160+
161+ if ((in_width + 2 * pads_ -
162+ ((kernel_width * (1 + 2 * dilations_)) - 1 )) %
163+ stride_ !=
164+ 0 )
165+ crat = 1 ;
166+
167+ size_t out_width = (in_width + 2 * pads_ -
168+ ((kernel_width * (1 + 2 * dilations_)) - 1 )) /
169+ stride_ +
170+ crat;
171+
172+ std::vector<std::vector<std::vector<std::vector<int >>>> output_tensor (
173+ batch_size,
174+ std::vector<std::vector<std::vector<int >>>(
175+ out_height,
176+ std::vector<std::vector<int >>(
177+ out_width, std::vector<int >(kernel_out_channels, 0 ))));
178+ size_t one_size = (kernel_height * (1 + 2 * dilations_) - 1 ) / 2 ;
179+
137180 for (size_t b = 0 ; b < batch_size; ++b) {
138- for (size_t oc = 0 ; oc < kernel_out_channels; ++oc ) {
139- for (size_t i = 0 ; i < out_height; ++i ) {
140- for (size_t j = 0 ; j < out_width; ++j ) {
181+ for (size_t c = 0 ; c < kernel_out_channels; ++c ) {
182+ for (size_t i = 0 ; i < out_height; i += stride_ ) {
183+ for (size_t j = 0 ; j < out_width; j += stride_ ) {
141184 int value = 0 ;
142- for (size_t kh = 0 ; kh < kernel_height; ++kh) {
143- for (size_t kw = 0 ; kw < kernel_width; ++kw) {
144- for (size_t ic = 0 ; ic < in_channels; ++ic) {
145- size_t vert_start = i * stride_ + kh * dilations_;
146- size_t horiz_start = j * stride_ + kw * dilations_;
147-
148- if (vert_start < padded_input[0 ].size () &&
149- horiz_start < padded_input[0 ][0 ].size ()) {
150- value += padded_input[b][vert_start][horiz_start][ic] *
151- kernel[kh][kw][ic][oc];
152- }
185+ for (size_t ic = 0 ; ic < in_channels; ++ic) {
186+ for (int h = (-1 * static_cast <int >(one_size));
187+ h <= static_cast <int >(one_size); ++h) {
188+ for (int w = (-1 * static_cast <int >(one_size));
189+ w <= static_cast <int >(one_size); ++w) {
190+ value += padded_input[b][i + one_size + h]
191+ [j + one_size + w][ic] *
192+ dil_kernel[one_size + h][one_size + w][ic][c];
153193 }
154194 }
155195 }
156- output_tensor[b][i][j][oc ] = value;
196+ output_tensor[b][i][j][c ] = value;
157197 }
158198 }
159199 }
160200 }
161- Shape sh ({batch_size, out_height, out_width, kernel_out_channels});
201+
202+ Shape sh ({batch_size, kernel_out_channels, out_height, out_width});
162203 std::vector<int > one_d_vector (batch_size * out_height * out_width *
163204 kernel_out_channels);
164205 size_t index_1d = 0 ;
165206 for (size_t i = 0 ; i < batch_size; ++i) {
166- for (size_t j = 0 ; j < out_height ; ++j ) {
167- for (size_t k = 0 ; k < out_width ; ++k ) {
168- for (size_t l = 0 ; l < kernel_out_channels ; ++l ) {
207+ for (size_t l = 0 ; l < kernel_out_channels ; ++l ) {
208+ for (size_t j = 0 ; j < out_height ; ++j ) {
209+ for (size_t k = 0 ; k < out_width ; ++k ) {
169210 one_d_vector[index_1d++] = output_tensor[i][j][k][l];
170211 }
171212 }
@@ -234,28 +275,12 @@ void ConvolutionalLayer::run(const Tensor& input, Tensor& output) {
234275 size_t kernel_in_channels = kernel_.get_shape ()[2 ];
235276 size_t kernel_out_channels = kernel_.get_shape ()[3 ];
236277
237- size_t out_height =
238- (in_height + 2 * pads_ - (dilations_ * (kernel_height - 1 ) + 1 )) /
239- stride_ +
240- 1 ;
241- size_t out_width =
242- (in_width + 2 * pads_ - (dilations_ * (kernel_width - 1 ) + 1 )) /
243- stride_ +
244- 1 ;
245-
246- std::vector<std::vector<std::vector<std::vector<float >>>> output_tensor (
247- batch_size,
248- std::vector<std::vector<std::vector<float >>>(
249- out_height,
250- std::vector<std::vector<float >>(
251- out_width, std::vector<float >(kernel_out_channels, 0 ))));
252-
253278 std::vector<float > t = *input.as <float >();
254279 std::vector<std::vector<std::vector<std::vector<float >>>> input_tensor (
255- batch_size, std::vector<std::vector<std::vector< float >>>(
256- in_height, std::vector<std::vector<float >>(
257- in_width , std::vector<float >(
258- in_channels, 1.0 ))));
280+ batch_size,
281+ std::vector<std::vector<std::vector< float > >>(
282+ in_height , std::vector<std::vector< float > >(
283+ in_width, std::vector< float >( in_channels, 1 ))));
259284 for (size_t index = 0 ; index < t.size (); ++index) {
260285 size_t n_index = index / (in_height * in_width * in_channels);
261286 size_t h_index = (index / (in_width * in_channels)) % in_height;
@@ -268,10 +293,9 @@ void ConvolutionalLayer::run(const Tensor& input, Tensor& output) {
268293 std::vector<std::vector<std::vector<std::vector<float >>>> kernel (
269294 kernel_height,
270295 std::vector<std::vector<std::vector<float >>>(
271- kernel_width,
272- std::vector<std::vector<float >>(
273- kernel_in_channels,
274- std::vector<float >(kernel_out_channels, 1.0 ))));
296+ kernel_width, std::vector<std::vector<float >>(
297+ kernel_in_channels,
298+ std::vector<float >(kernel_out_channels, 1 ))));
275299 for (size_t index = 0 ; index < t1.size (); ++index) {
276300 size_t n_index =
277301 index / (kernel_width * kernel_in_channels * kernel_out_channels);
@@ -283,6 +307,8 @@ void ConvolutionalLayer::run(const Tensor& input, Tensor& output) {
283307 kernel[n_index][h_index][w_index][c_index] = t1[index];
284308 }
285309
310+ pads_ = (kernel_height * (1 + 2 * dilations_) - 1 ) / 2 ;
311+
286312 std::vector<std::vector<std::vector<std::vector<float >>>> padded_input =
287313 input_tensor;
288314 if (pads_ > 0 ) {
@@ -306,38 +332,94 @@ void ConvolutionalLayer::run(const Tensor& input, Tensor& output) {
306332 }
307333 }
308334
335+ std::vector<std::vector<std::vector<std::vector<float >>>> dil_kernel =
336+ kernel;
337+ if (dilations_ > 0 ) {
338+ dil_kernel =
339+ std::vector<std::vector<std::vector<std::vector<float >>>>(
340+ kernel_height * (1 + 2 * dilations_),
341+ std::vector<std::vector<std::vector<float >>>(
342+ kernel_width * (1 + 2 * dilations_),
343+ std::vector<std::vector<float >>(
344+ kernel_in_channels,
345+ std::vector<float >(kernel_out_channels, 0 ))));
346+
347+ for (size_t b = 0 ; b < kernel_out_channels; ++b) {
348+ for (size_t h = 0 ; h < kernel_height; ++h) {
349+ for (size_t w = 0 ; w < kernel_width; ++w) {
350+ for (size_t c = 0 ; c < kernel_in_channels; ++c) {
351+ dil_kernel[(h * (1 + 2 * dilations_)) + dilations_]
352+ [(w * (1 + 2 * dilations_)) + dilations_][c][b] =
353+ kernel[h][w][c][b];
354+ }
355+ }
356+ }
357+ }
358+ }
359+
360+ size_t crat = 0 ;
361+ if ((in_height + 2 * pads_ -
362+ ((kernel_height * (1 + 2 * dilations_)) - 1 )) %
363+ stride_ !=
364+ 0 )
365+ crat = 1 ;
366+
367+ size_t out_height = (in_height + 2 * pads_ -
368+ ((kernel_height * (1 + 2 * dilations_)) - 1 )) /
369+ stride_ +
370+ crat;
371+
372+ crat = 0 ;
373+
374+ if ((in_width + 2 * pads_ -
375+ ((kernel_width * (1 + 2 * dilations_)) - 1 )) %
376+ stride_ !=
377+ 0 )
378+ crat = 1 ;
379+
380+ size_t out_width = (in_width + 2 * pads_ -
381+ ((kernel_width * (1 + 2 * dilations_)) - 1 )) /
382+ stride_ +
383+ crat;
384+
385+ std::vector<std::vector<std::vector<std::vector<float >>>> output_tensor (
386+ batch_size,
387+ std::vector<std::vector<std::vector<float >>>(
388+ out_height,
389+ std::vector<std::vector<float >>(
390+ out_width, std::vector<float >(kernel_out_channels, 0 ))));
391+ size_t one_size = (kernel_height * (1 + 2 * dilations_) - 1 ) / 2 ;
392+
309393 for (size_t b = 0 ; b < batch_size; ++b) {
310- for (size_t oc = 0 ; oc < kernel_out_channels; ++oc) {
311- for (size_t i = 0 ; i < out_height; ++i) {
312- for (size_t j = 0 ; j < out_width; ++j) {
313- float value = 0.0 ;
314- for (size_t kh = 0 ; kh < kernel_height; ++kh) {
315- for (size_t kw = 0 ; kw < kernel_width; ++kw) {
316- for (size_t ic = 0 ; ic < in_channels; ++ic) {
317- size_t vert_start = i * stride_ + kh * dilations_;
318- size_t horiz_start = j * stride_ + kw * dilations_;
319-
320- if (vert_start < padded_input[0 ].size () &&
321- horiz_start < padded_input[0 ][0 ].size ()) {
322- value += padded_input[b][vert_start][horiz_start][ic] *
323- kernel[kh][kw][ic][oc];
324- }
394+ for (size_t c = 0 ; c < kernel_out_channels; ++c) {
395+ for (size_t i = 0 ; i < out_height; i += stride_) {
396+ for (size_t j = 0 ; j < out_width; j += stride_) {
397+ float value = 0 ;
398+ for (size_t ic = 0 ; ic < in_channels; ++ic) {
399+ for (int h = (-1 * static_cast <int >(one_size));
400+ h <= static_cast <int >(one_size); ++h) {
401+ for (int w = (-1 * static_cast <int >(one_size));
402+ w <= static_cast <int >(one_size); ++w) {
403+ value += padded_input[b][i + one_size + h]
404+ [j + one_size + w][ic] *
405+ dil_kernel[one_size + h][one_size + w][ic][c];
325406 }
326407 }
327408 }
328- output_tensor[b][i][j][oc ] = value;
409+ output_tensor[b][i][j][c ] = value;
329410 }
330411 }
331412 }
332413 }
333- Shape sh ({batch_size, out_height, out_width, kernel_out_channels});
414+
415+ Shape sh ({batch_size, kernel_out_channels, out_height, out_width});
334416 std::vector<float > one_d_vector (batch_size * out_height * out_width *
335417 kernel_out_channels);
336418 size_t index_1d = 0 ;
337419 for (size_t i = 0 ; i < batch_size; ++i) {
338- for (size_t j = 0 ; j < out_height ; ++j ) {
339- for (size_t k = 0 ; k < out_width ; ++k ) {
340- for (size_t l = 0 ; l < kernel_out_channels ; ++l ) {
420+ for (size_t l = 0 ; l < kernel_out_channels ; ++l ) {
421+ for (size_t j = 0 ; j < out_height ; ++j ) {
422+ for (size_t k = 0 ; k < out_width ; ++k ) {
341423 one_d_vector[index_1d++] = output_tensor[i][j][k][l];
342424 }
343425 }
0 commit comments