@@ -80,19 +80,24 @@ inline void im2col_sh1sw1dh1dw1ph0pw0(const framework::Tensor& im,
80
80
int col_matrix_width = output_width * output_height;
81
81
int im_size = im_height * im_width;
82
82
size_t copy_size = sizeof (T) * output_width;
83
+ const T* im_data_oh = im_data;
84
+ T* dst_data_oh = col_data;
83
85
for (int oh = 0 ; oh < output_height; ++oh) {
84
- const T* im_data_start = im_data + oh * im_width ;
85
- T* dst_data = col_data + oh * output_width ;
86
+ const T* src_data_ic = im_data_oh ;
87
+ T* dst_data = dst_data_oh ;
86
88
for (int ic = 0 ; ic < im_channels; ++ic) {
87
- const T* src_data = im_data_start + ic * im_size ;
89
+ const T* src_data = src_data_ic ;
88
90
for (int kh = 0 ; kh < filter_height; ++kh) {
89
91
for (int kw = 0 ; kw < filter_width; ++kw) {
90
92
std::memcpy (dst_data, src_data + kw, copy_size);
91
93
dst_data = dst_data + col_matrix_width;
92
94
}
93
95
src_data = src_data + im_width;
94
96
}
97
+ src_data_ic = src_data_ic + im_size;
95
98
}
99
+ im_data_oh = im_data_oh + im_width;
100
+ dst_data_oh = dst_data_oh + output_width;
96
101
}
97
102
}
98
103
@@ -130,34 +135,36 @@ inline void im2col_sh1sw1dh1dw1ph1pw1(const framework::Tensor& im,
130
135
T* col_start_r = col_data + (filter_height - 1 ) * col_block_fh +
131
136
col_matrix_width - output_width;
132
137
for (int ic = 0 ; ic < im_channels; ++ic) {
133
- // TODO(TJ): move * outside
134
- T* dst_data_l = col_start_l + ic * col_block_ic;
135
- T* dst_data_r = col_start_r + ic * col_block_ic;
138
+ T* dst_data_l = col_start_l;
139
+ T* dst_data_r = col_start_r;
136
140
for (int kw = 0 ; kw < filter_width; ++kw) {
137
141
std::memset (dst_data_l, 0 , copy_size);
138
142
std::memset (dst_data_r, 0 , copy_size);
139
143
dst_data_l = dst_data_l + col_matrix_width;
140
144
dst_data_r = dst_data_r + col_matrix_width;
141
145
}
146
+ col_start_l = col_start_l + col_block_ic;
147
+ col_start_r = col_start_r + col_block_ic;
142
148
}
143
149
}
144
150
145
151
auto pad = static_cast <T>(0 );
146
152
if (filter_width == 1 ) {
147
153
// fill width padding
154
+ T* dst_data_ic = col_data;
148
155
for (int ic = 0 ; ic < im_channels; ++ic) {
149
- // TODO(TJ): move * outside
150
- T* dst_data_ic = col_data + ic * col_block_ic;
156
+ T* dst_data_kh = dst_data_ic;
151
157
for (int kh = 0 ; kh < filter_height; ++kh) {
152
- // TODO(TJ): move * outside
153
- T* dst_data = dst_data_ic + kh * col_block_fh;
158
+ T* dst_data = dst_data_kh;
154
159
for (int oh = 0 ; oh < output_height; ++oh) {
155
160
*dst_data = pad;
156
161
dst_data = dst_data + output_width - 1 ;
157
162
*dst_data = pad;
158
163
++dst_data;
159
164
}
165
+ dst_data_kh = dst_data_kh + col_block_fh;
160
166
}
167
+ dst_data_ic = dst_data_ic + col_block_ic;
161
168
}
162
169
// fill core
163
170
size_t copy_size = sizeof (T) * (output_width - plw - prw);
@@ -184,12 +191,10 @@ inline void im2col_sh1sw1dh1dw1ph1pw1(const framework::Tensor& im,
184
191
185
192
// filter_width != 1
186
193
// fill width padding
194
+ T* dst_data_ic = col_data;
187
195
for (int ic = 0 ; ic < im_channels; ++ic) {
188
- // TODO(TJ): move * outside
189
- T* dst_data_ic = col_data + ic * col_block_ic;
196
+ T* dst_data_kh = dst_data_ic;
190
197
for (int kh = 0 ; kh < filter_height; ++kh) {
191
- // TODO(TJ): move * outside
192
- T* dst_data_kh = dst_data_ic + kh * col_block_fh;
193
198
for (T* dst_data :
194
199
{dst_data_kh, dst_data_kh + (filter_width - prw) * col_matrix_width +
195
200
output_width - 1 }) {
@@ -199,7 +204,9 @@ inline void im2col_sh1sw1dh1dw1ph1pw1(const framework::Tensor& im,
199
204
dst_data = dst_data + output_width;
200
205
}
201
206
}
207
+ dst_data_kh = dst_data_kh + col_block_fh;
202
208
}
209
+ dst_data_ic = dst_data_ic + col_block_ic;
203
210
}
204
211
205
212
// TODO(TJ): use array like: size_t copy_size[kw]={sizeof(T) *
0 commit comments