Skip to content

Commit d8d2dbc

Browse files
committed
further optimize im2col using variables
1 parent 5373fe2 commit d8d2dbc

File tree

1 file changed

+21
-14
lines changed

1 file changed

+21
-14
lines changed

paddle/fluid/operators/math/im2col_cfo_cpu.h

Lines changed: 21 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -80,19 +80,24 @@ inline void im2col_sh1sw1dh1dw1ph0pw0(const framework::Tensor& im,
8080
int col_matrix_width = output_width * output_height;
8181
int im_size = im_height * im_width;
8282
size_t copy_size = sizeof(T) * output_width;
83+
const T* im_data_oh = im_data;
84+
T* dst_data_oh = col_data;
8385
for (int oh = 0; oh < output_height; ++oh) {
84-
const T* im_data_start = im_data + oh * im_width;
85-
T* dst_data = col_data + oh * output_width;
86+
const T* src_data_ic = im_data_oh;
87+
T* dst_data = dst_data_oh;
8688
for (int ic = 0; ic < im_channels; ++ic) {
87-
const T* src_data = im_data_start + ic * im_size;
89+
const T* src_data = src_data_ic;
8890
for (int kh = 0; kh < filter_height; ++kh) {
8991
for (int kw = 0; kw < filter_width; ++kw) {
9092
std::memcpy(dst_data, src_data + kw, copy_size);
9193
dst_data = dst_data + col_matrix_width;
9294
}
9395
src_data = src_data + im_width;
9496
}
97+
src_data_ic = src_data_ic + im_size;
9598
}
99+
im_data_oh = im_data_oh + im_width;
100+
dst_data_oh = dst_data_oh + output_width;
96101
}
97102
}
98103

@@ -130,34 +135,36 @@ inline void im2col_sh1sw1dh1dw1ph1pw1(const framework::Tensor& im,
130135
T* col_start_r = col_data + (filter_height - 1) * col_block_fh +
131136
col_matrix_width - output_width;
132137
for (int ic = 0; ic < im_channels; ++ic) {
133-
// TODO(TJ): move * outside
134-
T* dst_data_l = col_start_l + ic * col_block_ic;
135-
T* dst_data_r = col_start_r + ic * col_block_ic;
138+
T* dst_data_l = col_start_l;
139+
T* dst_data_r = col_start_r;
136140
for (int kw = 0; kw < filter_width; ++kw) {
137141
std::memset(dst_data_l, 0, copy_size);
138142
std::memset(dst_data_r, 0, copy_size);
139143
dst_data_l = dst_data_l + col_matrix_width;
140144
dst_data_r = dst_data_r + col_matrix_width;
141145
}
146+
col_start_l = col_start_l + col_block_ic;
147+
col_start_r = col_start_r + col_block_ic;
142148
}
143149
}
144150

145151
auto pad = static_cast<T>(0);
146152
if (filter_width == 1) {
147153
// fill width padding
154+
T* dst_data_ic = col_data;
148155
for (int ic = 0; ic < im_channels; ++ic) {
149-
// TODO(TJ): move * outside
150-
T* dst_data_ic = col_data + ic * col_block_ic;
156+
T* dst_data_kh = dst_data_ic;
151157
for (int kh = 0; kh < filter_height; ++kh) {
152-
// TODO(TJ): move * outside
153-
T* dst_data = dst_data_ic + kh * col_block_fh;
158+
T* dst_data = dst_data_kh;
154159
for (int oh = 0; oh < output_height; ++oh) {
155160
*dst_data = pad;
156161
dst_data = dst_data + output_width - 1;
157162
*dst_data = pad;
158163
++dst_data;
159164
}
165+
dst_data_kh = dst_data_kh + col_block_fh;
160166
}
167+
dst_data_ic = dst_data_ic + col_block_ic;
161168
}
162169
// fill core
163170
size_t copy_size = sizeof(T) * (output_width - plw - prw);
@@ -184,12 +191,10 @@ inline void im2col_sh1sw1dh1dw1ph1pw1(const framework::Tensor& im,
184191

185192
// filter_width != 1
186193
// fill width padding
194+
T* dst_data_ic = col_data;
187195
for (int ic = 0; ic < im_channels; ++ic) {
188-
// TODO(TJ): move * outside
189-
T* dst_data_ic = col_data + ic * col_block_ic;
196+
T* dst_data_kh = dst_data_ic;
190197
for (int kh = 0; kh < filter_height; ++kh) {
191-
// TODO(TJ): move * outside
192-
T* dst_data_kh = dst_data_ic + kh * col_block_fh;
193198
for (T* dst_data :
194199
{dst_data_kh, dst_data_kh + (filter_width - prw) * col_matrix_width +
195200
output_width - 1}) {
@@ -199,7 +204,9 @@ inline void im2col_sh1sw1dh1dw1ph1pw1(const framework::Tensor& im,
199204
dst_data = dst_data + output_width;
200205
}
201206
}
207+
dst_data_kh = dst_data_kh + col_block_fh;
202208
}
209+
dst_data_ic = dst_data_ic + col_block_ic;
203210
}
204211

205212
// TODO(TJ): use array like: size_t copy_size[kw]={sizeof(T) *

0 commit comments

Comments
 (0)