Skip to content

Commit 8d6be4f

Browse files
committed
refine im2col test and add benchmark
1 parent 507c143 commit 8d6be4f

File tree

1 file changed

+73 -67 lines changed

1 file changed

+73 -67 lines changed

paddle/fluid/operators/math/im2col_test.cc

Lines changed: 73 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,9 @@ limitations under the License. */
1414

1515
#include "paddle/fluid/operators/math/im2col.h"
1616
#include <gtest/gtest.h>
17+
#include <sys/time.h>
1718
#include <vector>
19+
#include "paddle/fluid/operators/math/im2col_cfo_cpu.h"
1820

1921
template <typename DeviceContext, typename Place>
2022
void testIm2col() {
@@ -160,82 +162,86 @@ void testIm2col() {
160162
delete context;
161163
}
162164

165+
TEST(math, im2col) {
166+
testIm2col<paddle::platform::CPUDeviceContext, paddle::platform::CPUPlace>();
167+
#ifdef PADDLE_WITH_CUDA
168+
testIm2col<paddle::platform::CUDADeviceContext,
169+
paddle::platform::CUDAPlace>();
170+
#endif
171+
}
172+
173+
#define PREPARE_IM2COL_CPU \
174+
paddle::platform::CPUPlace place; \
175+
paddle::platform::CPUDeviceContext context(place); \
176+
paddle::framework::Tensor input; \
177+
paddle::framework::Tensor out; \
178+
paddle::framework::Tensor ref; \
179+
std::vector<int> padding({ph, pw}); \
180+
std::vector<int> stride({1, 1}); \
181+
std::vector<int> dilation({1, 1}); \
182+
float* input_ptr = input.mutable_data<float>({ic, ih, iw}, place); \
183+
for (int i = 0; i < input.numel(); ++i) { \
184+
input_ptr[i] = static_cast<float>(i + 1); \
185+
} \
186+
int output_height = (ih - fh + padding[0] * 2) / stride[0] + 1; \
187+
int output_width = (iw - fw + padding[1] * 2) / stride[1] + 1; \
188+
out.mutable_data<float>({ic, fh, fw, output_height, output_width}, place); \
189+
ref.mutable_data<float>({ic, fh, fw, output_height, output_width}, place); \
190+
paddle::operators::math::Im2ColFunctor< \
191+
paddle::operators::math::ColFormat::kCFO, \
192+
paddle::platform::CPUDeviceContext, float> \
193+
im2col
194+
163195
void testIm2colCPU(int ic, int ih, int iw, int fh, int fw, int ph, int pw) {
164-
paddle::framework::Tensor input;
165-
paddle::framework::Tensor output;
166-
paddle::framework::Tensor ref_output;
167-
std::vector<int> padding({ph, pw});
168-
std::vector<int> stride({1, 1}); // stride_y, stride_x
169-
std::vector<int> dilation({1, 1}); // dilation_y, dilation_x
170-
int output_height = (ih - fh + padding[0] * 2) / stride[0] + 1;
171-
int output_width = (iw - fw + padding[1] * 2) / stride[1] + 1;
172-
float* input_ptr =
173-
input.mutable_data<float>({ic, ih, iw}, paddle::platform::CPUPlace());
174-
for (int i = 0; i < input.numel(); ++i) {
175-
input_ptr[i] = static_cast<float>(i + 1);
196+
PREPARE_IM2COL_CPU;
197+
198+
im2col(context, input, dilation, stride, padding, &out);
199+
paddle::operators::math::im2col_common<float>(input, dilation, stride,
200+
padding, &ref);
201+
202+
float* ref_data = ref.data<float>();
203+
float* out_data = out.data<float>();
204+
for (int i = 0; i < out.numel(); ++i) {
205+
EXPECT_EQ(out_data[i], ref_data[i]);
176206
}
207+
}
177208

178-
paddle::platform::CPUPlace place;
179-
paddle::platform::CPUDeviceContext context(place);
180-
output.mutable_data<float>({ic, fh, fw, output_height, output_width}, place);
181-
ref_output.mutable_data<float>({ic, fh, fw, output_height, output_width},
182-
place);
183-
paddle::operators::math::Im2ColFunctor<
184-
paddle::operators::math::ColFormat::kCFO,
185-
paddle::platform::CPUDeviceContext, float>
186-
im2col;
187-
im2col(context, input, dilation, stride, padding, &output);
188-
auto ref_im2col = [&](
189-
const paddle::framework::Tensor& im, const std::vector<int>& dilation,
190-
const std::vector<int>& stride, const std::vector<int>& padding,
191-
paddle::framework::Tensor* col) {
192-
int im_channels = im.dims()[0];
193-
int im_height = im.dims()[1];
194-
int im_width = im.dims()[2];
195-
int filter_height = col->dims()[1];
196-
int filter_width = col->dims()[2];
197-
int output_height = col->dims()[3];
198-
int output_width = col->dims()[4];
199-
int channels_col = im_channels * filter_height * filter_width;
200-
201-
const float* im_data = im.data<float>();
202-
float* col_data = col->data<float>();
203-
for (int c = 0; c < channels_col; ++c) {
204-
int w_offset = c % filter_width;
205-
int h_offset = (c / filter_width) % filter_height;
206-
int c_im = c / (filter_width * filter_height);
207-
for (int h = 0; h < output_height; ++h) {
208-
int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0];
209-
for (int w = 0; w < output_width; ++w) {
210-
int im_col_idx = w * stride[1] - padding[1] + w_offset * dilation[1];
211-
int col_idx = (c * output_height + h) * output_width + w;
212-
int im_idx = (im_row_idx + c_im * im_height) * im_width + im_col_idx;
213-
col_data[col_idx] = (im_row_idx < 0 || im_row_idx >= im_height ||
214-
im_col_idx < 0 || im_col_idx >= im_width)
215-
? 0.f
216-
: im_data[im_idx];
217-
}
218-
}
219-
}
209+
void benchIm2col(int ic, int ih, int iw, int fh, int fw, int ph, int pw) {
210+
PREPARE_IM2COL_CPU;
211+
constexpr int repeat = 30;
212+
auto GetCurrentMs = []() -> double {
213+
struct timeval time;
214+
gettimeofday(&time, NULL);
215+
return 1e+3 * time.tv_sec + 1e-3 * time.tv_usec;
220216
};
217+
auto t1 = GetCurrentMs();
218+
for (int i = 0; i < repeat; ++i) {
219+
im2col(context, input, dilation, stride, padding, &out);
220+
}
221+
auto t2 = GetCurrentMs();
221222

222-
ref_im2col(input, dilation, stride, padding, &ref_output);
223-
224-
float* out_cfo_ptr = output.data<float>();
225-
float* out_ref_ptr = ref_output.data<float>();
226-
for (int i = 0; i < output.numel(); ++i) {
227-
EXPECT_EQ(out_cfo_ptr[i], out_ref_ptr[i]);
223+
for (int i = 0; i < repeat; ++i) {
224+
paddle::operators::math::im2col_common<float>(input, dilation, stride,
225+
padding, &ref);
228226
}
227+
auto t3 = GetCurrentMs();
228+
229+
LOG(INFO) << "before: " << (t3 - t2) / repeat
230+
<< ",after: " << (t2 - t1) / repeat;
229231
}
230232

231-
TEST(math, im2col) {
232-
testIm2col<paddle::platform::CPUDeviceContext, paddle::platform::CPUPlace>();
233-
testIm2colCPU(/*ic*/ 3, /*ih*/ 5, /*iw*/ 5, /*fh*/ 3, /*fw*/ 2, /*ph*/ 0,
233+
TEST(math, im2col_cputest) {
234+
testIm2colCPU(/*ic*/ 2, /*ih*/ 5, /*iw*/ 4, /*fh*/ 3, /*fw*/ 3, /*ph*/ 0,
234235
/*pw*/ 0);
235236
testIm2colCPU(/*ic*/ 2, /*ih*/ 5, /*iw*/ 4, /*fh*/ 3, /*fw*/ 3, /*ph*/ 1,
236237
/*pw*/ 1);
237-
#ifdef PADDLE_WITH_CUDA
238-
testIm2col<paddle::platform::CUDADeviceContext,
239-
paddle::platform::CUDAPlace>();
240-
#endif
238+
239+
benchIm2col(/*ic*/ 3, /*ih*/ 224, /*iw*/ 224, /*fh*/ 3, /*fw*/ 3, /*ph*/ 1,
240+
/*pw*/ 1);
241+
benchIm2col(/*ic*/ 3, /*ih*/ 224, /*iw*/ 224, /*fh*/ 3, /*fw*/ 3, /*ph*/ 0,
242+
/*pw*/ 0);
243+
benchIm2col(/*ic*/ 3, /*ih*/ 224, /*iw*/ 224, /*fh*/ 5, /*fw*/ 5, /*ph*/ 1,
244+
/*pw*/ 1);
245+
benchIm2col(/*ic*/ 3, /*ih*/ 224, /*iw*/ 224, /*fh*/ 5, /*fw*/ 5, /*ph*/ 0,
246+
/*pw*/ 0);
241247
}

0 commit comments

Comments (0)