@@ -14,7 +14,9 @@ limitations under the License. */
1414
#include "paddle/fluid/operators/math/im2col.h"
#include <gtest/gtest.h>
#include <sys/time.h>
#include <chrono>
#include <vector>
#include "paddle/fluid/operators/math/im2col_cfo_cpu.h"
1820
1921template <typename DeviceContext, typename Place>
2022void testIm2col () {
@@ -160,82 +162,86 @@ void testIm2col() {
160162 delete context;
161163}
162164
165+ TEST (math, im2col) {
166+ testIm2col<paddle::platform::CPUDeviceContext, paddle::platform::CPUPlace>();
167+ #ifdef PADDLE_WITH_CUDA
168+ testIm2col<paddle::platform::CUDADeviceContext,
169+ paddle::platform::CUDAPlace>();
170+ #endif
171+ }
172+
// Expands, inside a function that has the ints ic, ih, iw, fh, fw, ph and pw
// in scope, to the locals shared by the CPU im2col test and benchmark:
//   place, context            - CPUPlace and a CPUDeviceContext built on it
//   padding, stride, dilation - {ph, pw}, {1, 1} and {1, 1}
//   output_height/width       - (in - filter + 2 * pad) / stride + 1
//   input                     - {ic, ih, iw} float tensor filled 1, 2, 3, ...
//   out, ref                  - {ic, fh, fw, output_height, output_width}
//                               float tensors (allocated, not initialised)
//   im2col                    - kCFO Im2ColFunctor for CPUDeviceContext/float
// The expansion stops at the `im2col` declarator without a semicolon, so call
// sites must write `PREPARE_IM2COL_CPU;`.
#define PREPARE_IM2COL_CPU                                                    \
  paddle::platform::CPUPlace place;                                           \
  paddle::platform::CPUDeviceContext context(place);                          \
  std::vector<int> padding{ph, pw};                                           \
  std::vector<int> stride{1, 1};                                              \
  std::vector<int> dilation{1, 1};                                            \
  int output_height = (ih - fh + 2 * padding[0]) / stride[0] + 1;             \
  int output_width = (iw - fw + 2 * padding[1]) / stride[1] + 1;              \
  paddle::framework::Tensor input;                                            \
  paddle::framework::Tensor out;                                              \
  paddle::framework::Tensor ref;                                              \
  float* input_ptr = input.mutable_data<float>({ic, ih, iw}, place);          \
  for (int pos = 0; pos < input.numel(); ++pos) {                             \
    input_ptr[pos] = static_cast<float>(pos + 1);                             \
  }                                                                           \
  out.mutable_data<float>({ic, fh, fw, output_height, output_width}, place);  \
  ref.mutable_data<float>({ic, fh, fw, output_height, output_width}, place);  \
  paddle::operators::math::Im2ColFunctor<                                     \
      paddle::operators::math::ColFormat::kCFO,                               \
      paddle::platform::CPUDeviceContext, float>                              \
      im2col
194+
163195void testIm2colCPU (int ic, int ih, int iw, int fh, int fw, int ph, int pw) {
164- paddle::framework::Tensor input;
165- paddle::framework::Tensor output;
166- paddle::framework::Tensor ref_output;
167- std::vector<int > padding ({ph, pw});
168- std::vector<int > stride ({1 , 1 }); // stride_y, stride_x
169- std::vector<int > dilation ({1 , 1 }); // dilation_y, dilation_x
170- int output_height = (ih - fh + padding[0 ] * 2 ) / stride[0 ] + 1 ;
171- int output_width = (iw - fw + padding[1 ] * 2 ) / stride[1 ] + 1 ;
172- float * input_ptr =
173- input.mutable_data <float >({ic, ih, iw}, paddle::platform::CPUPlace ());
174- for (int i = 0 ; i < input.numel (); ++i) {
175- input_ptr[i] = static_cast <float >(i + 1 );
196+ PREPARE_IM2COL_CPU;
197+
198+ im2col (context, input, dilation, stride, padding, &out);
199+ paddle::operators::math::im2col_common<float >(input, dilation, stride,
200+ padding, &ref);
201+
202+ float * ref_data = ref.data <float >();
203+ float * out_data = out.data <float >();
204+ for (int i = 0 ; i < out.numel (); ++i) {
205+ EXPECT_EQ (out_data[i], ref_data[i]);
176206 }
207+ }
177208
178- paddle::platform::CPUPlace place;
179- paddle::platform::CPUDeviceContext context (place);
180- output.mutable_data <float >({ic, fh, fw, output_height, output_width}, place);
181- ref_output.mutable_data <float >({ic, fh, fw, output_height, output_width},
182- place);
183- paddle::operators::math::Im2ColFunctor<
184- paddle::operators::math::ColFormat::kCFO ,
185- paddle::platform::CPUDeviceContext, float >
186- im2col;
187- im2col (context, input, dilation, stride, padding, &output);
188- auto ref_im2col = [&](
189- const paddle::framework::Tensor& im, const std::vector<int >& dilation,
190- const std::vector<int >& stride, const std::vector<int >& padding,
191- paddle::framework::Tensor* col) {
192- int im_channels = im.dims ()[0 ];
193- int im_height = im.dims ()[1 ];
194- int im_width = im.dims ()[2 ];
195- int filter_height = col->dims ()[1 ];
196- int filter_width = col->dims ()[2 ];
197- int output_height = col->dims ()[3 ];
198- int output_width = col->dims ()[4 ];
199- int channels_col = im_channels * filter_height * filter_width;
200-
201- const float * im_data = im.data <float >();
202- float * col_data = col->data <float >();
203- for (int c = 0 ; c < channels_col; ++c) {
204- int w_offset = c % filter_width;
205- int h_offset = (c / filter_width) % filter_height;
206- int c_im = c / (filter_width * filter_height);
207- for (int h = 0 ; h < output_height; ++h) {
208- int im_row_idx = h * stride[0 ] - padding[0 ] + h_offset * dilation[0 ];
209- for (int w = 0 ; w < output_width; ++w) {
210- int im_col_idx = w * stride[1 ] - padding[1 ] + w_offset * dilation[1 ];
211- int col_idx = (c * output_height + h) * output_width + w;
212- int im_idx = (im_row_idx + c_im * im_height) * im_width + im_col_idx;
213- col_data[col_idx] = (im_row_idx < 0 || im_row_idx >= im_height ||
214- im_col_idx < 0 || im_col_idx >= im_width)
215- ? 0 .f
216- : im_data[im_idx];
217- }
218- }
219- }
209+ void benchIm2col (int ic, int ih, int iw, int fh, int fw, int ph, int pw) {
210+ PREPARE_IM2COL_CPU;
211+ constexpr int repeat = 30 ;
212+ auto GetCurrentMs = []() -> double {
213+ struct timeval time;
214+ gettimeofday (&time, NULL );
215+ return 1e+3 * time.tv_sec + 1e-3 * time.tv_usec ;
220216 };
217+ auto t1 = GetCurrentMs ();
218+ for (int i = 0 ; i < repeat; ++i) {
219+ im2col (context, input, dilation, stride, padding, &out);
220+ }
221+ auto t2 = GetCurrentMs ();
221222
222- ref_im2col (input, dilation, stride, padding, &ref_output);
223-
224- float * out_cfo_ptr = output.data <float >();
225- float * out_ref_ptr = ref_output.data <float >();
226- for (int i = 0 ; i < output.numel (); ++i) {
227- EXPECT_EQ (out_cfo_ptr[i], out_ref_ptr[i]);
223+ for (int i = 0 ; i < repeat; ++i) {
224+ paddle::operators::math::im2col_common<float >(input, dilation, stride,
225+ padding, &ref);
228226 }
227+ auto t3 = GetCurrentMs ();
228+
229+ LOG (INFO) << " before: " << (t3 - t2) / repeat
230+ << " ,after: " << (t2 - t1) / repeat;
229231}
230232
231- TEST (math, im2col) {
232- testIm2col<paddle::platform::CPUDeviceContext, paddle::platform::CPUPlace>();
233- testIm2colCPU (/* ic*/ 3 , /* ih*/ 5 , /* iw*/ 5 , /* fh*/ 3 , /* fw*/ 2 , /* ph*/ 0 ,
233+ TEST (math, im2col_cputest) {
234+ testIm2colCPU (/* ic*/ 2 , /* ih*/ 5 , /* iw*/ 4 , /* fh*/ 3 , /* fw*/ 3 , /* ph*/ 0 ,
234235 /* pw*/ 0 );
235236 testIm2colCPU (/* ic*/ 2 , /* ih*/ 5 , /* iw*/ 4 , /* fh*/ 3 , /* fw*/ 3 , /* ph*/ 1 ,
236237 /* pw*/ 1 );
237- #ifdef PADDLE_WITH_CUDA
238- testIm2col<paddle::platform::CUDADeviceContext,
239- paddle::platform::CUDAPlace>();
240- #endif
238+
239+ benchIm2col (/* ic*/ 3 , /* ih*/ 224 , /* iw*/ 224 , /* fh*/ 3 , /* fw*/ 3 , /* ph*/ 1 ,
240+ /* pw*/ 1 );
241+ benchIm2col (/* ic*/ 3 , /* ih*/ 224 , /* iw*/ 224 , /* fh*/ 3 , /* fw*/ 3 , /* ph*/ 0 ,
242+ /* pw*/ 0 );
243+ benchIm2col (/* ic*/ 3 , /* ih*/ 224 , /* iw*/ 224 , /* fh*/ 5 , /* fw*/ 5 , /* ph*/ 1 ,
244+ /* pw*/ 1 );
245+ benchIm2col (/* ic*/ 3 , /* ih*/ 224 , /* iw*/ 224 , /* fh*/ 5 , /* fw*/ 5 , /* ph*/ 0 ,
246+ /* pw*/ 0 );
241247}
0 commit comments