Skip to content

Commit a328ae3

Browse files
committed
Use posix_memalign to allocate aligned memory, because some SIMD instructions require aligned memory accesses.
1 parent ef2f0ec commit a328ae3

File tree

3 files changed

+14
-5
lines changed

3 files changed

+14
-5
lines changed

paddle/memory/detail/system_allocator.cc

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,16 @@ void* CPUAllocator::Alloc(size_t& index, size_t size) {
4141

4242
index = 0; // unlock memory
4343

44-
void* p = malloc(size);
44+
void* p;
45+
46+
#ifdef PADDLE_USE_MKLDNN
47+
// refer to https://github.com/01org/mkl-dnn/blob/master/include/mkldnn.hpp
48+
// memory alignment
49+
PADDLE_ENFORCE_EQ(posix_memalign(&p, 4096ul, size), 0);
50+
#else
51+
PADDLE_ENFORCE_EQ(posix_memalign(&p, 32ul, size), 0);
52+
#endif
53+
PADDLE_ENFORCE(p, "Fail to allocate CPU memory: size = %d .", size);
4554

4655
if (p != nullptr) {
4756
if (FLAGS_use_pinned_memory) {

paddle/operators/reshape_op.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class ReshapeOp : public framework::OperatorWithKernel {
3636
PADDLE_ENFORCE(shape.size() > 0, "Attr(shape) shouldn't be empty.");
3737
auto x_dims = ctx->GetInputDim("X");
3838
// TODO(qiao) change batch_size
39-
for (int i = 1; i < shape.size(); ++i) {
39+
for (size_t i = 1; i < shape.size(); ++i) {
4040
PADDLE_ENFORCE(shape[i] > 0,
4141
"Each dimension of shape "
4242
"must be positiv except the first.");

paddle/operators/save_load_op_test.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ TEST(SaveLoadOp, CPU) {
3434

3535
tensor->set_lod(expect_lod);
3636
int* expect = tensor->mutable_data<int>(place);
37-
for (size_t i = 0; i < paddle::framework::product(tensor->dims()); ++i) {
37+
for (int64_t i = 0; i < tensor->numel(); ++i) {
3838
expect[i] = static_cast<int>(i);
3939
}
4040
paddle::framework::AttributeMap attrs;
@@ -50,7 +50,7 @@ TEST(SaveLoadOp, CPU) {
5050
"load", {}, {{"Out", {"out_var"}}}, attrs);
5151
load_op->Run(scope, ctx);
5252
int* actual = target->data<int>();
53-
for (size_t i = 0; i < paddle::framework::product(tensor->dims()); ++i) {
53+
for (int64_t i = 0; i < tensor->numel(); ++i) {
5454
EXPECT_EQ(expect[i], actual[i]);
5555
}
5656
auto& actual_lod = target->lod();
@@ -60,4 +60,4 @@ TEST(SaveLoadOp, CPU) {
6060
EXPECT_EQ(expect_lod[i][j], actual_lod[i][j]);
6161
}
6262
}
63-
}
63+
}

0 commit comments

Comments
 (0)