Skip to content

Commit 3d56786

Browse files
authored
Merge pull request #5247 from qingqing01/memory_alignment
Allocate aligned memory by posix_memalign.
2 parents 2649221 + e88e196 commit 3d56786

File tree

4 files changed

+17
-8
lines changed

4 files changed

+17
-8
lines changed

paddle/memory/detail/system_allocator.cc

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,16 @@ void* CPUAllocator::Alloc(size_t& index, size_t size) {
4141

4242
index = 0; // unlock memory
4343

44-
void* p = malloc(size);
44+
void* p;
45+
46+
#ifdef PADDLE_USE_MKLDNN
47+
// refer to https://github.com/01org/mkl-dnn/blob/master/include/mkldnn.hpp
48+
// memory alignment
49+
PADDLE_ENFORCE_EQ(posix_memalign(&p, 4096ul, size), 0);
50+
#else
51+
PADDLE_ENFORCE_EQ(posix_memalign(&p, 32ul, size), 0);
52+
#endif
53+
PADDLE_ENFORCE(p, "Fail to allocate CPU memory: size = %d .", size);
4554

4655
if (p != nullptr) {
4756
if (FLAGS_use_pinned_memory) {

paddle/operators/nccl_op_test.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ TEST_F(NCCLTester, ncclAllReduceOp) {
185185
recv_tensor.numel() * sizeof(float),
186186
static_cast<p::CUDADeviceContext *>(dev_ctxs[i])->stream());
187187

188-
for (size_t j = 0; j < f::product(kDims); ++j) {
188+
for (int64_t j = 0; j < f::product(kDims); ++j) {
189189
ASSERT_NEAR(ct[j], result, 1e-5);
190190
}
191191
}
@@ -234,7 +234,7 @@ TEST_F(NCCLTester, ncclReduceOp) {
234234
recv_tensor.numel() * sizeof(float),
235235
static_cast<p::CUDADeviceContext *>(dev_ctxs[kRoot])->stream());
236236

237-
for (int j = 0; j < f::product(kDims); ++j) {
237+
for (int64_t j = 0; j < f::product(kDims); ++j) {
238238
ASSERT_NEAR(ct[j], result, 1e-5);
239239
}
240240
}
@@ -282,7 +282,7 @@ TEST_F(NCCLTester, ncclBcastOp) {
282282
recv_tensor.numel() * sizeof(float),
283283
static_cast<p::CUDADeviceContext *>(dev_ctxs[idx])->stream());
284284

285-
for (size_t j = 0; j < f::product(kDims); ++j) {
285+
for (int64_t j = 0; j < f::product(kDims); ++j) {
286286
ASSERT_NEAR(ct[j], result, 1e-5);
287287
}
288288
}

paddle/operators/reshape_op.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class ReshapeOp : public framework::OperatorWithKernel {
3636
PADDLE_ENFORCE(shape.size() > 0, "Attr(shape) shouldn't be empty.");
3737
auto x_dims = ctx->GetInputDim("X");
3838
// TODO(qiao) change batch_size
39-
for (int i = 1; i < shape.size(); ++i) {
39+
for (size_t i = 1; i < shape.size(); ++i) {
4040
PADDLE_ENFORCE(shape[i] > 0,
4141
"Each dimension of shape "
4242
"must be positiv except the first.");

paddle/operators/save_load_op_test.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ TEST(SaveLoadOp, CPU) {
3434

3535
tensor->set_lod(expect_lod);
3636
int* expect = tensor->mutable_data<int>(place);
37-
for (size_t i = 0; i < paddle::framework::product(tensor->dims()); ++i) {
37+
for (int64_t i = 0; i < tensor->numel(); ++i) {
3838
expect[i] = static_cast<int>(i);
3939
}
4040
paddle::framework::AttributeMap attrs;
@@ -50,7 +50,7 @@ TEST(SaveLoadOp, CPU) {
5050
"load", {}, {{"Out", {"out_var"}}}, attrs);
5151
load_op->Run(scope, ctx);
5252
int* actual = target->data<int>();
53-
for (size_t i = 0; i < paddle::framework::product(tensor->dims()); ++i) {
53+
for (int64_t i = 0; i < tensor->numel(); ++i) {
5454
EXPECT_EQ(expect[i], actual[i]);
5555
}
5656
auto& actual_lod = target->lod();
@@ -60,4 +60,4 @@ TEST(SaveLoadOp, CPU) {
6060
EXPECT_EQ(expect_lod[i][j], actual_lod[i][j]);
6161
}
6262
}
63-
}
63+
}

0 commit comments

Comments (0)