
Commit a009272
inference/unify output buffer management (#11569)
1 parent 5f0c780 commit a009272
7 files changed: +121 -51
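
In short: `PaddleBuf` changes from a plain `{void* data; size_t length;}` struct into a class that tracks whether it owns its memory, so output buffers no longer need a manual `free()` and can be reused across `Run()` calls. A minimal sketch of the three construction modes introduced below (the `main()` is illustrative only, assuming the new header is on the include path):

#include <cstdint>
#include "paddle/contrib/inference/paddle_inference_api.h"

int main() {
  int64_t raw[4] = {1, 2, 3, 4};

  // Wrap external memory: the buffer does NOT take ownership.
  paddle::PaddleBuf external(raw, sizeof(raw));

  // Allocate owned memory: released automatically by the destructor.
  paddle::PaddleBuf owned(1024);

  // Default-constructed: empty; an engine can grow it later via Resize().
  paddle::PaddleBuf lazy;

  return 0;
}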

paddle/contrib/inference/demo/simple_on_word2vec.cc

Lines changed: 9 additions & 13 deletions

@@ -40,10 +40,9 @@ void Main(bool use_gpu) {
   //# 2. Prepare input.
   int64_t data[4] = {1, 2, 3, 4};
-  PaddleBuf buf{.data = data, .length = sizeof(data)};
   PaddleTensor tensor{.name = "",
                       .shape = std::vector<int>({4, 1}),
-                      .data = buf,
+                      .data = PaddleBuf(data, sizeof(data)),
                       .dtype = PaddleDType::INT64};

   // For simplicity, we set all the slots with the same data.
@@ -55,14 +54,12 @@

   //# 4. Get output.
   ASSERT_EQ(outputs.size(), 1UL);
-  LOG(INFO) << "output buffer size: " << outputs.front().data.length;
-  const size_t num_elements = outputs.front().data.length / sizeof(float);
+  LOG(INFO) << "output buffer size: " << outputs.front().data.length();
+  const size_t num_elements = outputs.front().data.length() / sizeof(float);
   // The outputs' buffers are in CPU memory.
   for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
-    LOG(INFO) << static_cast<float*>(outputs.front().data.data)[i];
+    LOG(INFO) << static_cast<float*>(outputs.front().data.data())[i];
   }
-  // TODO(Superjomn): this is should be free automatically
-  free(outputs[0].data.data);
 }
}
@@ -86,10 +83,9 @@ void MainThreads(int num_threads, bool use_gpu) {
     for (int batch_id = 0; batch_id < num_batches; ++batch_id) {
       // 2. Dummy Input Data
       int64_t data[4] = {1, 2, 3, 4};
-      PaddleBuf buf{.data = data, .length = sizeof(data)};
       PaddleTensor tensor{.name = "",
                           .shape = std::vector<int>({4, 1}),
-                          .data = buf,
+                          .data = PaddleBuf(data, sizeof(data)),
                           .dtype = PaddleDType::INT64};
       std::vector<PaddleTensor> inputs(4, tensor);
       std::vector<PaddleTensor> outputs;
@@ -99,13 +95,13 @@
       // 4. Get output.
       ASSERT_EQ(outputs.size(), 1UL);
       LOG(INFO) << "TID: " << tid << ", "
-                << "output buffer size: " << outputs.front().data.length;
-      const size_t num_elements = outputs.front().data.length / sizeof(float);
+                << "output buffer size: " << outputs.front().data.length();
+      const size_t num_elements =
+          outputs.front().data.length() / sizeof(float);
       // The outputs' buffers are in CPU memory.
       for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
-        LOG(INFO) << static_cast<float*>(outputs.front().data.data)[i];
+        LOG(INFO) << static_cast<float*>(outputs.front().data.data())[i];
       }
-      free(outputs[0].data.data);
     }
   });
}
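
The removed `free(outputs[0].data.data)` calls are the point of the change: an owning `PaddleBuf` releases its allocation in its destructor. A hedged sketch of the new lifetime (not taken from the demo itself):

#include "paddle/contrib/inference/paddle_inference_api.h"

void Consume() {
  paddle::PaddleBuf buf(1000 * sizeof(float));  // owning allocation
  float* out = static_cast<float*>(buf.data());
  out[0] = 1.0f;                                // use the buffer as usual
}  // ~PaddleBuf() runs Free() here; no manual free()/delete by the caller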

paddle/contrib/inference/paddle_inference_api.cc

Lines changed: 50 additions & 0 deletions

@@ -13,3 +13,53 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/contrib/inference/paddle_inference_api.h"
+
+namespace paddle {
+
+PaddleBuf::PaddleBuf(PaddleBuf&& other)
+    : data_(other.data_),
+      length_(other.length_),
+      memory_owned_(other.memory_owned_) {
+  other.memory_owned_ = false;
+  other.data_ = nullptr;
+  other.length_ = 0;
+}
+
+PaddleBuf::PaddleBuf(const PaddleBuf& other) { *this = other; }
+
+PaddleBuf& PaddleBuf::operator=(const PaddleBuf& other) {
+  // Only a buffer with external memory can be copied.
+  assert(!other.memory_owned_);
+  data_ = other.data_;
+  length_ = other.length_;
+  memory_owned_ = other.memory_owned_;
+  return *this;
+}
+
+void PaddleBuf::Resize(size_t length) {
+  // Only owned memory can be re-allocated; external memory can't be changed.
+  if (length_ == length) return;
+  assert(memory_owned_);
+  Free();
+  data_ = new char[length];
+  length_ = length;
+  memory_owned_ = true;
+}
+
+void PaddleBuf::Reset(void* data, size_t length) {
+  Free();
+  memory_owned_ = false;
+  data_ = data;
+  length_ = length;
+}
+
+void PaddleBuf::Free() {
+  if (memory_owned_ && data_) {
+    assert(length_ > 0);
+    // data_ is allocated with new char[], so it must be released with delete[].
+    delete[] static_cast<char*>(data_);
+    data_ = nullptr;
+    length_ = 0;
+  }
+}
+
+}  // namespace paddle
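
The implementation above makes `PaddleBuf` move-only for owned memory: moving transfers ownership and empties the source, while copying is asserted to be legal only for buffers that wrap external memory. A small sketch of both paths (the assert() fires only in non-NDEBUG builds):

#include <cstdint>
#include <utility>
#include "paddle/contrib/inference/paddle_inference_api.h"

int main() {
  int64_t raw[4] = {1, 2, 3, 4};
  paddle::PaddleBuf external(raw, sizeof(raw));
  paddle::PaddleBuf copied(external);         // fine: external memory is copyable

  paddle::PaddleBuf owned(64);
  // paddle::PaddleBuf bad(owned);            // would trip assert(!other.memory_owned_)
  paddle::PaddleBuf moved(std::move(owned));  // fine: ownership moves, `owned` is emptied
  return 0;
}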

paddle/contrib/inference/paddle_inference_api.h

Lines changed: 33 additions & 5 deletions

@@ -21,6 +21,7 @@ limitations under the License. */

 #pragma once

+#include <cassert>
 #include <memory>
 #include <string>
 #include <vector>
@@ -32,12 +33,38 @@ enum PaddleDType {
   INT64,
 };

-struct PaddleBuf {
-  void* data;     // pointer to the data memory.
-  size_t length;  // number of memory bytes.
+class PaddleBuf {
+ public:
+  PaddleBuf() = default;
+  PaddleBuf(PaddleBuf&& other);
+  // Copy only available when memory is managed externally.
+  explicit PaddleBuf(const PaddleBuf&);
+  PaddleBuf& operator=(const PaddleBuf&);
+  // Do not own the memory.
+  PaddleBuf(void* data, size_t length)
+      : data_(data), length_(length), memory_owned_{false} {}
+  // Own memory.
+  PaddleBuf(size_t length)
+      : data_(new char[length]), length_(length), memory_owned_(true) {}
+  // Resize to `length` bytes.
+  void Resize(size_t length);
+  // Reset to external memory.
+  void Reset(void* data, size_t length);
+  bool empty() const { return length_ == 0; }
+  void* data() const { return data_; }
+  size_t length() const { return length_; }
+
+  ~PaddleBuf() { Free(); }
+
+ private:
+  void Free();
+  void* data_{nullptr};  // pointer to the data memory.
+  size_t length_{0};     // number of memory bytes.
+  bool memory_owned_{true};
 };

 struct PaddleTensor {
+  PaddleTensor() = default;
   std::string name;  // variable name.
   std::vector<int> shape;
   // TODO(Superjomn) for LoD support, add a vector<vector<int>> field if needed.
@@ -67,8 +94,9 @@ class PaddlePredictor {

   // Predict an record.
   // The caller should be responsible for allocating and releasing the memory of
-  // `inputs`. `inputs` should be alive until Run returns. caller should be
-  // responsible for releasing the memory of `output_data`.
+  // `inputs`. `inputs` should be available until Run returns. Caller should be
+  // responsible for the output tensor's buffer, either allocated or passed from
+  // outside.
   virtual bool Run(const std::vector<PaddleTensor>& inputs,
                    std::vector<PaddleTensor>* output_data) = 0;
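
Following the contracts documented in the header: `Resize()` re-allocates only owned storage, while `Reset()` turns the buffer into a non-owning view of caller memory. A sketch under those assumptions:

#include "paddle/contrib/inference/paddle_inference_api.h"

int main() {
  paddle::PaddleBuf buf;                  // empty, owning by default
  buf.Resize(256);                        // allocates 256 owned bytes
  buf.Resize(512);                        // frees the old block, allocates a larger one

  char external[128];
  buf.Reset(external, sizeof(external));  // frees owned storage, becomes a view
  // buf.Resize(1024);                    // would trip assert(memory_owned_)
  return 0;
}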

paddle/contrib/inference/paddle_inference_api_anakin_engine.cc

Lines changed: 5 additions & 2 deletions

@@ -48,7 +48,7 @@ bool PaddleInferenceAnakinPredictor::Run(
     auto d_tensor_in_p = executor_.get_in(input.name);
     float *d_data_p = d_tensor_in_p->mutable_data();
     if (cudaMemcpy(d_data_p,
-                   static_cast<float *>(input.data.data),
+                   static_cast<float *>(input.data.data()),
                    d_tensor_in_p->valid_size() * sizeof(float),
                    cudaMemcpyHostToDevice) != 0) {
       LOG(ERROR) << "copy data from CPU to GPU error";
@@ -65,8 +65,11 @@ bool PaddleInferenceAnakinPredictor::Run(
   for (auto &output : *output_data) {
     auto *tensor = executor_.get_out(output.name);
     output.shape = tensor->shape();
+    if (output.data.length() < tensor->valid_size() * sizeof(float)) {
+      output.data.Resize(tensor->valid_size() * sizeof(float));
+    }
     // Copy data from GPU -> CPU
-    if (cudaMemcpy(output.data.data,
+    if (cudaMemcpy(output.data.data(),
                    tensor->mutable_data(),
                    tensor->valid_size() * sizeof(float),
                    cudaMemcpyDeviceToHost) != 0) {
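
The Anakin engine now grows the output buffer on demand before the device-to-host copy instead of writing into caller-sized memory. The same pattern, sketched host-side without CUDA (`EnsureAndCopy` and `needed` are illustrative; `needed` stands in for `tensor->valid_size() * sizeof(float)`):

#include <cstring>
#include "paddle/contrib/inference/paddle_inference_api.h"

void EnsureAndCopy(paddle::PaddleBuf* out, const float* src, size_t needed) {
  if (out->length() < needed) {
    out->Resize(needed);  // asserts if `out` wraps external memory it can't grow
  }
  std::memcpy(out->data(), src, needed);
}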

paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc

Lines changed: 7 additions & 9 deletions

@@ -37,28 +37,26 @@ TEST(inference, anakin) {

   float data[1 * 3 * 224 * 224] = {1.0f};

-  PaddleBuf buf{.data = data, .length = sizeof(data)};
   PaddleTensor tensor{.name = "input_0",
                       .shape = std::vector<int>({1, 3, 224, 224}),
-                      .data = buf,
+                      .data = PaddleBuf(data, sizeof(data)),
                       .dtype = PaddleDType::FLOAT32};

   // For simplicity, we set all the slots with the same data.
-  std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
+  std::vector<PaddleTensor> paddle_tensor_feeds;
+  paddle_tensor_feeds.emplace_back(std::move(tensor));

-  float data_out[1000];
-
-  PaddleBuf buf_out{.data = data_out, .length = sizeof(data)};
   PaddleTensor tensor_out{.name = "prob_out",
                           .shape = std::vector<int>({1000, 1}),
-                          .data = buf_out,
+                          .data = PaddleBuf(),
                           .dtype = PaddleDType::FLOAT32};

-  std::vector<PaddleTensor> outputs(1, tensor_out);
+  std::vector<PaddleTensor> outputs;
+  outputs.emplace_back(std::move(tensor_out));

   ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));

-  float* data_o = static_cast<float*>(outputs[0].data.data);
+  float* data_o = static_cast<float*>(outputs[0].data.data());
   for (size_t j = 0; j < 1000; ++j) {
     LOG(INFO) << "output[" << j << "]: " << data_o[j];
   }
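
The switch from `std::vector<PaddleTensor>(1, tensor)` to `emplace_back(std::move(tensor))` is forced by the new semantics: the fill constructor copies, and copying a tensor copies its `PaddleBuf`, which asserts unless the buffer wraps external memory. A sketch of the safe pattern (name and shape are illustrative):

#include <utility>
#include <vector>
#include "paddle/contrib/inference/paddle_inference_api.h"

int main() {
  float data[4] = {1.f, 2.f, 3.f, 4.f};
  paddle::PaddleTensor tensor{.name = "input_0",
                              .shape = std::vector<int>({4, 1}),
                              .data = paddle::PaddleBuf(data, sizeof(data)),
                              .dtype = paddle::PaddleDType::FLOAT32};
  std::vector<paddle::PaddleTensor> feeds;
  feeds.emplace_back(std::move(tensor));  // move; a copy could trip the ownership assert
  return 0;
}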

paddle/contrib/inference/paddle_inference_api_impl.cc

Lines changed: 7 additions & 6 deletions

@@ -178,8 +178,8 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,

     // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
     std::memcpy(static_cast<void *>(input_ptr),
-                inputs[i].data.data,
-                inputs[i].data.length);
+                inputs[i].data.data(),
+                inputs[i].data.length());
     feeds->push_back(input);
   }
   return true;
@@ -241,10 +241,11 @@ bool NativePaddlePredictor::GetFetch(
     }

     outputs->at(i).shape = shape;
-    outputs->at(i).data.length = sizeof(float) * data.size();
-    outputs->at(i).data.data = malloc(outputs->at(i).data.length);
-    std::memcpy(
-        outputs->at(i).data.data, data.data(), outputs->at(i).data.length);
+    auto &buffer = outputs->at(i).data;
+    if (buffer.empty() || buffer.length() < sizeof(float) * data.size()) {
+      buffer.Resize(sizeof(float) * data.size());
+    }
+    std::memcpy(buffer.data(), data.data(), buffer.length());
     outputs->at(i).dtype = PaddleDType::FLOAT32;
     // TODO(panyx0718): support other types? fill tensor name? avoid a copy.
   }
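
With `GetFetch()` resizing only when the existing buffer is too small, a caller can keep one `outputs` vector alive and let the first `Run()` allocate while later calls reuse the same storage. A sketch around a hypothetical `predictor` and `inputs`:

#include <vector>
#include "paddle/contrib/inference/paddle_inference_api.h"

void RunMany(paddle::PaddlePredictor* predictor,
             const std::vector<paddle::PaddleTensor>& inputs) {
  std::vector<paddle::PaddleTensor> outputs;
  for (int step = 0; step < 10; ++step) {
    predictor->Run(inputs, &outputs);  // first call allocates; later calls reuse
  }
}  // owned output buffers are released when `outputs` is destroyed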

paddle/contrib/inference/test_paddle_inference_api_impl.cc

Lines changed: 10 additions & 16 deletions

@@ -27,13 +27,12 @@ namespace paddle {

 PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
   PaddleTensor pt;
-  pt.data.data = t->data<void>();

   if (t->type() == typeid(int64_t)) {
-    pt.data.length = t->numel() * sizeof(int64_t);
+    pt.data.Reset(t->data<void>(), t->numel() * sizeof(int64_t));
     pt.dtype = PaddleDType::INT64;
   } else if (t->type() == typeid(float)) {
-    pt.data.length = t->numel() * sizeof(float);
+    pt.data.Reset(t->data<void>(), t->numel() * sizeof(float));
     pt.dtype = PaddleDType::FLOAT32;
   } else {
     LOG(FATAL) << "unsupported type.";
@@ -79,8 +78,8 @@ void MainWord2Vec(bool use_gpu) {
   std::vector<PaddleTensor> outputs;
   ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
   ASSERT_EQ(outputs.size(), 1UL);
-  size_t len = outputs[0].data.length;
-  float* data = static_cast<float*>(outputs[0].data.data);
+  size_t len = outputs[0].data.length();
+  float* data = static_cast<float*>(outputs[0].data.data());
   for (size_t j = 0; j < len / sizeof(float); ++j) {
     ASSERT_LT(data[j], 1.0);
     ASSERT_GT(data[j], -1.0);
@@ -103,8 +102,6 @@ void MainWord2Vec(bool use_gpu) {
     EXPECT_LT(lod_data[i] - data[i], 1e-3);
     EXPECT_GT(lod_data[i] - data[i], -1e-3);
   }
-
-  free(outputs[0].data.data);
 }

 void MainImageClassification(bool use_gpu) {
@@ -143,13 +140,12 @@ void MainImageClassification(bool use_gpu) {
   std::vector<PaddleTensor> outputs;
   ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
   ASSERT_EQ(outputs.size(), 1UL);
-  size_t len = outputs[0].data.length;
-  float* data = static_cast<float*>(outputs[0].data.data);
+  size_t len = outputs[0].data.length();
+  float* data = static_cast<float*>(outputs[0].data.data());
   float* lod_data = output1.data<float>();
   for (size_t j = 0; j < len / sizeof(float); ++j) {
     EXPECT_NEAR(lod_data[j], data[j], 1e-3);
   }
-  free(data);
 }

 void MainThreadsWord2Vec(bool use_gpu) {
@@ -192,8 +188,8 @@ void MainThreadsWord2Vec(bool use_gpu) {

       // check outputs range
       ASSERT_EQ(local_outputs.size(), 1UL);
-      const size_t len = local_outputs[0].data.length;
-      float* data = static_cast<float*>(local_outputs[0].data.data);
+      const size_t len = local_outputs[0].data.length();
+      float* data = static_cast<float*>(local_outputs[0].data.data());
       for (size_t j = 0; j < len / sizeof(float); ++j) {
         ASSERT_LT(data[j], 1.0);
         ASSERT_GT(data[j], -1.0);
@@ -205,7 +201,6 @@ void MainThreadsWord2Vec(bool use_gpu) {
       for (int i = 0; i < refs[tid].numel(); ++i) {
         EXPECT_NEAR(ref_data[i], data[i], 1e-3);
       }
-      free(data);
     });
   }
   for (int i = 0; i < num_jobs; ++i) {
@@ -251,14 +246,13 @@ void MainThreadsImageClassification(bool use_gpu) {

       // check outputs correctness
       ASSERT_EQ(local_outputs.size(), 1UL);
-      const size_t len = local_outputs[0].data.length;
-      float* data = static_cast<float*>(local_outputs[0].data.data);
+      const size_t len = local_outputs[0].data.length();
+      float* data = static_cast<float*>(local_outputs[0].data.data());
       float* ref_data = refs[tid].data<float>();
       EXPECT_EQ(refs[tid].numel(), len / sizeof(float));
       for (int i = 0; i < refs[tid].numel(); ++i) {
         EXPECT_NEAR(ref_data[i], data[i], 1e-3);
       }
-      free(data);
     });
   }
   for (int i = 0; i < num_jobs; ++i) {
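
The test helper now uses `Reset()` so the `PaddleBuf` becomes a non-owning view of the `LoDTensor`'s storage, and the destructor will not try to free framework-owned memory. The same idea without the framework types (`WrapExisting` is a hypothetical helper):

#include <cstdint>
#include "paddle/contrib/inference/paddle_inference_api.h"

paddle::PaddleTensor WrapExisting(int64_t* data, size_t num_elements) {
  paddle::PaddleTensor pt;
  pt.data.Reset(data, num_elements * sizeof(int64_t));  // view, not a copy
  pt.dtype = paddle::PaddleDType::INT64;
  pt.shape = {static_cast<int>(num_elements), 1};
  return pt;  // moved out; the PaddleBuf stays non-owning
}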
