Commit e1913bc

Fix MixedVector
1 parent 6d2c6f9 commit e1913bc

4 files changed: +92 -62 lines changed

paddle/fluid/framework/details/cow_ptr.h

Lines changed: 14 additions & 18 deletions
@@ -28,31 +28,27 @@ class COWPtr {
  private:
   RefPtr m_sp;
 
-  void detach() {
-    T* tmp = m_sp.get();
-    if (!(tmp == nullptr || m_sp.unique())) {
-      m_sp = RefPtr(new T(*tmp));
-    }
-  }
-
  public:
   COWPtr() : m_sp(nullptr) {}
   explicit COWPtr(T* t) : m_sp(t) {}
-  explicit COWPtr(const RefPtr& refptr) : m_sp(refptr) {}
 
-  const T& Data() const { return operator*(); }
+  const T& Data() const { return *m_sp; }
 
-  T* MutableData() { return operator->(); }
+  T* MutableData() {
+    DetachIfNotUnique();
+    return m_sp.get();
+  }
 
-  const T& operator*() const { return *m_sp; }
-  T& operator*() {
-    detach();
-    return *m_sp;
+  void DetachIfNotUnique() {
+    T* tmp = m_sp.get();
+    if (!(tmp == nullptr || m_sp.unique())) {
+      Detach();
+    }
   }
-  const T* operator->() const { return m_sp.operator->(); }
-  T* operator->() {
-    detach();
-    return m_sp.operator->();
+
+  void Detach() {
+    T* tmp = m_sp.get();
+    m_sp = RefPtr(new T(*tmp));
   }
 };
 }  // namespace details
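Aside: splitting detach() into DetachIfNotUnique() and Detach() is the heart of the fix, because Detach() can now be invoked explicitly (see Vector<T>::CUDAData below). For orientation, a minimal self-contained sketch of the copy-on-write contract these methods expose, with std::shared_ptr standing in for Paddle's RefPtr (an assumption made purely for illustration):

#include <cassert>
#include <memory>

// SimpleCOWPtr is a hypothetical stand-in for details::COWPtr<T>;
// RefPtr is assumed to behave like std::shared_ptr<T>.
template <typename T>
class SimpleCOWPtr {
 public:
  explicit SimpleCOWPtr(T *t) : m_sp(t) {}

  // Reads never copy: every holder shares one object.
  const T &Data() const { return *m_sp; }

  // Writes deep-copy first if the object is shared with anyone else.
  T *MutableData() {
    DetachIfNotUnique();
    return m_sp.get();
  }

  void DetachIfNotUnique() {
    if (m_sp != nullptr && m_sp.use_count() > 1) Detach();
  }

  // Unconditional deep copy, mirroring the now-public COWPtr::Detach().
  void Detach() { m_sp = std::make_shared<T>(*m_sp); }

 private:
  std::shared_ptr<T> m_sp;
};

int main() {
  SimpleCOWPtr<int> a(new int(1));
  SimpleCOWPtr<int> b = a;          // b shares a's int
  assert(&a.Data() == &b.Data());
  *b.MutableData() = 2;             // b detaches before writing
  assert(a.Data() == 1 && b.Data() == 2);
  return 0;
}

Reads stay cheap; only the first write after a copy pays for the deep copy.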

paddle/fluid/framework/mixed_vector.h

Lines changed: 75 additions & 39 deletions
@@ -17,6 +17,7 @@
 #include <algorithm>
 #include <initializer_list>
 #include <memory>
+#include <mutex>  // NOLINT
 #include <utility>
 #include <vector>
 #include "paddle/fluid/framework/details/cow_ptr.h"
@@ -51,6 +52,7 @@ struct CUDABuffer {
     ClearMemory();
     place_ = boost::get<platform::CUDAPlace>(place);
     data_ = memory::Alloc(place_, size);
+    PADDLE_ENFORCE_NOT_NULL(data_);
     size_ = size;
   }
 
@@ -62,7 +64,7 @@ struct CUDABuffer {
 
  private:
   void ClearMemory() const {
-    if (data_) {
+    if (data_ != nullptr) {
       memory::Free(place_, data_);
     }
   }
@@ -89,6 +91,7 @@ class Vector {
     template <typename U>
     explicit VectorData(const std::vector<U> &dat)
         : cpu_(dat), flag_(kDataInCPU) {}
+    ~VectorData() {}
 
     VectorData(const VectorData &o) {
       o.ImmutableCPU();
@@ -215,7 +218,7 @@ class Vector {
     size_t capacity() const { return cpu_.capacity(); }
 
     // reserve data
-    void reserve(size_t size) { cpu_.reserve(size); }
+    void reserve(size_t size) const { cpu_.reserve(size); }
 
     // implicit cast operator. Vector can be cast to std::vector implicitly.
     operator std::vector<T>() const {
@@ -229,6 +232,17 @@ class Vector {
       return cpu_ == other.cpu_;
     }
 
+    std::mutex &Mutex() const { return mtx_; }
+
+    std::unique_ptr<platform::CUDAPlace> CUDAPlace() const {
+      if (gpu_.data_ == nullptr) {
+        return nullptr;
+      } else {
+        return std::unique_ptr<platform::CUDAPlace>(
+            new platform::CUDAPlace(gpu_.place_));
+      }
+    }
+
    private:
     enum DataFlag {
       kDataInCPU = 0x01,
@@ -239,10 +253,15 @@ class Vector {
 
     void CopyToCPU() const {
       // COPY GPU Data To CPU
+      auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
+          platform::DeviceContextPool::Instance().Get(
+              platform::Place(gpu_.place_)));
+      auto stream = dev_ctx->stream();
       void *src = gpu_.data_;
       void *dst = cpu_.data();
       memory::Copy(platform::CPUPlace(), dst, gpu_.place_, src, gpu_.size_,
-                   nullptr);
+                   stream);
+      dev_ctx->Wait();
     }
 
     void MutableCPU() {
@@ -260,7 +279,7 @@ class Vector {
           SetFlag(kDataInCUDA);
         } else if (IsInCUDA() &&
                    !(boost::get<platform::CUDAPlace>(place) == gpu_.place_)) {
-          CopyCUDADataToAnotherPlace(place);
+          PADDLE_THROW("This situation should not happen");
           // Still dirty
         } else {
           // Dirty && DataInCUDA && Device is same
@@ -272,28 +291,21 @@ class Vector {
           CopyCPUDataToCUDA(place);
           SetFlag(kDataInCUDA);
         } else if (!(boost::get<platform::CUDAPlace>(place) == gpu_.place_)) {
-          CopyCUDADataToAnotherPlace(place);
+          PADDLE_THROW("This situation should not happen.");
        } else {
           // Not Dirty && DataInCUDA && Device is same
           // Do nothing.
         }
       }
     }
-    void CopyCUDADataToAnotherPlace(const platform::Place &place) const {
-      details::CUDABuffer tmp(place, gpu_.size_);
-      const void *src = gpu_.data_;
-      void *dst = tmp.data_;
 
-      memory::Copy(tmp.place_, dst, gpu_.place_, src, gpu_.size_, nullptr);
-      gpu_.Swap(tmp);
-    }
     void CopyCPUDataToCUDA(const platform::Place &place) const {
       void *src = cpu_.data();
      gpu_.Resize(place, cpu_.size() * sizeof(T));
       void *dst = gpu_.data_;
-      auto stream = static_cast<platform::CUDADeviceContext *>(
-                        platform::DeviceContextPool::Instance().Get(place))
-                        ->stream();
+      auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
+          platform::DeviceContextPool::Instance().Get(place));
+      auto stream = dev_ctx->stream();
       memory::Copy(gpu_.place_, dst, platform::CPUPlace(), src, gpu_.size_,
                    stream);
     }
@@ -319,6 +331,8 @@ class Vector {
     mutable std::vector<T> cpu_;
     mutable details::CUDABuffer gpu_;
     mutable int flag_;
+
+    mutable std::mutex mtx_;
   };
 
  public:
@@ -350,81 +364,103 @@ class Vector {
   Vector(Vector<T> &&other) { m_ = std::move(other.m_); }
 
   // CPU data access method. Mutable.
-  T &operator[](size_t i) { return (*m_)[i]; }
+  T &operator[](size_t i) { return (*m_.MutableData())[i]; }
 
   // CPU data access method. Immutable.
-  const T &operator[](size_t i) const { return (*m_)[i]; }
+  const T &operator[](size_t i) const { return m_.Data()[i]; }
 
   // std::vector iterator methods. Based on CPU data access method
-  size_t size() const { return m_->size(); }
+  size_t size() const { return m_.Data().size(); }
 
-  iterator begin() { return m_->begin(); }
+  iterator begin() { return m_.MutableData()->begin(); }
 
-  iterator end() { return m_->end(); }
+  iterator end() { return m_.MutableData()->end(); }
 
-  T &front() { return m_->front(); }
+  T &front() { return m_.MutableData()->front(); }
 
-  T &back() { return m_->back(); }
+  T &back() { return m_.MutableData()->back(); }
 
-  const_iterator begin() const { return m_->begin(); }
+  const_iterator begin() const { return m_.Data().begin(); }
 
-  const_iterator end() const { return m_->end(); }
+  const_iterator end() const { return m_.Data().end(); }
 
   const_iterator cbegin() const { return begin(); }
 
   const_iterator cend() const { return end(); }
 
-  const T &back() const { return m_->back(); }
+  const T &back() const { return m_.Data().back(); }
 
-  T *data() { return m_->data(); }
+  T *data() { return m_.MutableData()->data(); }
 
-  const T *data() const { return m_->data(); }
+  const T *data() const { return m_.Data().data(); }
 
-  const T &front() const { return m_->front(); }
+  const T &front() const { return m_.Data().front(); }
   // end of std::vector iterator methods
 
   // assign this from iterator.
   // NOTE: the iterator must support `end-begin`
   template <typename Iter>
   void assign(Iter begin, Iter end) {
-    m_->assign(begin, end);
+    m_.MutableData()->assign(begin, end);
   }
 
   // push_back. If the previous capacity is not enough, the memory will
   // double.
-  void push_back(T elem) { m_->push_back(elem); }
+  void push_back(T elem) { m_.MutableData()->push_back(elem); }
 
   // extend a vector by iterator.
   // NOTE: the iterator must support end-begin
   template <typename It>
   void Extend(It begin, It end) {
-    m_->Extend(begin, end);
+    m_.MutableData()->Extend(begin, end);
   }
 
   // resize the vector
   void resize(size_t size) {
     if (m_.Data().size() != size) {
-      m_->resize(size);
+      m_.MutableData()->resize(size);
     }
   }
 
   // get cuda ptr. immutable
   const T *CUDAData(platform::Place place) const {
-    return m_.Data().CUDAData(place);
+    {
+      auto &mtx = m_.Data().Mutex();
+      std::lock_guard<std::mutex> guard(mtx);
+      auto cuda_place = m_.Data().CUDAPlace();
+      if (cuda_place == nullptr ||
+          *cuda_place == boost::get<platform::CUDAPlace>(place)) {
+        return m_.Data().CUDAData(place);
+      }
+    }
+    // If m_ contains CUDAData in a different place. Detach manually.
+    m_.Detach();
+    return CUDAData(place);
   }
 
   // get cuda ptr. mutable
   T *CUDAMutableData(platform::Place place) {
-    return m_->CUDAMutableData(place);
+    {
+      auto &mtx = m_.Data().Mutex();
+      std::lock_guard<std::mutex> guard(mtx);
+      auto cuda_place = m_.Data().CUDAPlace();
+      if (cuda_place == nullptr ||
+          *cuda_place == boost::get<platform::CUDAPlace>(place)) {
+        return m_.MutableData()->CUDAMutableData(place);
+      }
+    }
+    // If m_ contains CUDAData in a different place. Detach manually.
+    m_.Detach();
+    return CUDAMutableData(place);
   }
 
   // clear
-  void clear() { m_->clear(); }
+  void clear() { m_.MutableData()->clear(); }
 
-  size_t capacity() const { return m_->capacity(); }
+  size_t capacity() const { return m_.Data().capacity(); }
 
   // reserve data
-  void reserve(size_t size) { m_->reserve(size); }
+  void reserve(size_t size) { m_.Data().reserve(size); }
 
   // the unify method to access CPU or CUDA data. immutable.
   const T *Data(platform::Place place) const {
@@ -445,7 +481,7 @@ class Vector {
   }
 
   // implicit cast operator. Vector can be cast to std::vector implicitly.
-  operator std::vector<T>() const { return *m_; }
+  operator std::vector<T>() const { return m_.Data(); }
 
   bool operator==(const Vector<T> &other) const {
     if (size() != other.size()) return false;
@@ -463,7 +499,7 @@ class Vector {
 
  private:
   // Vector is an COW object.
-  details::COWPtr<VectorData> m_;
+  mutable details::COWPtr<VectorData> m_;
 };
 
 #else  // PADDLE_WITH_CUDA
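The new CUDAData/CUDAMutableData bodies above share a check-then-detach-then-retry shape: take the per-VectorData mutex, serve from the cached buffer when it is absent or already on the requested place, otherwise release the lock, Detach() the COW pointer, and recurse. A condensed, self-contained sketch of that shape (CachedBuffer, CowHolder, and EnsureOn are hypothetical names, and an integer device id stands in for platform::CUDAPlace):

#include <cstdio>
#include <memory>
#include <mutex>

// Stand-in for VectorData: its copy constructor keeps only the CPU side,
// so a freshly detached copy has no cached device buffer, just as
// VectorData's copy constructor copies through ImmutableCPU().
struct CachedBuffer {
  CachedBuffer() = default;
  CachedBuffer(const CachedBuffer &) {}  // copy drops the cached device buffer
  std::mutex mtx;
  int device_id = -1;  // -1: nothing cached on any device yet
};

class CowHolder {
 public:
  // Mirrors the shape of Vector<T>::CUDAData(place).
  int EnsureOn(int device) {
    {
      std::lock_guard<std::mutex> guard(buf_->mtx);
      // Fast path: no buffer yet, or buffer already on `device`.
      if (buf_->device_id == -1 || buf_->device_id == device) {
        buf_->device_id = device;  // (re)materialize the buffer here
        return buf_->device_id;
      }
    }
    // Slow path: cached on another device, and possibly shared with other
    // holders. Deep-copy our state outside the lock, then retry.
    buf_ = std::make_shared<CachedBuffer>(*buf_);
    return EnsureOn(device);
  }

 private:
  std::shared_ptr<CachedBuffer> buf_ = std::make_shared<CachedBuffer>();
};

int main() {
  CowHolder a;
  a.EnsureOn(0);                       // caches on device 0
  CowHolder b = a;                     // shares the cached state
  std::printf("%d\n", b.EnsureOn(1));  // detaches, re-caches on device 1
  return 0;
}

The recursion terminates because the detached copy starts with no device buffer, so the retry always takes the fast path.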

paddle/fluid/operators/lookup_table_op.cu

Lines changed: 2 additions & 4 deletions
@@ -127,10 +127,8 @@ class LookupTableGradCUDAKernel : public framework::OpKernel<T> {
     auto gpu_place = boost::get<platform::CUDAPlace>(context.GetPlace());
 
     // TODO(yuyang18): Strange code here.
-    memory::Copy(platform::CPUPlace(),
-                 new_rows.CUDAMutableData(context.GetPlace()), gpu_place,
-                 ids_data, ids_num * sizeof(int64_t), stream);
-
+    memory::Copy(gpu_place, new_rows.CUDAMutableData(context.GetPlace()),
+                 gpu_place, ids_data, ids_num * sizeof(int64_t), stream);
     d_table->set_rows(new_rows);
 
     auto *d_table_value = d_table->mutable_value();
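The one-line story of this fix: new_rows.CUDAMutableData(context.GetPlace()) returns a device pointer, so the destination place passed to memory::Copy must be gpu_place, not platform::CPUPlace(); the transfer is device-to-device. In raw CUDA runtime terms, an illustrative sketch (not Paddle code) of what a correctly labeled copy looks like:

#include <cuda_runtime.h>
#include <cstdint>
#include <cstdio>

int main() {
  const size_t ids_num = 8;
  int64_t *src = nullptr, *dst = nullptr;
  cudaMalloc(&src, ids_num * sizeof(int64_t));
  cudaMalloc(&dst, ids_num * sizeof(int64_t));

  cudaStream_t stream;
  cudaStreamCreate(&stream);

  // Both pointers live in device memory, so the copy kind must say so;
  // describing the destination as host memory (the old CPUPlace()
  // destination) would request the wrong kind of transfer.
  cudaMemcpyAsync(dst, src, ids_num * sizeof(int64_t),
                  cudaMemcpyDeviceToDevice, stream);
  cudaStreamSynchronize(stream);

  cudaFree(src);
  cudaFree(dst);
  cudaStreamDestroy(stream);
  std::printf("device-to-device copy done\n");
  return 0;
}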

paddle/fluid/operators/sgd_op.cu

Lines changed: 1 addition & 1 deletion
@@ -89,7 +89,7 @@ class SGDOpCUDAKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_EQ(in_height, out_dims[0]);
 
     auto& in_value = grad->value();
-    framework::Vector<int64_t> in_rows(grad->rows());
+    auto& in_rows = grad->rows();
 
     int64_t in_row_numel = in_value.numel() / in_rows.size();
     PADDLE_ENFORCE_EQ(in_row_numel, param_out->numel() / in_height);
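Related cleanup: with Vector now a COW object, even a shallow copy is an extra holder, and an extra holder forces a deep copy on the next mutable access anywhere. Binding grad->rows() by reference avoids creating that second holder at all. A tiny sketch of the difference, assuming a hypothetical copy-reporting Rows type (purely illustrative, not Paddle's Vector):

#include <cstdio>

// Hypothetical stand-in for framework::Vector<int64_t>; it just reports
// when it is copy-constructed.
struct Rows {
  Rows() = default;
  Rows(const Rows &) { std::printf("copy constructed\n"); }
};

Rows &rows() {
  static Rows r;  // stands in for grad->rows()
  return r;
}

int main() {
  Rows old_style(rows());           // before: framework::Vector<int64_t> in_rows(grad->rows());
  const Rows &new_style = rows();   // after:  auto& in_rows = grad->rows();
  (void)old_style;
  (void)new_style;
  return 0;  // prints "copy constructed" exactly once
}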
