Commit c3c3c0b

polish code, test=develop
1 parent 7389597 commit c3c3c0b

9 files changed: +176 additions, -182 deletions


paddle/fluid/framework/mixed_vector.h

Lines changed: 0 additions & 6 deletions
@@ -488,12 +488,6 @@ class CPUVector : public std::vector<T, std::allocator<T>> {
     return os;
   }
 
-  size_t size() const noexcept {
-    size_t size =
-        static_cast<size_t>(std::vector<T, std::allocator<T>>::size());
-    return size;
-  }
-
   T &operator[](size_t id) { return this->at(id); }
 
   const T &operator[](size_t id) const { return this->at(id); }
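The deleted override only forwarded to the base class, so removing it changes nothing observable: a class that derives publicly from std::vector already exposes size(). A minimal standalone sketch (hypothetical MiniVector, not Paddle code) illustrating that the inherited size() is sufficient:

#include <cassert>
#include <cstddef>
#include <vector>

// Hypothetical stand-in for CPUVector<T>: with public inheritance the
// base-class size() is directly usable, so a forwarding override is redundant.
template <typename T>
class MiniVector : public std::vector<T> {
 public:
  T &operator[](std::size_t id) { return this->at(id); }
};

int main() {
  MiniVector<int> v;
  v.push_back(1);
  v.push_back(2);
  v.push_back(3);
  assert(v.size() == 3);  // std::vector<int>::size(), no override needed
  return 0;
}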

paddle/fluid/framework/selected_rows.cc

Lines changed: 52 additions & 0 deletions
@@ -140,6 +140,58 @@ bool SelectedRows::HasKey(int64_t key) const {
                  : true;
 }
 
+int64_t SelectedRows::AutoGrownIndex(int64_t key, bool auto_grown,
+                                     bool is_test) {
+  if (is_test) {
+    auto iter = id_to_index_.find(key);
+    if (iter == id_to_index_.end()) {
+      return -1;
+    } else {
+      return iter->second;
+    }
+  }
+
+  rwlock_->RDLock();
+  auto iter = id_to_index_.find(key);
+  if (iter == id_to_index_.end()) {
+    rwlock_->UNLock();
+    if (!auto_grown) {
+      PADDLE_THROW("key %d not found", key);
+    }
+    rwlock_->WRLock();
+    auto map_size = id_to_index_.size();
+    auto vector_size = rows_.size();
+    if (map_size != vector_size) {
+      rwlock_->UNLock();
+      PADDLE_THROW(
+          "id_to_index_ size %d should have the same size with rows_ %d",
+          map_size, vector_size);
+    }
+    auto write_iter = id_to_index_.find(key);
+    if (write_iter == id_to_index_.end()) {
+      int row_num = rows_.size();
+      if (row_num == value_->dims()[0]) {
+        rwlock_->UNLock();
+        PADDLE_THROW("selected rows is full, then length exceed %d", row_num);
+      }
+      // key logic to put a key into id_to_index_
+      rows_.push_back(key);
+      auto index = static_cast<int64_t>(rows_.size() - 1);
+      id_to_index_[key] = index;
+      rwlock_->UNLock();
+      return index;
+    } else {
+      auto index = write_iter->second;
+      rwlock_->UNLock();
+      return index;
+    }
+  } else {
+    auto index = iter->second;
+    rwlock_->UNLock();
+    return index;
+  }
+}
+
 void SelectedRows::SyncIndex() {
   rwlock_->WRLock();
   id_to_index_.clear();
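The out-of-line AutoGrownIndex follows a read-then-upgrade locking pattern: the common lookup happens under the read lock, and only when the key must be inserted does the code take the write lock and re-check the map, because another writer may have inserted the key in between. A minimal sketch of the same pattern using standard C++17 primitives (std::shared_mutex instead of Paddle's RWLock; names and the simplified error handling are hypothetical, not from this commit):

#include <cstdint>
#include <mutex>
#include <shared_mutex>
#include <stdexcept>
#include <unordered_map>
#include <vector>

// Hypothetical stand-in for the SelectedRows index: a shared (read) lock for
// lookups, an exclusive (write) lock only when the key must be inserted, and a
// re-check under the write lock because another writer may have won the race.
class GrowableIndex {
 public:
  int64_t AutoGrownIndex(int64_t key, bool auto_grown) {
    {
      std::shared_lock<std::shared_mutex> rd(mu_);
      auto it = id_to_index_.find(key);
      if (it != id_to_index_.end()) return it->second;
    }
    if (!auto_grown) throw std::out_of_range("key not found");
    std::unique_lock<std::shared_mutex> wr(mu_);
    auto it = id_to_index_.find(key);  // re-check after acquiring the write lock
    if (it != id_to_index_.end()) return it->second;
    rows_.push_back(key);
    int64_t index = static_cast<int64_t>(rows_.size() - 1);
    id_to_index_[key] = index;
    return index;
  }

 private:
  std::shared_mutex mu_;
  std::unordered_map<int64_t, int64_t> id_to_index_;
  std::vector<int64_t> rows_;
};

int main() {
  GrowableIndex idx;
  return idx.AutoGrownIndex(/*key=*/42, /*auto_grown=*/true) == 0 ? 0 : 1;
}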

paddle/fluid/framework/selected_rows.h

Lines changed: 9 additions & 46 deletions
@@ -118,54 +118,17 @@ class SelectedRows {
    *
    * @return index of the key.
    */
-  inline int64_t AutoGrownIndex(int64_t key, bool auto_grown,
-                                bool is_test = false) {
-    if (is_test) {
-      auto iter = id_to_index_.find(key);
-      if (iter == id_to_index_.end()) {
-        return -1;
-      } else {
-        return iter->second;
-      }
-    }
-    rwlock_->RDLock();
+  int64_t AutoGrownIndex(int64_t key, bool auto_grown, bool is_test = false);
+
+  /*
+   * @brief Get the index of the key from id_to_index_ map.
+   */
+  inline int64_t GetIndexFromId(int64_t key) {
     auto iter = id_to_index_.find(key);
     if (iter == id_to_index_.end()) {
-      rwlock_->UNLock();
-      if (!auto_grown) {
-        PADDLE_THROW("key %d not found", key);
-      }
-      rwlock_->WRLock();
-      auto map_size = id_to_index_.size();
-      auto vector_size = rows_.size();
-      if (map_size != vector_size) {
-        rwlock_->UNLock();
-        PADDLE_THROW(
-            "id_to_index_ size %d should have the same size with rows_ %d",
-            map_size, vector_size);
-      }
-      auto write_iter = id_to_index_.find(key);
-      if (write_iter == id_to_index_.end()) {
-        int row_num = rows_.size();
-        if (row_num == value_->dims()[0]) {
-          rwlock_->UNLock();
-          PADDLE_THROW("selected rows is full, then length exceed %d", row_num);
-        }
-        // key logic to put a key into id_to_index_
-        rows_.push_back(key);
-        auto index = static_cast<int64_t>(rows_.size() - 1);
-        id_to_index_[key] = index;
-        rwlock_->UNLock();
-        return index;
-      } else {
-        auto index = write_iter->second;
-        rwlock_->UNLock();
-        return index;
-      }
+      return -1;
     } else {
-      auto index = iter->second;
-      rwlock_->UNLock();
-      return index;
+      return iter->second;
     }
   }

@@ -185,7 +148,7 @@ class SelectedRows {
   // SelectedRows add a Tensor, will the duplicate rows be handled.
   Vector<int64_t> rows_;
   std::unordered_map<int64_t, int64_t>
-      id_to_index_;  // should not be used when ids has duplicate member
+      id_to_index_;  // should not be used when rows_ has duplicate member
   std::unique_ptr<Tensor> value_{nullptr};
   int64_t height_;  // height indicates the underline tensor's height
   std::unique_ptr<RWLock> rwlock_{nullptr};
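After this split, AutoGrownIndex is the locking, possibly-growing lookup, while GetIndexFromId is a plain read of id_to_index_ (useful after SyncIndex, and only safe when the index is not being grown concurrently). A hedged fragment showing how the two entry points divide the work; the table setup and variable names are illustrative, not from this commit:

// Sketch only: assumes a SelectedRows whose value tensor and rows have been
// set up elsewhere; "table" is an illustrative name.
paddle::framework::SelectedRows table;

// Training path: grow the index for an unseen key (takes the rwlock).
int64_t idx_train = table.AutoGrownIndex(/*key=*/7, /*auto_grown=*/true);

// Inference path: is_test = true never grows and never locks; -1 means absent.
int64_t idx_test =
    table.AutoGrownIndex(/*key=*/7, /*auto_grown=*/false, /*is_test=*/true);

// Pure map lookup (e.g. after SyncIndex()), no locking at all.
int64_t idx_cached = table.GetIndexFromId(/*key=*/7);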

paddle/fluid/operators/hierarchical_sigmoid_op.cc

Lines changed: 1 addition & 1 deletion
@@ -101,7 +101,7 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
              "it should have shape like [N, L], L is the length of the Path")
         .AsDispensable();
     AddInput(
-        "PCode",
+        "PathCode",
         "(LoDTensor, optional), The Code on each Node of the Path from root "
         "to current word"
         "it should have shape like [N, L], L is the length of the Path")

paddle/fluid/operators/hierarchical_sigmoid_op.h

Lines changed: 38 additions & 41 deletions
@@ -19,9 +19,11 @@ limitations under the License. */
 #include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/clip_op.h"
+#include "paddle/fluid/operators/detail/safe_ref.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/matrix_bit_code.h"
 #include "paddle/fluid/platform/transform.h"
+
 namespace paddle {
 namespace operators {
 
@@ -30,31 +32,26 @@ template <typename T, int MajorType = Eigen::RowMajor,
 using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
 using platform::Transform;
 
-std::vector<int64_t> cal_rows(const framework::LoDTensor& path) {
-  std::set<int64_t> tmp;
-  std::vector<int64_t> rows;
-  for (size_t i = 0; i < static_cast<size_t>(path.dims()[0]); i++) {
-    for (size_t j = 0; j < static_cast<size_t>(path.dims()[1]); j++) {
-      int64_t temp =
-          path.data<int64_t>()[i * static_cast<size_t>(path.dims()[1]) + j];
-      if (temp >= 0) {
-        tmp.insert(temp);
-      }
+static std::vector<int64_t> PathToRows(const framework::LoDTensor& path) {
+  std::set<int64_t> rows;
+  for (int64_t i = 0; i < path.numel(); ++i) {
+    int64_t row = path.data<int64_t>()[i];
+    if (row < 0) {
+      continue;
     }
+    rows.emplace(row);
   }
-  rows.assign(tmp.begin(), tmp.end());
-  return rows;
+  return std::vector<int64_t>(rows.begin(), rows.end());
 }
-
 template <typename DeviceContext, typename T>
 class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* in = ctx.Input<framework::LoDTensor>("X");
-    auto* w = ctx.Input<framework::LoDTensor>("W");
+    auto in = detail::Ref(ctx.Input<framework::LoDTensor>("X"));
+    auto w = detail::Ref(ctx.Input<framework::LoDTensor>("W"));
     auto* path = ctx.Input<framework::LoDTensor>("PTable");
-    auto* code = ctx.Input<framework::LoDTensor>("PCode");
-    auto* label = ctx.Input<framework::LoDTensor>("Label");
+    auto* code = ctx.Input<framework::LoDTensor>("PathCode");
+    auto label = detail::Ref(ctx.Input<framework::LoDTensor>("Label"));
     auto* bias = ctx.Input<framework::LoDTensor>("Bias");
     auto* out = ctx.Output<framework::LoDTensor>("Out");
     auto* pre_out = ctx.Output<framework::LoDTensor>("PreOut");

@@ -65,7 +62,7 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
     }
     int64_t code_length =
         path ? path->dims()[1] : math::FindLastSet(num_classes - 1);
-    int64_t batch_size = in->dims()[0];
+    int64_t batch_size = in.dims()[0];
     framework::LoDTensor sum;
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
     auto* pre_out_data = pre_out->mutable_data<T>(

@@ -81,10 +78,10 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
     std::unique_ptr<math::MatrixBitCodeFunctor<T>> bit_code;
     if (!is_custom) {
       bit_code.reset(new math::MatrixBitCodeFunctor<T>(num_classes,
-                                                       label->data<int64_t>()));
+                                                       label.data<int64_t>()));
     } else {
-      bit_code.reset(new math::MatrixBitCodeFunctor<T>(path, code,
-                                                       label->data<int64_t>()));
+      bit_code.reset(new math::MatrixBitCodeFunctor<T>(*path, *code,
+                                                       label.data<int64_t>()));
     }
 
     std::vector<int64_t> sum_dims({batch_size, 1UL});

@@ -95,7 +92,7 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
     if (bias) {
       bit_code->Add(*bias, pre_out);
     }
-    bit_code->Mul(pre_out, *w, *in);
+    bit_code->Mul(pre_out, w, in);
     // clip to [-40, 40]
     Transform<DeviceContext> trans;
     trans(ctx.template device_context<DeviceContext>(), pre_out_data,

@@ -117,23 +114,23 @@ template <typename DeviceContext, typename T>
 class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* in = ctx.Input<framework::LoDTensor>("X");
-    auto* w = ctx.Input<framework::LoDTensor>("W");
+    auto in = detail::Ref(ctx.Input<framework::LoDTensor>("X"));
+    auto w = detail::Ref(ctx.Input<framework::LoDTensor>("W"));
     auto* path = ctx.Input<framework::LoDTensor>("PTable");
-    auto* code = ctx.Input<framework::LoDTensor>("PCode");
+    auto* code = ctx.Input<framework::LoDTensor>("PathCode");
     auto* bias = ctx.Input<framework::LoDTensor>("Bias");
     auto* in_grad =
         ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
     bool is_sparse = ctx.Attr<bool>("is_sparse");
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
     math::SetConstant<DeviceContext, T> zero;
-    auto* label = ctx.Input<framework::LoDTensor>("Label");
-    auto* pre_out = ctx.Input<framework::LoDTensor>("PreOut");
-    auto* out_grad =
-        ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"));
+    auto label = detail::Ref(ctx.Input<framework::LoDTensor>("Label"));
+    auto pre_out = detail::Ref(ctx.Input<framework::LoDTensor>("PreOut"));
+    auto out_grad = detail::Ref(
+        ctx.Input<framework::LoDTensor>(framework::GradVarName("Out")));
     framework::LoDTensor pre_out_grad;
 
-    pre_out_grad.mutable_data<T>(pre_out->dims(), ctx.GetPlace());
+    pre_out_grad.mutable_data<T>(pre_out.dims(), ctx.GetPlace());
     in_grad->mutable_data<T>(ctx.GetPlace());
     zero(dev_ctx, in_grad, static_cast<T>(0.0));

@@ -147,16 +144,16 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
     std::unique_ptr<math::MatrixBitCodeFunctor<T>> bit_code;
     if (!is_custom) {
       bit_code.reset(new math::MatrixBitCodeFunctor<T>(num_classes,
-                                                       label->data<int64_t>()));
+                                                       label.data<int64_t>()));
     } else {
-      bit_code.reset(new math::MatrixBitCodeFunctor<T>(path, code,
-                                                       label->data<int64_t>()));
+      bit_code.reset(new math::MatrixBitCodeFunctor<T>(*path, *code,
+                                                       label.data<int64_t>()));
     }
 
     auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
-    auto pre_out_mat = EigenMatrix<T>::From(*pre_out);
+    auto pre_out_mat = EigenMatrix<T>::From(pre_out);
     auto pre_out_grad_mat = EigenMatrix<T>::From(pre_out_grad);
-    auto out_grad_mat = EigenMatrix<T>::From(*out_grad);
+    auto out_grad_mat = EigenMatrix<T>::From(out_grad);
 
     Eigen::array<int, 2> bcast{1, static_cast<int>(pre_out_grad.dims()[1])};

@@ -181,17 +178,17 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
           ctx.Output<framework::LoDTensor>(framework::GradVarName("W"));
       w_grad->mutable_data<T>(ctx.GetPlace());
       zero(dev_ctx, w_grad, static_cast<T>(0.0));
-      bit_code->MulGradWeight(pre_out_grad, w_grad, *in);
+      bit_code->MulGradWeight(pre_out_grad, w_grad, in);
     } else {
-      framework::Vector<int64_t> real_rows = cal_rows(*path);
+      framework::Vector<int64_t> real_rows = PathToRows(*path);
       auto* w_grad =
           ctx.Output<framework::SelectedRows>(framework::GradVarName("W"));
       w_grad->set_rows(real_rows);
       // Build a map of id -> row_index to speed up finding the index of one id
       w_grad->SyncIndex();
-      w_grad->set_height(w->dims()[0]);
+      w_grad->set_height(w.dims()[0]);
       auto* w_grad_value = w_grad->mutable_value();
-      framework::DDim temp_dim(w->dims());
+      framework::DDim temp_dim(w.dims());
       set(temp_dim, 0, real_rows.size());
 
       w_grad_value->mutable_data<T>(temp_dim, ctx.GetPlace());

@@ -211,9 +208,9 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
         zero(dev_ctx, bias_grad_value, static_cast<T>(0.0));
         bit_code->AddGrad(pre_out_grad, bias_grad);
       }
-      bit_code->MulGradWeight(pre_out_grad, w_grad, *in);
+      bit_code->MulGradWeight(pre_out_grad, w_grad, in);
     }
-    bit_code->MulGradError(pre_out_grad, *w, in_grad);
+    bit_code->MulGradError(pre_out_grad, w, in_grad);
   }
 };
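Two patterns in this file are worth calling out. detail::Ref (declared in safe_ref.h) is, as far as I can tell, a checked dereference: it enforces that the input pointer is non-null and returns a reference, which is why the kernels can drop the explicit pointer syntax. PathToRows replaces the old double loop with a single pass over path.numel() elements, collecting unique non-negative ids in a std::set so the returned row list comes out deduplicated and sorted. A standalone sketch of that row-collection idea, using a plain std::vector in place of the LoDTensor and the hypothetical name CollectRows:

#include <cassert>
#include <cstdint>
#include <set>
#include <vector>

// Hypothetical stand-in for PathToRows: keep each non-negative id once,
// in ascending order, exactly as a std::set iterates.
static std::vector<int64_t> CollectRows(const std::vector<int64_t>& path) {
  std::set<int64_t> rows;
  for (int64_t id : path) {
    if (id < 0) continue;  // skip negative entries, as PathToRows does
    rows.insert(id);
  }
  return std::vector<int64_t>(rows.begin(), rows.end());
}

int main() {
  std::vector<int64_t> path = {3, -1, 0, 3, 7, -1};
  std::vector<int64_t> rows = CollectRows(path);
  assert((rows == std::vector<int64_t>{0, 3, 7}));  // deduplicated and sorted
  return 0;
}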
