
Commit b10df8b

refine code and add none bias ut, test=develop
1 parent 81e1457 commit b10df8b

File tree

7 files changed: +94 -42 lines changed

paddle/fluid/framework/selected_rows.h

Lines changed: 2 additions & 1 deletion
@@ -118,7 +118,8 @@ class SelectedRows {
    *
    * @return index of the key.
    */
-  int64_t AutoGrownIndex(int64_t key, bool auto_grown, bool is_test = false) {
+  inline int64_t AutoGrownIndex(int64_t key, bool auto_grown,
+                                bool is_test = false) {
     if (is_test) {
       auto iter = id_to_index_.find(key);
       if (iter == id_to_index_.end()) {

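Only the head of AutoGrownIndex is visible in this hunk. As a minimal sketch of the assumed semantics (the -1 miss value and the growth rule are assumptions, not taken from this diff), the is_test flag turns the call into a pure lookup that never grows the id-to-index map:

    # Hypothetical Python sketch of the assumed AutoGrownIndex behavior; not Paddle code.
    def auto_grown_index(id_to_index, rows, key, auto_grown, is_test=False):
        if is_test:
            # Test mode: query only, never mutate; -1 for a missing key is assumed.
            return id_to_index.get(key, -1)
        if key not in id_to_index:
            if not auto_grown:
                raise KeyError(key)
            # Training mode with auto_grown: register the key as a new row.
            id_to_index[key] = len(rows)
            rows.append(key)
        return id_to_index[key]
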
paddle/fluid/operators/hierarchical_sigmoid_op.cc

Lines changed: 32 additions & 20 deletions
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/hierarchical_sigmoid_op.h"
+#include <string>
 #include <vector>
-
 namespace paddle {
 namespace operators {
 
@@ -109,7 +109,8 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("Bias",
              "(LoDTensor, optional), The bias is a tensor with shape or "
              "[non_leaf_num, 1]"
-             "[num_classes - 1, 1].");
+             "[num_classes - 1, 1].")
+        .AsDispensable();
     AddOutput(
         "Out",
         "(LoDTensor, required) The output of hierarchical sigmoid operator."
@@ -173,31 +174,42 @@ class HierarchicalSigmoidGradOpGradVarTypeInference
  public:
   void operator()(const framework::OpDesc& op_desc,
                   framework::BlockDesc* block) const override {
-    auto out_W_var_name = op_desc.Output(framework::GradVarName("W")).front();
-    auto out_Bias_var_name =
-        op_desc.Output(framework::GradVarName("Bias")).front();
+    auto w_grad_var_name = op_desc.Output(framework::GradVarName("W")).front();
+    auto bias_grad_var_name_vec =
+        op_desc.Output(framework::GradVarName("Bias"));
+    std::string bias_grad_var_name;
+    bool hasBias = false;
+    if (bias_grad_var_name_vec.size()) {
+      hasBias = true;
+      bias_grad_var_name =
+          op_desc.Output(framework::GradVarName("Bias")).front();
+    }
     auto attr = op_desc.GetAttr("is_sparse");
     bool is_sparse = boost::get<bool>(attr);
     if (is_sparse) {
-      VLOG(3) << "hierarchical_sigmoid_grad op " << framework::GradVarName("W")
-              << " is set to SelectedRows";
-      block->Var(out_W_var_name)
-          ->SetType(framework::proto::VarType::SELECTED_ROWS);
-      VLOG(3) << "hierarchical_sigmoid_grad op "
-              << framework::GradVarName("Bias") << " is set to SelectedRows";
-      block->Var(out_Bias_var_name)
+      VLOG(30) << "hierarchical_sigmoid_grad op " << framework::GradVarName("W")
+               << " is set to SelectedRows";
+      block->Var(w_grad_var_name)
           ->SetType(framework::proto::VarType::SELECTED_ROWS);
+      if (hasBias) {
+        VLOG(30) << "hierarchical_sigmoid_grad op "
+                 << framework::GradVarName("Bias") << " is set to SelectedRows";
+        block->Var(bias_grad_var_name)
+            ->SetType(framework::proto::VarType::SELECTED_ROWS);
+      }
     } else {
-      VLOG(3) << "hierarchical_sigmoid_grad op " << framework::GradVarName("W")
-              << " is set to LoDTensor";
-      block->Var(out_W_var_name)
-          ->SetType(framework::proto::VarType::LOD_TENSOR);
-      VLOG(3) << "hierarchical_sigmoid_grad op "
-              << framework::GradVarName("Bias") << " is set to LoDTensor";
-      block->Var(out_Bias_var_name)
+      VLOG(30) << "hierarchical_sigmoid_grad op " << framework::GradVarName("W")
+               << " is set to LoDTensor";
+      block->Var(w_grad_var_name)
           ->SetType(framework::proto::VarType::LOD_TENSOR);
+      if (hasBias) {
+        VLOG(30) << "hierarchical_sigmoid_grad op "
+                 << framework::GradVarName("Bias") << " is set to LoDTensor";
+        block->Var(bias_grad_var_name)
+            ->SetType(framework::proto::VarType::LOD_TENSOR);
+      }
     }
-    block->Var(out_W_var_name)->SetDataType(block->Var("W")->GetDataType());
+    block->Var(w_grad_var_name)->SetDataType(block->Var("W")->GetDataType());
   }
 };
 

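The rewritten inference tolerates a missing Bias@GRAD output now that Bias is dispensable. A rough Python rendering of the decision it makes (illustrative names, not the Paddle API):

    # Illustrative sketch of the var-type decision above; not runnable against Paddle.
    def infer_grad_var_types(outputs, is_sparse):
        # outputs maps a grad-output name to its list of variable names.
        grad_type = "SELECTED_ROWS" if is_sparse else "LOD_TENSOR"
        var_types = {outputs["W@GRAD"][0]: grad_type}
        # Bias is dispensable, so its gradient output may be absent.
        if outputs.get("Bias@GRAD"):
            var_types[outputs["Bias@GRAD"][0]] = grad_type
        return var_types
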
paddle/fluid/operators/hierarchical_sigmoid_op.h

Lines changed: 2 additions & 7 deletions
@@ -33,7 +33,6 @@ using platform::Transform;
 std::vector<int64_t> cal_rows(const framework::LoDTensor& path) {
   std::set<int64_t> tmp;
   std::vector<int64_t> rows;
-  rows.clear();
   for (size_t i = 0; i < static_cast<size_t>(path.dims()[0]); i++) {
     for (size_t j = 0; j < static_cast<size_t>(path.dims()[1]); j++) {
       int64_t temp =
@@ -63,8 +62,6 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
     bool is_custom = false;
     if (path) {
       is_custom = true;
-    } else {
-      is_custom = false;
     }
     int64_t code_length =
         path ? path->dims()[1] : math::FindLastSet(num_classes - 1);
@@ -96,7 +93,7 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
     out->mutable_data<T>(ctx.GetPlace());
     auto out_mat = framework::EigenVector<T>::Flatten(*out);
     if (bias) {
-      bit_code->Add(pre_out, *bias);
+      bit_code->Add(*bias, pre_out);
     }
     bit_code->Mul(pre_out, *w, *in);
     // clip to [-40, 40]
@@ -145,8 +142,6 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
     bool is_custom = false;
     if (path) {
       is_custom = true;
-    } else {
-      is_custom = false;
     }
 
     std::unique_ptr<math::MatrixBitCodeFunctor<T>> bit_code;
@@ -192,7 +187,7 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
       auto* w_grad =
           ctx.Output<framework::SelectedRows>(framework::GradVarName("W"));
       w_grad->set_rows(real_rows);
-      // build ids -> rows index map
+      // Build a map of id -> row_index to speed up finding the index of one id
      w_grad->SyncIndex();
       w_grad->set_height(w->dims()[0]);
       auto* w_grad_value = w_grad->mutable_value();

paddle/fluid/operators/math/matrix_bit_code.cc

Lines changed: 2 additions & 2 deletions
@@ -19,8 +19,8 @@ namespace operators {
 namespace math {
 
 template <typename T>
-void MatrixBitCodeFunctor<T>::Add(framework::LoDTensor* tmat,
-                                  const framework::LoDTensor& vec) {
+void MatrixBitCodeFunctor<T>::Add(const framework::LoDTensor& vec,
+                                  framework::LoDTensor* tmat) {
   size_t batch_size = tmat->dims()[0];
   size_t width = tmat->dims()[1];
   for (size_t i = 0; i < batch_size; ++i) {

paddle/fluid/operators/math/matrix_bit_code.h

Lines changed: 1 addition & 1 deletion
@@ -234,7 +234,7 @@ class MatrixBitCodeFunctor {
   /* For j < code_length
        tmat(i, j) += vec(0, index(i, j))
   */
-  void Add(framework::LoDTensor* tmat, const framework::LoDTensor& vec);
+  void Add(const framework::LoDTensor& vec, framework::LoDTensor* tmat);
 
   /* For j < code_length
        vec(0, index(i, j)) += tmat(i, j)

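Add now takes the bias vector first and the tensor it updates last, which is what the kernel call bit_code->Add(*bias, pre_out) above relies on. As a reference for the documented formula, a small NumPy sketch (index(i, j), normally supplied by the bit-code table, is passed in here as a callable):

    import numpy as np

    def bit_code_add(vec, tmat, index):
        # tmat(i, j) += vec(0, index(i, j)) for every j < code_length.
        batch_size, width = tmat.shape
        for i in range(batch_size):
            for j in range(width):
                tmat[i, j] += vec[0, index(i, j)]
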
python/paddle/fluid/layers/nn.py

Lines changed: 5 additions & 4 deletions
@@ -4535,12 +4535,12 @@ def nce(input,
 def hsigmoid(input,
              label,
              num_classes=None,
-             non_leaf_num=None,
-             ptable=None,
-             pcode=None,
              param_attr=None,
              bias_attr=None,
              name=None,
+             non_leaf_num=None,
+             ptable=None,
+             pcode=None,
              is_costum=False,
              is_sparse=False):
     """
@@ -4583,7 +4583,8 @@ def hsigmoid(input,
             will be named automatically. Default: None.
         is_costum: (bool|False)using user defined binary tree instead of default complete binary tree, if costum is
             set you need to set ptable/pcode/non_leaf_num, otherwise num_classes should be set
-        is_sparse: (bool|False)using sparse update instead of dense update
+        is_sparse: (bool|False)using sparse update instead of dense update, if set, the gradient
+            of W and input will be sparse.
 
     Returns:
         Out: (LodTensor) The cost of hierarchical sigmoid operator. the shape is [N, 1]

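For reference, a usage sketch of the reordered keyword arguments with a user-defined tree, modeled on hs_net_conf in the test file below; the data-layer names and shapes are illustrative assumptions:

    import paddle.fluid as fluid

    # Illustrative shapes: 8-dim features, paths/codes padded to width 3.
    emb = fluid.layers.data(name='emb', shape=[8], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    ptable = fluid.layers.data(name='ptable', shape=[3], dtype='int64')
    pcode = fluid.layers.data(name='pcode', shape=[3], dtype='int64')

    cost = fluid.layers.hsigmoid(
        input=emb,
        label=label,
        bias_attr=True,
        non_leaf_num=3,
        ptable=ptable,
        pcode=pcode,
        is_costum=True,
        is_sparse=True)  # gradients of W and the input become sparse
    avg_cost = fluid.layers.reduce_mean(cost)
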
python/paddle/fluid/tests/unittests/test_hsigmoid_op.py

Lines changed: 50 additions & 7 deletions
@@ -110,12 +110,13 @@ def hsigmoidWithCustomTree(x, w, ptable, pcode, label, bias, num_classes):
     pre_output = np.zeros((batch_size, code_length))
     pre_sum = np.zeros((batch_size, 1))
     out = np.zeros((batch_size, 1)).astype("float32")
-    for i in range(batch_size):
-        code_table = CodeTableWithCustomTree(ptable, pcode, i)
-        length = code_table.get_length()
-        for j in range(length):
-            idx = code_table.cal_index(j)
-            pre_output[i][j] += bias[idx][0]
+    if isinstance(bias, np.ndarray):
+        for i in range(batch_size):
+            code_table = CodeTableWithCustomTree(ptable, pcode, i)
+            length = code_table.get_length()
+            for j in range(length):
+                idx = code_table.cal_index(j)
+                pre_output[i][j] += bias[idx][0]
     for i in range(batch_size):
         code_table = CodeTableWithCustomTree(ptable, pcode, i)
         length = code_table.get_length()
@@ -215,11 +216,11 @@ def hs_net_conf(self, is_sparse):
         cost = fluid.layers.hsigmoid(
             input=emb,
             label=label,
+            bias_attr=True,
             non_leaf_num=3,
             ptable=ptable,
             pcode=pcode,
             is_costum=True,
-            bias_attr=True,
             is_sparse=is_sparse)
 
         avg_cost = fluid.layers.reduce_mean(cost)
@@ -299,5 +300,47 @@ def test_check_grad(self):
         self.check_grad(['Bias', 'X', 'W'], ['Out'], no_grad_set=set('Label'))
 
 
+class TestHSigmoidOpWithCostumTreeWithoutBias(OpTest):
+    def setUp(self):
+        self.op_type = "hierarchical_sigmoid"
+        num_classes = 6  # using 1,2,3,4,5,6 to build a huffman tree and select 1,2,5,6 as sample
+        feature_size = 8
+        batch_size = 4
+        x = np.random.random((batch_size, feature_size)).astype("float32") * 2
+        w = np.random.random(
+            (num_classes - 1, feature_size)).astype("float32") * 2
+        label = np.array([0, 1, 4, 5])
+        ptable = np.array(
+            [(0, 2, -1, -1, -1), (0, 1, 3, -1, -1), (0, 1, 4, -1, -1),
+             (0, 2, -1, -1,
+              -1)])  # np.array to store 1,2,5,6s' non-leaf path(root -> leaf)
+        pcode = np.array([(0, 0, -1, -1, -1), (1, 1, 1, -1, -1), (
+            1, 0, 0, -1, -1), (0, 1, -1, -1, -1)])  # np.array to store
+        # bias = np.random.random((num_classes - 1, 1)).astype("float32")
+        self.attrs = {'num_classes': num_classes, 'is_sparse': False}
+        self.inputs = {
+            'X': x,
+            'W': w,
+            'PTable': ptable,
+            'PCode': pcode,
+            'Label': label,
+        }
+        pre_output, out = hsigmoidWithCustomTree(
+            x=x,
+            w=w,
+            ptable=ptable,
+            pcode=pcode,
+            label=label,
+            bias=None,
+            num_classes=num_classes)
+        self.outputs = {'PreOut': pre_output, 'Out': out}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X', 'W'], ['Out'], no_grad_set=set('Label'))
+
+
 if __name__ == '__main__':
     unittest.main()
