
Commit f4be1d9

polish code and test
1 parent b8ff097 commit f4be1d9

3 files changed: +63 additions, -22 deletions


paddle/fluid/operators/hierarchical_sigmoid_op.cc

Lines changed: 1 addition & 1 deletion
@@ -115,7 +115,7 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
         "[batch_size, code_length], where code_length represents the "
         "maximum path length from root to leaf nodes.")
         .AsIntermediate();
-    AddAttr<AttrType>("num_classes", "(int, required), The number of classes")
+    AddAttr<AttrType>("num_classes", "(int, optional), The number of classes")
         .SetDefault(2);
     AddComment(R"DOC(
 The hierarchical sigmoid operator organize the classes into a binary tree.

python/paddle/fluid/layers/nn.py

Lines changed: 45 additions & 21 deletions
@@ -4348,12 +4348,14 @@ def nce(input,
 
 def hsigmoid(input,
              label,
-             num_classes,
-             ptabl=None,
+             num_classes=None,
+             non_leaf_num=None,
+             ptable=None,
              pcode=None,
              param_attr=None,
              bias_attr=None,
-             name=None):
+             name=None,
+             is_costum=False):
     """
     The hierarchical sigmoid operator is used to accelerate the training
     process of language model. This operator organizes the classes into a
@@ -4373,7 +4375,8 @@ def hsigmoid(input,
             and :math:`D` is the feature size.
         label (Variable): The tensor variable contains labels of training data.
             It's a tensor with shape is :math:`[N \\times 1]`.
-        num_classes: (int), The number of classes, must not be less than 2.
+        num_classes: (int), The number of classes, must not be less than 2. with default tree this has to be set
+        non_leaf_num: this defines the number of non-leaf nodes in costumed tree
         ptable: (Variable|None) this variable can store each batch of samples' path to root,
             it should be in leaf -> root order
             ptable should have the same shape with pcode, and for each sample i ptable[i] indicates a np.array like
@@ -4409,20 +4412,33 @@ def hsigmoid(input,
     out = helper.create_variable_for_type_inference(dtype)
     pre_out = helper.create_variable_for_type_inference(dtype)
     dim = input.shape[1]
-    if num_classes < 2:
-        raise ValueError("num_classes must not be less than 2.")
-    if (ptable is not None) and (pcode is None):
-        raise ValueError("pcode should not be None when ptable has been set")
-    elif (ptable is None) and (pcode is not None):
-        raise ValueError("ptable should not be None when pcode has been set")
+    if ((num_classes < 2) or (num_classes is None)) and (not is_costum):
+        raise ValueError(
+            "num_classes must not be less than 2 with default tree")
+
+    if (is_costum) and (pcode is None):
+        raise ValueError("pcode should not be None with costum tree")
+    elif (is_costum) and (ptable is None):
+        raise ValueError("ptable should not be None with costum tree")
+    elif (is_costum) and (non_leaf_num is None):
+        raise ValueError("non_leaf_num should not be None with costum tree")
     else:
         pass
 
-    weights = helper.create_parameter(
-        attr=helper.param_attr,
-        shape=[num_classes - 1, dim],
-        is_bias=False,
-        dtype=input.dtype)
+    weights = None
+
+    if not is_costum:
+        weights = helper.create_parameter(
+            attr=helper.param_attr,
+            shape=[num_classes - 1, dim],
+            is_bias=False,
+            dtype=input.dtype)
+    else:
+        weights = helper.create_parameter(
+            attr=helper.param_attr,
+            shape=[non_leaf_num, dim],
+            is_bias=False,
+            dtype=input.dtype)
     inputs = {
         "X": input,
         "W": weights,
@@ -4431,12 +4447,20 @@ def hsigmoid(input,
         "Label": label
     }
     if helper.bias_attr:
-        bias = helper.create_parameter(
-            attr=helper.bias_attr,
-            shape=[1, num_classes - 1],
-            is_bias=True,
-            dtype=input.dtype)
-        inputs['Bias'] = bias
+        if not is_costum:
+            bias = helper.create_parameter(
+                attr=helper.bias_attr,
+                shape=[1, num_classes - 1],
+                is_bias=True,
+                dtype=input.dtype)
+            inputs['Bias'] = bias
+        else:
+            bias = helper.create_parameter(
+                attr=helper.bias_attr,
+                shape=[1, non_leaf_num],
+                is_bias=True,
+                dtype=input.dtype)
+            inputs['Bias'] = bias
     helper.append_op(
         type="hierarchical_sigmoid",
         inputs=inputs,
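
For reference, the sketch below shows how the revised hsigmoid signature could be called after this change. It is illustrative only: the feature size, class count, and code length are assumptions rather than values taken from the commit; the parameter names (num_classes, non_leaf_num, ptable, pcode, is_costum) are the ones introduced in the diff above.

# Illustrative usage sketch (not part of the commit); shapes are assumptions.
import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[8], dtype='float32')   # features
y = fluid.layers.data(name='y', shape=[1], dtype='int64')     # labels

# Default tree: num_classes is still required here and must be >= 2.
out_default = fluid.layers.hsigmoid(input=x, label=y, num_classes=10)

# Custom tree: ptable/pcode carry each sample's path and branch codes,
# and non_leaf_num (rather than num_classes) sizes the weight parameter.
ptable = fluid.layers.data(name='ptable', shape=[6], dtype='int64')
pcode = fluid.layers.data(name='pcode', shape=[6], dtype='int64')
out_custom = fluid.layers.hsigmoid(
    input=x,
    label=y,
    non_leaf_num=6,
    ptable=ptable,
    pcode=pcode,
    is_costum=True)

One observation on the new validation block, offered as a reading of the diff rather than a required change: because (num_classes < 2) is evaluated before (num_classes is None), leaving num_classes as None on the default-tree path would raise a TypeError on Python 3 before the intended ValueError is reached; testing for None first would preserve the error message.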

python/paddle/fluid/tests/unittests/test_layers.py

Lines changed: 17 additions & 0 deletions
@@ -185,6 +185,23 @@ def test_hsigmoid(self):
                 input=x, label=y, num_classes=2))
         print(str(program))
 
+        program2 = Program()
+
+        with program_guard(program2):
+            x2 = layers.data(name='x2', shape=[4, 8], dtype='float32')
+            y2 = layers.data(name='y2', shape=[4], dtype='int64')
+            ptable = layers.data(name='ptable', shape=[4, 6], dtype='int64')
+            pcode = layers.data(name='pcode', shape=[4, 6], dtype='int64')
+            self.assertIsNotNone(
+                layers.hsigmoid(
+                    input=x2,
+                    label=y2,
+                    non_leaf_num=6,
+                    ptable=ptable,
+                    pcode=pcode,
+                    is_costum=True))
+        print(str(program2))
+
     def test_sequence_expand(self):
         program = Program()
         with program_guard(program):
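
Since the new test only builds the program symbolically through layers.data, the following is a hedged sketch of the kind of feed data ptable and pcode would carry at run time, following the docstring's leaf -> root convention; the node numbering, path lengths, and zero padding below are illustrative assumptions, not something this commit specifies.

# Hypothetical feed data for a custom tree (assumptions for illustration only).
import numpy as np

batch_size, code_length = 4, 6

# ptable[i]: indices of the non-leaf nodes on sample i's path, leaf -> root.
# pcode[i]:  the 0/1 branch taken at each of those nodes.
ptable = np.zeros((batch_size, code_length), dtype=np.int64)
pcode = np.zeros((batch_size, code_length), dtype=np.int64)

# Sample 0's class is three levels deep: it passes non-leaf nodes 5, 2, 0
# (leaf -> root) and takes branches 1, 0, 1 along the way.
ptable[0, :3] = [5, 2, 0]
pcode[0, :3] = [1, 0, 1]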
