
Commit 8df317a

Modify layers Doc (#1140)

* modify transformer-rst
* modify roformer tokenizer
* delete modifications
* modify datasets
* modify layers
* fix errors
* fix errors
* fix errors
1 parent bb70fd1 commit 8df317a

File tree

5 files changed: +96 -76 lines changed


paddlenlp/datasets/dataset.py

Lines changed: 15 additions & 15 deletions
@@ -77,14 +77,14 @@ def load_dataset(path_or_read_func,
         path_or_read_func (str|callable): Name of the dataset processing script
             in PaddleNLP library or a custom data reading function.
         name (str, optional): Additional name to select a more specific dataset.
-            Default to None.
-        data_files (str|list|tuple|dict, optional): Defineing the path of dataset
-            files. If None. `splits` must be specified. Default to None.
+            Defaults to None.
+        data_files (str|list|tuple|dict, optional): Defining the path of dataset
+            files. If None. `splits` must be specified. Defaults to None.
         splits (str|list|tuple, optional): Which split of the data to load. If None.
-            `data_files` must be specified. Default to None.
-        lazy (bool, optional): Wheather to return `MapDataset` or an `IterDataset`.
+            `data_files` must be specified. Defaults to None.
+        lazy (bool, optional): Weather to return `MapDataset` or an `IterDataset`.
             True for `IterDataset`. False for `MapDataset`. If None, return the
-            default type of this dataset.
+            default type of this dataset. Defaults to None.
         kwargs (dict): Other keyword arguments to be passed to the `DatasetBuilder`.

     Returns:
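
For context (this example is not part of the commit), a minimal sketch of how these arguments are typically combined; the dataset name and split names are illustrative assumptions:

    from paddlenlp.datasets import load_dataset

    # load a built-in dataset by name and pick the splits to return
    train_ds, dev_ds = load_dataset("chnsenticorp", splits=("train", "dev"))

    # with lazy=False a MapDataset is returned, with lazy=True an IterDataset
    train_iter = load_dataset("chnsenticorp", splits="train", lazy=True)
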
@@ -195,7 +195,7 @@ def filter(self, fn, num_workers=0):
             fn (callable): A filter function that takes a sample as input and
                 returns a boolean. Samples that return False would be discarded.
             num_workers(int, optional): Number of processes for multiprocessing. If
-                set to 0, it doesn't use multiprocessing. Defalt: 0.
+                set to 0, it doesn't use multiprocessing. Defaults to `0`.
         """
         assert num_workers >= 0, "num_workers should be a non-negative value"
         if num_workers > 0:
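
A hypothetical `filter` call, assuming a `MapDataset` named `train_ds` whose examples carry a `label` field (the field name is an assumption for illustration):

    # keep only the examples whose label equals 1; runs in the main process (num_workers=0)
    train_ds = train_ds.filter(lambda example: example["label"] == 1, num_workers=0)
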
@@ -241,14 +241,14 @@ def shard(self, num_shards=None, index=None, contiguous=False):
         Args:
             num_shards (int, optional): An integer representing the number of
                 data shards. If None, `num_shards` would be number of trainers.
-                Default: None
+                Defaults to `None`.
             index (int, optional): An integer representing the index of the
                 current shard. If None, `index` would be the current trainer rank
-                id. Default: None.
+                id. Defaults to `None`.
             contiguous: (bool, optional): If true, contiguous chunks of data
                 will be select for sharding. And total number of examples will
                 be the same. Otherwise each shard will contain all examples of
-                dataset whose index mod `num_shards` = `index`. Default: False.
+                dataset whose index mod `num_shards` = `index`. Defaults to `False`.
         """
         if num_shards is None:
             num_shards = dist.get_world_size()
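
A sharding sketch (again assuming the hypothetical `train_ds`); with `contiguous=True` each worker receives one consecutive block of examples:

    # split into 4 shards and take the first one, e.g. for data-parallel training
    shard_0 = train_ds.shard(num_shards=4, index=0, contiguous=True)
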
@@ -280,13 +280,13 @@ def map(self, fn, lazy=True, batched=False, num_workers=0):
             lazy (bool, optional): If True, transformations would be delayed and
                 performed on demand. Otherwise, transforms all samples at once. Note that
                 if `fn` is stochastic, `lazy` should be True or you will get the same
-                result on all epochs. Defalt: False.
+                result on all epochs. Defaults to False.
             batched(bool, optional): If True, transformations would take all examples as
                 input and return a collection of transformed examples. Note that if set
-                True, `lazy` option would be ignored. Defalt: False.
+                True, `lazy` option would be ignored. Defaults to False.
             num_workers(int, optional): Number of processes for multiprocessing. If
                 set to 0, it doesn't use multiprocessing. Note that if set to positive
-                value, `lazy` option would be ignored. Defalt: 0.
+                value, `lazy` option would be ignored. Defaults to 0.
         """

         assert num_workers >= 0, "num_workers should be a non-negative value"
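
A hedged `map` sketch with a tokenizer-style transform; the tokenizer checkpoint and the `text` field are assumptions, not something this commit prescribes:

    from paddlenlp.transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("bert-base-chinese")

    def convert_example(example):
        # tokenize lazily, sample by sample
        return tokenizer(example["text"], max_seq_len=128)

    train_ds = train_ds.map(convert_example, lazy=True)
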
@@ -416,10 +416,10 @@ def shard(self, num_shards=None, index=None):
         Args:
             num_shards (int, optional): An integer representing the number of
                 data shards. If None, `num_shards` would be number of trainers.
-                Default: None
+                Defaults to None.
             index (int, optional): An integer representing the index of the
                 current shard. If None, `index` would be the current trainer rank
-                id. Default: None.
+                id. Defaults to None.
         """
         if num_shards is None:
             num_shards = dist.get_world_size()

paddlenlp/layers/crf.py

Lines changed: 25 additions & 26 deletions
@@ -35,11 +35,11 @@ class LinearChainCrf(nn.Layer):
     See https://repository.upenn.edu/cgi/viewcontent.cgi?article=1162&context=cis_papers for reference.

     Args:
-        num_labels (`int`):
+        num_labels (int):
             The label number.
-        crf_lr (`float`, optional):
+        crf_lr (float, optional):
             The crf layer learning rate. Defaults to ``0.1``.
-        with_start_stop_tag (`bool`, optional):
+        with_start_stop_tag (bool, optional):
             If set to True, the start tag and stop tag will be considered, the transitions params will be a tensor with a shape of `[num_labels+2, num_labels+2]`.
             Else, the transitions params will be a tensor with a shape of `[num_labels, num_labels]`.
     """
@@ -105,14 +105,13 @@ def forward(self, inputs, lengths):
         Further, We can get F(n) is a recursive formula with F(n-1).

         Args:
-            inputs (`Tensor`):
+            inputs (Tensor):
                 The input predicted tensor. Its dtype is float32 and has a shape of `[batch_size, sequence_length, num_tags]`.
-            lengths (`Tensor`):
+            lengths (Tensor):
                 The input length. Its dtype is int64 and has a shape of `[batch_size]`.

         Returns:
-            norm_score (`Tensor`):
-                The normalizers tensor. Its dtype is float32 and has a shape of `[batch_size]`.
+            Tensor: Returns the normalizers tensor `norm_score`. Its dtype is float32 and has a shape of `[batch_size]`.
         """
         batch_size, seq_len, n_labels = inputs.shape
         inputs_t_exp = inputs.transpose([1, 0, 2]).unsqueeze(-1)
@@ -154,16 +153,15 @@ def gold_score(self, inputs, labels, lengths):
         $$ score(x,y) = \\sum_i Emit(x_i,y_i) + Trans(y_{i-1}, y_i) $$

         Args:
-            inputs (`Tensor`):
+            inputs (Tensor):
                 The input predicted tensor. Its dtype is float32 and has a shape of `[batch_size, sequence_length, num_tags]`.
-            labels (`Tensor`) :
+            labels (Tensor):
                 The input label tensor. Its dtype is int64 and has a shape of `[batch_size, sequence_length]`
-            lengths (`Tensor`):
+            lengths (Tensor):
                 The input length. Its dtype is int64 and has a shape of `[batch_size]`.

         Returns:
-            unnorm_score (`Tensor`):
-                The unnormalized sequence scores tensor. Its dtype is float32 and has a shape of `[batch_size]`.
+            Tensor: Returns the unnormalized sequence scores tensor `unnorm_score`. Its dtype is float32 and has a shape of `[batch_size]`.
         """
         unnorm_score = self._point_score(
             inputs, labels, lengths) + self._trans_score(labels, lengths)
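
A hedged sketch of the two scoring methods documented above, reusing the hypothetical `crf` from the previous snippet and assuming the emission layer already outputs `num_labels + 2` scores when start/stop tags are used:

    import paddle

    batch_size, seq_len, num_tags = 2, 6, 5 + 2
    emissions = paddle.rand([batch_size, seq_len, num_tags])
    lengths = paddle.to_tensor([6, 4], dtype="int64")
    labels = paddle.randint(0, 5, [batch_size, seq_len])      # gold tags in [0, num_labels)

    norm_score = crf(emissions, lengths)                      # logZ(x), shape [batch_size]
    gold_score = crf.gold_score(emissions, labels, lengths)   # score(x, y), shape [batch_size]
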
@@ -268,7 +266,7 @@ class LinearChainCrfLoss(nn.Layer):
     The negative log-likelihood for linear chain Conditional Random Field (CRF).

     Args:
-        crf (`LinearChainCrf`):
+        crf (LinearChainCrf):
             The `LinearChainCrf` network object. Its parameter will be used to calculate the loss.
     """

@@ -286,16 +284,16 @@ def forward(self, inputs, lengths, labels, old_version_labels=None):
         then we have $$ loss = -logp(y|x) = -log(exp(score(x,y))/Z(x)) = -score(x,y) + logZ(x) $$

         Args:
-            inputs (`Tensor`):
+            inputs (Tensor):
                 The input predicted tensor. Its dtype is float32 and has a shape of `[batch_size, sequence_length, num_tags]`.
-            lengths (`Tensor`):
+            lengths (Tensor):
                 The input length. Its dtype is int64 and has a shape of `[batch_size]`.
-            labels (`Tensor`) :
+            labels (Tensor) :
                 The input label tensor. Its dtype is int64 and has a shape of `[batch_size, sequence_length]`
-            old_version_labels (`Tensor`, optional): Unnecessary parameter for compatibility with older versions. Defaults to ``None``.
+            old_version_labels (Tensor, optional): Unnecessary parameter for compatibility with older versions. Defaults to ``None``.

         Returns:
-            loss (`Tensor`): The crf loss. Its dtype is float32 and has a shape of `[batch_size]`.
+            Tensor: The crf loss. Its dtype is float32 and has a shape of `[batch_size]`.
         """
         # Note: When closing to convergence, the loss could be a small negative number. This may caused by underflow when calculating exp in logsumexp.
         # We add relu here to avoid negative loss. In theory, the crf loss must be greater than or equal to 0, relu will not impact on it.
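
Continuing the same hypothetical setup, the loss layer wraps the CRF and realizes loss = -score(x, y) + logZ(x):

    from paddlenlp.layers import LinearChainCrfLoss

    crf_loss = LinearChainCrfLoss(crf)
    loss = crf_loss(emissions, lengths, labels)   # per-example loss, shape [batch_size]
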
@@ -318,9 +316,9 @@ class ViterbiDecoder(nn.Layer):
     ViterbiDecoder can decode the highest scoring sequence of tags, it should only be used at test time.

     Args:
-        transitions (`Tensor`):
+        transitions (Tensor):
             The transition matrix. Its dtype is float32 and has a shape of `[num_tags, num_tags]`.
-        with_start_stop_tag (`bool`, optional):
+        with_start_stop_tag (bool, optional):
             If set to True, the last row and the last column of transitions will be considered as start tag,
             the the penultimate row and the penultimate column of transitions will be considered as stop tag.
             Else, all the rows and columns will be considered as the real tag. Defaults to ``None``.
@@ -363,15 +361,16 @@ def forward(self, inputs, lengths):
         Decode the highest scoring sequence of tags.

         Args:
-            inputs (`Tensor`):
+            inputs (Tensor):
                 The unary emission tensor. Its dtype is float32 and has a shape of `[batch_size, sequence_length, num_tags]`.
-            length (`Tensor`):
+            length (Tensor):
                 The input length tensor storing real length of each sequence for correctness. Its dtype is int64 and has a shape of `[batch_size]`.
+
         Returns:
-            scores(`Tensor`):
-                The scores tensor containing the score for the Viterbi sequence. Its dtype is float32 and has a shape of `[batch_size]`.
-            paths(`Tensor`):
-                The paths tensor containing the highest scoring tag indices. Its dtype is int64 and has a shape of `[batch_size, sequence_length`].
+            tuple: Returns tuple (scores, paths). The `scores` tensor containing the score for the Viterbi sequence.
+                Its dtype is float32 and has a shape of `[batch_size]`.
+                The `paths` tensor containing the highest scoring tag indices.
+                Its dtype is int64 and has a shape of `[batch_size, sequence_length]`.
         """
         input_shape = paddle.shape(inputs)
         batch_size = input_shape[0]
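
And a decoding sketch reusing the hypothetical `crf` and `emissions` from above; the decoder consumes the learned transition matrix:

    from paddlenlp.layers import ViterbiDecoder

    decoder = ViterbiDecoder(crf.transitions, with_start_stop_tag=True)
    scores, paths = decoder(emissions, lengths)
    # scores: float32 [batch_size]; paths: int64 [batch_size, sequence_length]
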

paddlenlp/layers/sequence.py

Lines changed: 4 additions & 4 deletions
@@ -18,14 +18,14 @@ def sequence_mask(seq_ids, valid_lengths):
     To boost the performance, this sequence_mask is different with paddle.fluid.layers.sequence_mask

     Args:
-        seq_ids (`Tensor`):
+        seq_ids (Tensor):
             The whole sequence index, a tensor with a shape of [batch_size, sequence_length].
-        valid_lengths (`Tensor`):
+        valid_lengths (Tensor):
             The valid length of every sequence, a tensor with a shape of [batch_size].

     Returns:
-        mask (`Tensor`):
-            The output sequence mask. Its dtype is ``bool`` and has a shpe of [batch_size, sequence_length].
+        Tensor: Returns the output sequence mask `mask`.
+            Its dtype is `bool` and has a shape of [batch_size, sequence_length].
     """
     lengths_exp = valid_lengths.unsqueeze(1)
     mask = seq_ids < lengths_exp
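
The masking trick shown in this diff is a broadcast comparison of position indices against lengths; a standalone sketch with illustrative values:

    import paddle

    seq_ids = paddle.arange(5).unsqueeze(0).expand([2, 5])   # [[0,1,2,3,4], [0,1,2,3,4]]
    valid_lengths = paddle.to_tensor([3, 5])
    mask = seq_ids < valid_lengths.unsqueeze(1)
    # mask -> [[True, True, True, False, False],
    #          [True, True, True, True,  True ]]
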

paddlenlp/layers/tcn.py

Lines changed: 25 additions & 16 deletions
@@ -23,7 +23,7 @@ class Chomp1d(nn.Layer):
     Remove the elements on the right.

     Args:
-        chomp_size (`int`):
+        chomp_size (int):
             The number of elements removed.
     """

@@ -41,19 +41,19 @@ class TemporalBlock(nn.Layer):
     See the Figure 1(b) in https://arxiv.org/pdf/1803.01271.pdf for more details.

     Args:
-        n_inputs (`int`):
+        n_inputs (int):
             The number of channels in the input tensor.
-        n_outputs (`int`):
+        n_outputs (int):
             The number of filters.
-        kernel_size (`int`):
+        kernel_size (int):
             The filter size.
-        stride (`int`):
+        stride (int):
             The stride size.
-        dilation (`int`):
+        dilation (int):
             The dilation size.
-        padding (`int`):
+        padding (int):
             The size of zeros to be padded.
-        dropout (`float`, optional):
+        dropout (float, optional):
             Probability of dropout the units. Defaults to 0.2.
     """

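
A shape-only sketch of a single residual block; channel sizes are arbitrary, and the padding follows the usual (kernel_size - 1) * dilation rule so the sequence length is preserved (an assumption about how the block is meant to be configured):

    import paddle
    from paddlenlp.layers.tcn import TemporalBlock

    block = TemporalBlock(n_inputs=16, n_outputs=32, kernel_size=3,
                          stride=1, dilation=2, padding=(3 - 1) * 2, dropout=0.2)
    x = paddle.rand([8, 16, 100])   # [batch_size, n_inputs, sequence_length]
    y = block(x)                    # expected shape [8, 32, 100]
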
@@ -112,6 +112,12 @@ def init_weights(self):
                 paddle.tensor.normal(0.0, 0.01, self.downsample.weight.shape))

     def forward(self, x):
+        """
+        Args:
+            x (Tensor):
+                The input tensor with a shape of [batch_size, input_channel, sequence_length].
+
+        """
         out = self.net(x)
         res = x if self.downsample is None else self.downsample(x)
         return self.relu(out + res)
@@ -124,10 +130,14 @@ def __init__(self, input_channel, num_channels, kernel_size=2, dropout=0.2):
         such as LSTMs in many tasks. See https://arxiv.org/pdf/1803.01271.pdf for more details.

         Args:
-            input_channel ([int]): The number of channels in the input tensor.
-            num_channels ([list | tuple]): The number of channels in different layer.
-            kernel_size (int, optional): [description]. Defaults to 2.
-            dropout (float, optional): [description]. Defaults to 0.2.
+            input_channel (int):
+                The number of channels in the input tensor.
+            num_channels (list | tuple):
+                The number of channels in different layer.
+            kernel_size (int, optional):
+                The filter size.. Defaults to 2.
+            dropout (float, optional):
+                Probability of dropout the units.. Defaults to 0.2.
         """
         super(TCN, self).__init__()
         layers = nn.LayerList()
@@ -153,12 +163,11 @@ def forward(self, x):
         Apply temporal convolutional networks to the input tensor.

         Args:
-            x (`Tensor`):
-                The input tensor with a shape of [batch_size, input_channel, sequence_length].
+            x (Tensor):
+                The input tensor with a shape of [batch_size, input_channel, sequence_length].

         Returns:
-            output (`Tensor`):
-                The output tensor with a shape of [batch_size, num_channels[-1], sequence_length].
+            Tensor: The `output` tensor with a shape of [batch_size, num_channels[-1], sequence_length].
         """
         output = self.network(x)
         return output
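
A hypothetical end-to-end use of `TCN`; the channel list is arbitrary and only the input/output shapes follow the docstring:

    import paddle
    from paddlenlp.layers.tcn import TCN

    tcn = TCN(input_channel=16, num_channels=[64, 64, 64], kernel_size=2, dropout=0.2)
    x = paddle.rand([8, 16, 100])   # [batch_size, input_channel, sequence_length]
    out = tcn(x)                    # [batch_size, num_channels[-1], sequence_length] -> [8, 64, 100]
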

paddlenlp/losses/rdrop.py

Lines changed: 27 additions & 15 deletions
@@ -18,22 +18,24 @@

 __all__ = ['RDropLoss']

+
 class RDropLoss(nn.Layer):
     """
     R-Drop Loss implementation
     For more information about R-drop please refer to this paper: https://arxiv.org/abs/2106.14448
     Original implementation please refer to this code: https://github.com/dropreg/R-Drop
+
+    Args:
+        reduction(str, optional):
+            Indicate how to average the loss, the candicates are ``'none'``,``'batchmean'``,``'mean'``,``'sum'``.
+            If `reduction` is ``'mean'``, the reduced mean loss is returned;
+            If `reduction` is ``'batchmean'``, the sum loss divided by batch size is returned;
+            If `reduction` is ``'sum'``, the reduced sum loss is returned;
+            If `reduction` is ``'none'``, no reduction will be applied.
+            Defaults to ``'none'``.
     """
+
     def __init__(self, reduction='none'):
-        """
-        reduction(obj:`str`, optional): Indicate how to average the loss,
-            the candicates are ``'none'`` | ``'batchmean'`` | ``'mean'`` | ``'sum'``.
-            If `reduction` is ``'mean'``, the reduced mean loss is returned;
-            If `reduction` is ``'batchmean'``, the sum loss divided by batch size is returned;
-            if `reduction` is ``'sum'``, the reduced sum loss is returned;
-            if `reduction` is ``'none'``, no reduction will be apllied.
-            Default is ``'none'``.
-        """
         super(RDropLoss, self).__init__()
         if reduction not in ['sum', 'mean', 'none', 'batchmean']:
             raise ValueError(
@@ -44,15 +46,25 @@ def __init__(self, reduction='none'):
     def forward(self, p, q, pad_mask=None):
         """
         Args:
-            p(obj:`Tensor`): the first forward logits of training examples.
-            q(obj:`Tensor`): the second forward logits of training examples.
-            pad_mask(obj:`Tensor`, optional): The Tensor containing the binary mask to index with, it's data type is bool.
+            p(Tensor): the first forward logits of training examples.
+            q(Tensor): the second forward logits of training examples.
+            pad_mask(Tensor, optional): The Tensor containing the binary mask to index with, it's data type is bool.

         Returns:
-            loss(obj:`Tensor`): the rdrop loss of p and q
+            Tensor: Returns tensor `loss`, the rdrop loss of p and q.
         """
-        p_loss = F.kl_div(F.log_softmax(p, axis=-1), F.softmax(q, axis=-1), reduction=self.reduction)
-        q_loss = F.kl_div(F.log_softmax(q, axis=-1), F.softmax(p, axis=-1), reduction=self.reduction)
+        p_loss = F.kl_div(
+            F.log_softmax(
+                p, axis=-1),
+            F.softmax(
+                q, axis=-1),
+            reduction=self.reduction)
+        q_loss = F.kl_div(
+            F.log_softmax(
+                q, axis=-1),
+            F.softmax(
+                p, axis=-1),
+            reduction=self.reduction)

         # pad_mask is for seq-level tasks
         if pad_mask is not None:
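
A hedged usage sketch of the symmetric KL loss documented above; in real R-Drop training, `p` and `q` come from two dropout-perturbed forward passes of the same model over the same batch (random logits stand in for them here):

    import paddle
    from paddlenlp.losses import RDropLoss

    rdrop_loss = RDropLoss(reduction='none')

    p = paddle.rand([4, 10])   # logits from forward pass 1
    q = paddle.rand([4, 10])   # logits from forward pass 2
    kl = rdrop_loss(p, q)

    # typically combined with the task loss, e.g. total = ce_loss + alpha * kl
    # (alpha is a tuning assumption, not defined in this commit)
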
