Commit 04de795

Modify Roformer Doc (PaddlePaddle#1104)
* modify transformer-rst
* modify roformer tokenizer
* modify roformer model
* update
* modify transformer
* modify roformer modeling
* modify decoder
* update
* modify tokenizer
* modify token_embedding
1 parent 081e285 commit 04de795

File tree

7 files changed: +555 -114 lines changed

paddlenlp/embeddings/token_embedding.py

Lines changed: 37 additions & 2 deletions
@@ -242,6 +242,14 @@ def search(self, words):
         Returns:
             `numpy.array`: The vectors of specifying words.
 
+        Examples:
+            .. code-block::
+
+                from paddlenlp.embeddings import TokenEmbedding
+
+                embed = TokenEmbedding()
+                vector = embed.search('Welcome to use PaddlePaddle and PaddleNLP!')
+
         """
         idx_list = self.get_idx_list_from_words(words)
         idx_tensor = paddle.to_tensor(idx_list)
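Beyond the snippet added in the docstring, here is a minimal runnable sketch of how `search` behaves, assuming the default pretrained embedding that `TokenEmbedding()` downloads on first use; the exact embedding dimension depends on that checkpoint.

```python
from paddlenlp.embeddings import TokenEmbedding

# Downloads and loads the default pretrained word embedding on first use.
embed = TokenEmbedding()

# search() accepts a single word or a list of words and returns a
# numpy array with one row per queried word.
vectors = embed.search(['PaddlePaddle', 'PaddleNLP'])
print(vectors.shape)  # (2, embedding_dim) for the loaded checkpoint
```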
@@ -271,6 +279,15 @@ def get_idx_list_from_words(self, words):
         Returns:
             `list`: The indexes list of specifying words.
 
+        Examples:
+            .. code-block::
+
+                from paddlenlp.embeddings import TokenEmbedding
+
+                embed = TokenEmbedding()
+                index = embed.get_idx_from_word('Welcome to use PaddlePaddle and PaddleNLP!')
+                #635963
+
         """
         if isinstance(words, str):
             idx_list = [self.get_idx_from_word(words)]
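Note that the docstring example above passes a whole sentence to `get_idx_from_word`, so it is looked up as one token (presumably falling back to an unknown-token index, hence the value 635963). For per-word lookup, `get_idx_list_from_words` accepts a list, as this hunk's own code path shows; a sketch:

```python
from paddlenlp.embeddings import TokenEmbedding

embed = TokenEmbedding()

# A single token maps to one vocabulary index.
idx = embed.get_idx_from_word('PaddlePaddle')

# A list of tokens maps to a list of indexes, one per word.
idx_list = embed.get_idx_list_from_words(['PaddlePaddle', 'PaddleNLP'])
print(idx, idx_list)
```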
@@ -305,7 +322,16 @@ def dot(self, word_a, word_b):
             word_b (`str`): The second word string.
 
         Returns:
-            `Float`: The dot product of 2 words.
+            float: The dot product of 2 words.
+
+        Examples:
+            .. code-block::
+
+                from paddlenlp.embeddings import TokenEmbedding
+
+                embed = TokenEmbedding()
+                dot_product = embed.dot('PaddlePaddle', 'PaddleNLP!')
+                #0.11827179
 
         """
         dot = self._dot_np
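As the hunk's `dot = self._dot_np` line suggests, `dot` is the plain inner product of the two word vectors, so it can be cross-checked against `search` with numpy (a sketch; the printed values depend on the loaded embedding):

```python
import numpy as np
from paddlenlp.embeddings import TokenEmbedding

embed = TokenEmbedding()

# dot(a, b) should equal the inner product of the two looked-up vectors.
vec_a, vec_b = embed.search(['PaddlePaddle', 'PaddleNLP'])
print(np.dot(vec_a, vec_b), embed.dot('PaddlePaddle', 'PaddleNLP'))
```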
@@ -321,7 +347,16 @@ def cosine_sim(self, word_a, word_b):
             word_b (`str`): The second word string.
 
         Returns:
-            `Float`: The cosine similarity of 2 words.
+            float: The cosine similarity of 2 words.
+
+        Examples:
+            .. code-block::
+
+                from paddlenlp.embeddings import TokenEmbedding
+
+                embed = TokenEmbedding()
+                cosine_simi = embed.cosine_sim('PaddlePaddle', 'PaddleNLP!')
+                #0.99999994
 
         """
         dot = self._dot_np
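`cosine_sim` is the same inner product scaled by the two vector norms. The near-1.0 value in the docstring example is plausible if both query strings fall outside the vocabulary and map to the same unknown-token vector, though that depends on the loaded checkpoint. A sketch of the norm scaling:

```python
import numpy as np
from paddlenlp.embeddings import TokenEmbedding

embed = TokenEmbedding()

# cosine_sim(a, b) = dot(a, b) / (||a|| * ||b||)
vec_a, vec_b = embed.search(['PaddlePaddle', 'PaddleNLP'])
manual = np.dot(vec_a, vec_b) / (np.linalg.norm(vec_a) * np.linalg.norm(vec_b))
print(manual, embed.cosine_sim('PaddlePaddle', 'PaddleNLP'))
```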

paddlenlp/transformers/bert/modeling.py

Lines changed: 1 addition & 1 deletion
@@ -484,7 +484,7 @@ def forward(self,
             tokenizer = BertTokenizer.from_pretrained('bert-wwm-chinese')
             model = BertModel.from_pretrained('bert-wwm-chinese')
 
-            inputs = tokenizer("欢迎使用百度飞浆!")
+            inputs = tokenizer("欢迎使用百度飞桨!")
             inputs = {k:paddle.to_tensor([v]) for (k, v) in inputs.items()}
             output = model(**inputs)
         '''
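The one-character fix replaces 浆 ("pulp") with 桨 ("paddle"): the string reads "欢迎使用百度飞桨!" ("Welcome to use Baidu PaddlePaddle!"), and 飞桨 is PaddlePaddle's Chinese name. A self-contained version of the corrected docstring example, assuming network access to fetch the 'bert-wwm-chinese' weights:

```python
import paddle
from paddlenlp.transformers import BertModel, BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-wwm-chinese')
model = BertModel.from_pretrained('bert-wwm-chinese')

# "Welcome to use Baidu PaddlePaddle!"
inputs = tokenizer("欢迎使用百度飞桨!")
inputs = {k: paddle.to_tensor([v]) for (k, v) in inputs.items()}

# In this PaddleNLP version, BertModel returns a
# (sequence_output, pooled_output) pair.
sequence_output, pooled_output = model(**inputs)
print(sequence_output.shape, pooled_output.shape)
```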
