@@ -25,6 +25,7 @@ class SqueezeBertTokenizer(PretrainedTokenizer):
25
25
Constructs a SqueezeBert tokenizer. It uses a basic tokenizer to do punctuation
26
26
splitting, lower casing and so on, followed by a WordPiece tokenizer to
27
27
tokenize as subwords.
28
+
28
29
Args:
29
30
vocab_file (str): file path of the vocabulary
30
31
do_lower_case (bool): Whether the text strips accents and convert to
@@ -35,6 +36,7 @@ class SqueezeBertTokenizer(PretrainedTokenizer):
35
36
pad_token (str): The special token for padding. Default: "[PAD]".
36
37
cls_token (str): The special token for cls. Default: "[CLS]".
37
38
mask_token (str): The special token for mask. Default: "[MASK]".
39
+
38
40
Examples:
39
41
.. code-block:: python
40
42
from paddlenlp.transformers import SqueezeBertTokenizer
@@ -47,9 +49,12 @@ class SqueezeBertTokenizer(PretrainedTokenizer):
47
49
resource_files_names = {"vocab_file" : "vocab.txt" } # for save_pretrained
48
50
pretrained_resource_files_map = {
49
51
"vocab_file" : {
50
- "squeezebert-uncased" : "squeezebert-uncased-vocab.txt" ,
51
- "squeezebert-mnli" : "squeezebert-mnli-vocab.txt" ,
52
- "queezebert-mnli-headless" : "queezebert-mnli-headless-vocab.txt" ,
52
+ "squeezebert-uncased" :
53
+ "http://bj.bcebos.com/paddlenlp/models/transformers/squeezebert/squeezebert-uncased/vocab.txt" ,
54
+ "squeezebert-mnli" :
55
+ "http://bj.bcebos.com/paddlenlp/models/transformers/squeezebert/squeezebert-mnli/vocab.txt" ,
56
+ "squeezebert-mnli-headless" :
57
+ "http://bj.bcebos.com/paddlenlp/models/transformers/squeezebert/squeezebert-mnli-headless/vocab.txt" ,
53
58
}
54
59
}
55
60
pretrained_init_configuration = {
@@ -59,7 +64,7 @@ class SqueezeBertTokenizer(PretrainedTokenizer):
59
64
"squeezebert-mnli" : {
60
65
"do_lower_case" : True
61
66
},
62
- "queezebert-mnli-headless" : {
67
+ "squeezebert-mnli-headless" : {
63
68
"do_lower_case" : True
64
69
}
65
70
}
0 commit comments