Skip to content

Commit 2cc34d0

Browse files
committed
git upgrade tokenizer
1 parent 08f6371 commit 2cc34d0

File tree

10 files changed: +2238 additions, -488 deletions (lines changed)

paddlenlp/transformers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,12 +53,14 @@
53 | 53 |   # isort: split
54 | 54 |   from .bert.modeling import *
55 | 55 |   from .bert.tokenizer import *
   | 56 | + from .bert.tokenizer_fast import *
56 | 57 |   from .bert.configuration import *
57 | 58 |
58 | 59 |   # isort: split
59 | 60 |   from .gpt import *
60 | 61 |   from .roberta.modeling import *
61 | 62 |   from .roberta.tokenizer import *
   | 63 | + from .roberta.tokenizer_fast import *
62 | 64 |   from .roberta.configuration import *
63 | 65 |   from .electra.modeling import *
64 | 66 |   from .electra.tokenizer import *

paddlenlp/transformers/auto/tokenizer.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -171,7 +171,7 @@ def get_configurations():
171 | 171 |
172 | 172 |
173 | 173 |   def tokenizer_class_from_name(class_name: str):
174 |     | - if class_name == "PretrainedTokenizerFast":
    | 174 | + if class_name in ["PretrainedTokenizerFast", "PreTrainedTokenizerFast"]:
175 | 175 |   return PretrainedTokenizerFast
176 | 176 |
177 | 177 |   for module_name, tokenizers in TOKENIZER_MAPPING_NAMES.items():
@@ -309,6 +309,8 @@ class AutoTokenizer:
309 | 309 |   pretrained weights/vocabulary.
310 | 310 |   AutoTokenizer is a generic tokenizer class that will be instantiated as one of the
311 | 311 |   base tokenizer classes when created with the AutoTokenizer.from_pretrained() classmethod.
    | 312 | +
    | 313 | + This class cannot be instantiated directly using `__init__()` (throws an error).
312 | 314 |   """
313 | 315 |
314 | 316 |   _tokenizer_mapping = get_configurations()
@@ -338,7 +340,7 @@ def _get_tokenizer_class_from_config(cls, pretrained_model_name_or_path, config_
338 | 340 |   try:
339 | 341 |   if tokenizer_class is None:
340 | 342 |   tokenizer_class = getattr(import_class, init_class)
341 |     | - except:
    | 343 | + except AttributeError:
342 | 344 |   raise ValueError(f"Tokenizer class {init_class} is not currently imported.")
343 | 345 |   return tokenizer_class
344 | 346 |   else:

paddlenlp/transformers/bert/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,3 +11,8 @@
11 | 11 |   # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | 12 |   # See the License for the specific language governing permissions and
13 | 13 |   # limitations under the License.
   | 14 | +
   | 15 | + from .configuration import *
   | 16 | + from .modeling import *
   | 17 | + from .tokenizer import *
   | 18 | + from .tokenizer_fast import *

0 commit comments

Comments
 (0)