Skip to content

Commit f9c1742

Browse files
refactor: change model path in MKTokenizer.
1 parent f457588 commit f9c1742

File tree

6 files changed

+4
-29027
lines changed

6 files changed

+4
-29027
lines changed

apps/common/config/tokenizer_manage_config.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -6,11 +6,8 @@
66
@date:2024/4/28 10:17
77
@desc:
88
"""
9-
import os
10-
from pathlib import Path
11-
12-
BASE_DIR = Path(__file__).resolve().parent.parent.parent
139

10+
import os
1411

1512
class MKTokenizer:
1613
def __init__(self, tokenizer):
@@ -27,6 +24,7 @@ class TokenizerManage:
2724
def get_tokenizer():
2825
from tokenizers import Tokenizer
2926
# 创建Tokenizer
30-
s = os.path.join(BASE_DIR.parent, 'tokenizer', 'bert-base-cased', 'tokenizer.json')
31-
TokenizerManage.tokenizer = Tokenizer.from_file(s)
27+
model_path = os.path.join("/opt/maxkb-app", "model", "tokenizer", "models--bert-base-cased")
28+
with open(f"{model_path}/refs/main", encoding="utf-8") as f: snapshot = f.read()
29+
TokenizerManage.tokenizer = Tokenizer.from_file(f"{model_path}/snapshots/{snapshot}/tokenizer.json")
3230
return MKTokenizer(TokenizerManage.tokenizer)

tokenizer/bert-base-cased/config.json

Lines changed: 0 additions & 23 deletions
This file was deleted.

tokenizer/bert-base-cased/special_tokens_map.json

Whitespace-only changes.

tokenizer/bert-base-cased/tokenizer.json

Lines changed: 0 additions & 1 deletion
This file was deleted.

tokenizer/bert-base-cased/tokenizer_config.json

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments (0)