
Commit 4e0f769

Only skip pre_tokenizer print for mecab tokenizer type
1 parent c484802

File tree

1 file changed (+4 -2 lines)


convert_hf_to_gguf_update.py

Lines changed: 4 additions & 2 deletions
@@ -225,12 +225,14 @@ def download_model(model):
         logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
         continue
 
+    pre_tokenizer_log = True
     if os.path.isfile(f"models/tokenizers/{name}/tokenizer_config.json"):
         with open(f"models/tokenizers/{name}/tokenizer_config.json", "r", encoding="utf-8") as f:
             cfg = json.load(f)
             if "word_tokenizer_type" in cfg and cfg["word_tokenizer_type"] == "mecab":
                 # Mecab need to be installed via fugashi
                 fugashi_check()
+                pre_tokenizer_log = False
 
     # create the tokenizer
     try:
@@ -251,8 +253,8 @@ def download_model(model):
     logger.info(f"chktok: {chktok}")
     logger.info(f"chkhsh: {chkhsh}")
 
-    # print the "pre_tokenizer" content from the tokenizer.json, if exists
-    if os.path.isfile(f"models/tokenizers/{name}/tokenizer.json"):
+    # print the "pre_tokenizer" content from the tokenizer.json
+    if pre_tokenizer_log:
         with open(f"models/tokenizers/{name}/tokenizer.json", "r", encoding="utf-8") as f:
             cfg = json.load(f)
             normalizer = cfg["normalizer"]
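
For context, the change boils down to a small gating pattern: set a flag to True by default, flip it to False while reading tokenizer_config.json when a mecab word tokenizer is declared, then consult the flag before printing the "pre_tokenizer" content. Below is a minimal standalone sketch of that logic, assuming the same models/tokenizers/{name} directory layout as the script; the helper name should_log_pre_tokenizer is invented here for illustration and does not exist in the script.

    import json
    import os

    def should_log_pre_tokenizer(name: str) -> bool:
        # Hypothetical helper mirroring the commit's flag: True by default,
        # False only when tokenizer_config.json declares a mecab word
        # tokenizer (the only case the commit skips).
        config_path = f"models/tokenizers/{name}/tokenizer_config.json"
        if os.path.isfile(config_path):
            with open(config_path, "r", encoding="utf-8") as f:
                cfg = json.load(f)
            if cfg.get("word_tokenizer_type") == "mecab":
                return False
        return True

With a helper like this, the pre_tokenizer print block would run for every tokenizer except mecab-type ones, which is exactly the narrowing the commit message describes.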
