Skip to content

Commit 565f335

Browse files
authored
Fix hotwords OOV log (k2-fsa#1139)
1 parent 1eb7982 commit 565f335

File tree

2 files changed

+15
-11
lines changed

2 files changed

+15
-11
lines changed

sherpa-onnx/csrc/utils.cc

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -62,9 +62,9 @@ static bool EncodeBase(const std::vector<std::string> &lines,
6262
break;
6363
default:
6464
SHERPA_ONNX_LOGE(
65-
"Cannot find ID for token %s at line: %s. (Hint: words on "
66-
"the same line are separated by spaces)",
67-
word.c_str(), line.c_str());
65+
"Cannot find ID for token %s at line: %s. (Hint: Check the "
66+
"tokens.txt see if %s in it)",
67+
word.c_str(), line.c_str(), word.c_str());
6868
has_oov = true;
6969
break;
7070
}

sherpa-onnx/python/sherpa_onnx/utils.py

Lines changed: 12 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
from pathlib import Path
55
from typing import List, Optional, Union
66

7+
78
def text2token(
89
texts: List[str],
910
tokens: str,
@@ -33,20 +34,20 @@ def text2token(
3334
is True, or it is a list of list of tokens.
3435
"""
3536
try:
36-
import sentencepiece as spm
37+
import sentencepiece as spm
3738
except ImportError:
38-
print('Please run')
39-
print(' pip install sentencepiece')
40-
print('before you continue')
39+
print("Please run")
40+
print(" pip install sentencepiece")
41+
print("before you continue")
4142
raise
4243

4344
try:
4445
from pypinyin import pinyin
4546
from pypinyin.contrib.tone_convert import to_initials, to_finals_tone
4647
except ImportError:
47-
print('Please run')
48-
print(' pip install pypinyin')
49-
print('before you continue')
48+
print("Please run")
49+
print(" pip install pypinyin")
50+
print("before you continue")
5051
raise
5152

5253
assert Path(tokens).is_file(), f"File not exists, {tokens}"
@@ -119,7 +120,10 @@ def text2token(
119120
if txt in tokens_table:
120121
text_list.append(tokens_table[txt] if output_ids else txt)
121122
else:
122-
print(f"OOV token : {txt}, skipping text : {text}.")
123+
print(
124+
f"Can't find token {txt} in token table, check your "
125+
f"tokens.txt see if {txt} in it. skipping text : {text}."
126+
)
123127
contain_oov = True
124128
break
125129
if contain_oov:

0 commit comments

Comments
 (0)