|
4 | 4 | from pathlib import Path |
5 | 5 | from typing import List, Optional, Union |
6 | 6 |
|
| 7 | + |
7 | 8 | def text2token( |
8 | 9 | texts: List[str], |
9 | 10 | tokens: str, |
@@ -33,20 +34,20 @@ def text2token( |
33 | 34 | is True, or it is a list of list of tokens. |
34 | 35 | """ |
35 | 36 | try: |
36 | | - import sentencepiece as spm |
| 37 | + import sentencepiece as spm |
37 | 38 | except ImportError: |
38 | | - print('Please run') |
39 | | - print(' pip install sentencepiece') |
40 | | - print('before you continue') |
| 39 | + print("Please run") |
| 40 | + print(" pip install sentencepiece") |
| 41 | + print("before you continue") |
41 | 42 | raise |
42 | 43 |
|
43 | 44 | try: |
44 | 45 | from pypinyin import pinyin |
45 | 46 | from pypinyin.contrib.tone_convert import to_initials, to_finals_tone |
46 | 47 | except ImportError: |
47 | | - print('Please run') |
48 | | - print(' pip install pypinyin') |
49 | | - print('before you continue') |
| 48 | + print("Please run") |
| 49 | + print(" pip install pypinyin") |
| 50 | + print("before you continue") |
50 | 51 | raise |
51 | 52 |
|
52 | 53 | assert Path(tokens).is_file(), f"File not exists, {tokens}" |
@@ -119,7 +120,10 @@ def text2token( |
119 | 120 | if txt in tokens_table: |
120 | 121 | text_list.append(tokens_table[txt] if output_ids else txt) |
121 | 122 | else: |
122 | | - print(f"OOV token : {txt}, skipping text : {text}.") |
| 123 | + print( |
| 124 | + f"Can't find token {txt} in token table, check your " |
| 125 | + f"tokens.txt see if {txt} in it. skipping text : {text}." |
| 126 | + ) |
123 | 127 | contain_oov = True |
124 | 128 | break |
125 | 129 | if contain_oov: |
|
0 commit comments