@@ -122,22 +122,22 @@ class TOKENIZER_TYPE(IntEnum):
122122]
123123
124124
def fugashi_check():
    """
    Check that fugashi and a Japanese dictionary are installed and usable.

    Imports ``fugashi`` and constructs a ``fugashi.Tagger`` as a smoke test.
    Nothing is installed and nothing is returned; the function only raises
    when the check fails.

    Raises:
        ImportError: if ``fugashi`` cannot be imported.
        RuntimeError: if constructing ``fugashi.Tagger()`` fails for any
            other reason (the message suggests a missing dictionary such as
            unidic-lite, but the real cause is attached as ``__cause__``).
    """
    try:
        import fugashi

        # Only the construction matters: it fails when no dictionary can be
        # loaded, so the resulting tagger object is deliberately discarded.
        fugashi.Tagger()
    except ImportError as err:
        raise ImportError(
            "fugashi is missing, install it via: pip install 'fugashi[unidic-lite]'"
        ) from err
    except Exception as err:
        # Broad on purpose: the tagger can fail in several ways and they are
        # all reported the same way; chaining keeps the original error visible.
        raise RuntimeError(
            "fugashi is installed, but it might be missing the dictionary (e.g., unidic-lite).\n"
            "Try installing via: pip install 'fugashi[unidic-lite]'"
        ) from err
141141
142142
143143def download_file_with_auth (url , token , save_path ):
@@ -230,7 +230,7 @@ def download_model(model):
230230 cfg = json .load (f )
231231 if "word_tokenizer_type" in cfg and cfg ["word_tokenizer_type" ] == "mecab" :
232232 # Mecab need to be installed via fugashi
233- install_if_missing ( "fugashi[unidic-lite]" )
233+ fugashi_check ( )
234234
235235 # create the tokenizer
236236 try :
0 commit comments