4545
4646from .misc import (doc_loaders_cache , html_to_text , hasher ,
4747 file_hasher , get_splitter , check_docs_tkn_length ,
48- average_word_length , wpm )
48+ average_word_length , wpm , loaders_temp_dir_file )
4949from .typechecker import optional_typecheck
5050from .logger import whi , yel , red , log
5151from .flags import is_verbose , is_linux
@@ -173,8 +173,6 @@ def load(self):
173173 ["norm" ],
174174]
175175
176- global_temp_dir = [None ] # will be replaced when load_one_doc is called
177-
178176
179177@optional_typecheck
180178def load_one_doc (
@@ -191,7 +189,11 @@ def load_one_doc(
191189 The loader is cached"""
192190 text_splitter = get_splitter (task )
193191
194- assert global_temp_dir [0 ] is temp_dir
192+ expected_global_dir = loaders_temp_dir_file .read_text ().strip ()
193+ assert expected_global_dir , f"Empty loaders_temp_dir_file at { loaders_temp_dir_file } "
194+ expected_global_dir = Path (expected_global_dir )
195+ assert expected_global_dir .exists (), f"File loaders_temp_dir_file not found in { loaders_temp_dir_file } pointing at '{ expected_global_dir } '"
196+ assert expected_global_dir == temp_dir , f"Error handling temp dir: temp_dir is { temp_dir } but loaders_temp_dir is { expected_global_dir } "
195197
196198 if filetype == "youtube" :
197199 docs = load_youtube_video (** kwargs )
@@ -397,7 +399,7 @@ def load_youtube_video(
397399 )
398400 else :
399401 whi (f"Downloading audio from url: '{ path } '" )
400- file_name = global_temp_dir [ 0 ] / f"youtube_audio_{ uuid .uuid4 ()} " # without extension!
402+ file_name = load_temp_dir / f"youtube_audio_{ uuid .uuid4 ()} " # without extension!
401403 ydl_opts = {
402404 'format' : 'bestaudio/best' ,
403405 'postprocessors' : [{
@@ -412,7 +414,7 @@ def load_youtube_video(
412414 with youtube_dl .YoutubeDL (ydl_opts ) as ydl :
413415 ydl .download ([path ])
414416 candidate = []
415- for f in global_temp_dir [ 0 ] .iterdir ():
417+ for f in load_temp_dir .iterdir ():
416418 if file_name .name in f .name :
417419 candidate .append (f )
418420 assert len (candidate ), f"Audio file of { path } failed to download?"
@@ -532,7 +534,7 @@ def load_anki(
532534 original_db = akp .find_db (user = anki_profile )
533535 name = f"{ anki_profile } " .replace (" " , "_" )
534536 random_val = str (uuid .uuid4 ()).split ("-" )[- 1 ]
535- new_db_path = global_temp_dir [ 0 ] / f"anki_collection_{ name .replace ('/' , '_' )} _{ random_val } "
537+ new_db_path = load_temp_dir / f"anki_collection_{ name .replace ('/' , '_' )} _{ random_val } "
536538 assert not Path (new_db_path ).exists (
537539 ), f"{ new_db_path } already existing!"
538540 shutil .copy (original_db , new_db_path )
@@ -924,8 +926,8 @@ def load_local_audio(
924926 )
925927 red (f"Removed silence from { path .name } : { dur :.1f} -> { new_dur :.1f} in { elapsed :.1f} s" )
926928
927- unsilenced_path_wav = global_temp_dir [ 0 ] / f"unsilenced_audio_{ uuid .uuid4 ()} .wav"
928- unsilenced_path_ogg = global_temp_dir [ 0 ] / f"unsilenced_audio_{ uuid .uuid4 ()} .ogg"
929+ unsilenced_path_wav = load_temp_dir / f"unsilenced_audio_{ uuid .uuid4 ()} .wav"
930+ unsilenced_path_ogg = load_temp_dir / f"unsilenced_audio_{ uuid .uuid4 ()} .ogg"
929931 assert not unsilenced_path_wav .exists ()
930932 assert not unsilenced_path_ogg .exists ()
931933 torchaudio .save (
@@ -1009,7 +1011,7 @@ def load_local_video(
10091011 ) -> List [Document ]:
10101012 assert Path (path ).exists (), f"file not found: '{ path } '"
10111013
1012- audio_path = global_temp_dir [ 0 ] / f"audio_from_video_{ uuid .uuid4 ()} .mp3"
1014+ audio_path = load_temp_dir / f"audio_from_video_{ uuid .uuid4 ()} .mp3"
10131015 assert not audio_path .exists ()
10141016
10151017 # extract audio from video
0 commit comments