@@ -86,19 +86,19 @@ def is_model_cached(repo_id: str, cache_dir: str):
8686 )
8787 if os .path .isfile (ref_file ):
8888 with open (ref_file , 'r' ) as fh :
89- t = fh .readlines ()
89+ t = fh .readlines () # First line of the ref file names the cached snapshot directory
9090 ref = t [0 ].strip ()
9191 else :
92- return False , snapshot_dir
92+ return False , snapshot_dir , ref_file
9393 snapshot_dir = os .path .join (
9494 cache_dir , f'models--{ repo_id .replace ("/" , "--" )} ' , 'snapshots' , ref
9595 )
9696 if os .path .isdir (snapshot_dir ):
97- return True , snapshot_dir
97+ return True , snapshot_dir , ref_file
9898 else :
99- return False , None
99+ return False , None , ref_file
100100 else :
101- return False , snapshot_dir
101+ return False , snapshot_dir , ref_file
102102
103103
104104def load_model_and_tokenizer (
@@ -116,7 +116,7 @@ def load_model_and_tokenizer(
116116 model_loader = AutoModelForMaskedLM
117117 if tokenizer_loader is None :
118118 tokenizer_loader = AutoTokenizer
119- exists , exists_at = is_model_cached (model_name , cache_dir )
119+ exists , exists_at , ref_file = is_model_cached (model_name , cache_dir )
120120 if exists :
121121 try :
122122 logger .info (f"Loading model and tokenizer from cache { exists_at } ..." )
@@ -135,8 +135,9 @@ def load_model_and_tokenizer(
135135 model_name , cache_dir = cache_dir , trust_remote_code = True
136136 )
137137 else :
138- logger .info (f"Did not find model and tokenizer in cache directory, downloading model "
139- f"and tokenizer from the internet and storing in cache { cache_dir } ..." )
138+ logger .info (f"Did not find model { model_name } and associated tokenizer in cache directory "
139+ f"(checked for model snapshot reference file { ref_file } ), downloading model and tokenizer "
140+ f"from the internet and storing in cache { cache_dir } ..." )
140141 model = model_loader .from_pretrained (
141142 model_name , cache_dir = cache_dir , trust_remote_code = True
142143 )
0 commit comments