18
18
import json
19
19
import os
20
20
import re
21
- import shutil
22
21
import sys
23
22
import tempfile
24
- import traceback
25
23
import warnings
26
24
from concurrent import futures
27
25
from pathlib import Path
40
38
create_branch ,
41
39
create_commit ,
42
40
create_repo ,
43
- get_hf_file_metadata ,
44
41
hf_hub_download ,
45
42
hf_hub_url ,
46
43
try_to_load_from_cache ,
@@ -86,7 +83,6 @@ def is_offline_mode():
86
83
87
84
torch_cache_home = os .getenv ("TORCH_HOME" , os .path .join (os .getenv ("XDG_CACHE_HOME" , "~/.cache" ), "torch" ))
88
85
default_cache_path = constants .default_cache_path
89
- old_default_cache_path = os .path .join (torch_cache_home , "transformers" )
90
86
91
87
# Determine default cache directory. Lots of legacy environment variables to ensure backward compatibility.
92
88
# The best way to set the cache path is with the environment variable HF_HOME. For more details, checkout this
@@ -100,23 +96,6 @@ def is_offline_mode():
100
96
PYTORCH_TRANSFORMERS_CACHE = os .getenv ("PYTORCH_TRANSFORMERS_CACHE" , PYTORCH_PRETRAINED_BERT_CACHE )
101
97
TRANSFORMERS_CACHE = os .getenv ("TRANSFORMERS_CACHE" , PYTORCH_TRANSFORMERS_CACHE )
102
98
103
- # Onetime move from the old location to the new one if no ENV variable has been set.
104
- if (
105
- os .path .isdir (old_default_cache_path )
106
- and not os .path .isdir (constants .HF_HUB_CACHE )
107
- and "PYTORCH_PRETRAINED_BERT_CACHE" not in os .environ
108
- and "PYTORCH_TRANSFORMERS_CACHE" not in os .environ
109
- and "TRANSFORMERS_CACHE" not in os .environ
110
- ):
111
- logger .warning (
112
- "In Transformers v4.22.0, the default path to cache downloaded models changed from"
113
- " '~/.cache/torch/transformers' to '~/.cache/huggingface/hub'. Since you don't seem to have"
114
- " overridden and '~/.cache/torch/transformers' is a directory that exists, we're moving it to"
115
- " '~/.cache/huggingface/hub' to avoid redownloading models you have already in the cache. You should"
116
- " only see this message once."
117
- )
118
- shutil .move (old_default_cache_path , constants .HF_HUB_CACHE )
119
-
120
99
HF_MODULES_CACHE = os .getenv ("HF_MODULES_CACHE" , os .path .join (constants .HF_HOME , "modules" ))
121
100
TRANSFORMERS_DYNAMIC_MODULE_NAME = "transformers_modules"
122
101
SESSION_ID = uuid4 ().hex
@@ -1087,47 +1066,6 @@ def get_checkpoint_shard_files(
1087
1066
return cached_filenames , sharded_metadata
1088
1067
1089
1068
1090
- # All what is below is for conversion between old cache format and new cache format.
1091
-
1092
-
1093
- def get_all_cached_files (cache_dir = None ):
1094
- """
1095
- Returns a list for all files cached with appropriate metadata.
1096
- """
1097
- if cache_dir is None :
1098
- cache_dir = TRANSFORMERS_CACHE
1099
- else :
1100
- cache_dir = str (cache_dir )
1101
- if not os .path .isdir (cache_dir ):
1102
- return []
1103
-
1104
- cached_files = []
1105
- for file in os .listdir (cache_dir ):
1106
- meta_path = os .path .join (cache_dir , f"{ file } .json" )
1107
- if not os .path .isfile (meta_path ):
1108
- continue
1109
-
1110
- with open (meta_path , encoding = "utf-8" ) as meta_file :
1111
- metadata = json .load (meta_file )
1112
- url = metadata ["url" ]
1113
- etag = metadata ["etag" ].replace ('"' , "" )
1114
- cached_files .append ({"file" : file , "url" : url , "etag" : etag })
1115
-
1116
- return cached_files
1117
-
1118
-
1119
- def extract_info_from_url (url ):
1120
- """
1121
- Extract repo_name, revision and filename from an url.
1122
- """
1123
- search = re .search (r"^https://huggingface\.co/(.*)/resolve/([^/]*)/(.*)$" , url )
1124
- if search is None :
1125
- return None
1126
- repo , revision , filename = search .groups ()
1127
- cache_repo = "--" .join (["models" ] + repo .split ("/" ))
1128
- return {"repo" : cache_repo , "revision" : revision , "filename" : filename }
1129
-
1130
-
1131
1069
def create_and_tag_model_card (
1132
1070
repo_id : str ,
1133
1071
tags : Optional [List [str ]] = None ,
@@ -1168,88 +1106,6 @@ def create_and_tag_model_card(
1168
1106
return model_card
1169
1107
1170
1108
1171
- def clean_files_for (file ):
1172
- """
1173
- Remove, if they exist, file, file.json and file.lock
1174
- """
1175
- for f in [file , f"{ file } .json" , f"{ file } .lock" ]:
1176
- if os .path .isfile (f ):
1177
- os .remove (f )
1178
-
1179
-
1180
- def move_to_new_cache (file , repo , filename , revision , etag , commit_hash ):
1181
- """
1182
- Move file to repo following the new huggingface hub cache organization.
1183
- """
1184
- os .makedirs (repo , exist_ok = True )
1185
-
1186
- # refs
1187
- os .makedirs (os .path .join (repo , "refs" ), exist_ok = True )
1188
- if revision != commit_hash :
1189
- ref_path = os .path .join (repo , "refs" , revision )
1190
- with open (ref_path , "w" ) as f :
1191
- f .write (commit_hash )
1192
-
1193
- # blobs
1194
- os .makedirs (os .path .join (repo , "blobs" ), exist_ok = True )
1195
- blob_path = os .path .join (repo , "blobs" , etag )
1196
- shutil .move (file , blob_path )
1197
-
1198
- # snapshots
1199
- os .makedirs (os .path .join (repo , "snapshots" ), exist_ok = True )
1200
- os .makedirs (os .path .join (repo , "snapshots" , commit_hash ), exist_ok = True )
1201
- pointer_path = os .path .join (repo , "snapshots" , commit_hash , filename )
1202
- huggingface_hub .file_download ._create_relative_symlink (blob_path , pointer_path )
1203
- clean_files_for (file )
1204
-
1205
-
1206
- def move_cache (cache_dir = None , new_cache_dir = None , token = None ):
1207
- if new_cache_dir is None :
1208
- new_cache_dir = TRANSFORMERS_CACHE
1209
- if cache_dir is None :
1210
- # Migrate from old cache in .cache/huggingface/transformers
1211
- old_cache = Path (TRANSFORMERS_CACHE ).parent / "transformers"
1212
- if os .path .isdir (str (old_cache )):
1213
- cache_dir = str (old_cache )
1214
- else :
1215
- cache_dir = new_cache_dir
1216
- cached_files = get_all_cached_files (cache_dir = cache_dir )
1217
- logger .info (f"Moving { len (cached_files )} files to the new cache system" )
1218
-
1219
- hub_metadata = {}
1220
- for file_info in tqdm (cached_files ):
1221
- url = file_info .pop ("url" )
1222
- if url not in hub_metadata :
1223
- try :
1224
- hub_metadata [url ] = get_hf_file_metadata (url , token = token )
1225
- except requests .HTTPError :
1226
- continue
1227
-
1228
- etag , commit_hash = hub_metadata [url ].etag , hub_metadata [url ].commit_hash
1229
- if etag is None or commit_hash is None :
1230
- continue
1231
-
1232
- if file_info ["etag" ] != etag :
1233
- # Cached file is not up to date, we just throw it as a new version will be downloaded anyway.
1234
- clean_files_for (os .path .join (cache_dir , file_info ["file" ]))
1235
- continue
1236
-
1237
- url_info = extract_info_from_url (url )
1238
- if url_info is None :
1239
- # Not a file from huggingface.co
1240
- continue
1241
-
1242
- repo = os .path .join (new_cache_dir , url_info ["repo" ])
1243
- move_to_new_cache (
1244
- file = os .path .join (cache_dir , file_info ["file" ]),
1245
- repo = repo ,
1246
- filename = url_info ["filename" ],
1247
- revision = url_info ["revision" ],
1248
- etag = etag ,
1249
- commit_hash = commit_hash ,
1250
- )
1251
-
1252
-
1253
1109
class PushInProgress :
1254
1110
"""
1255
1111
Internal class to keep track of a push in progress (which might contain multiple `Future` jobs).
@@ -1271,55 +1127,3 @@ def cancel(self) -> None:
1271
1127
# Cancel the job if it wasn't started yet and remove cancelled/done jobs from the list
1272
1128
if not (job .cancel () or job .done ())
1273
1129
]
1274
-
1275
-
1276
- cache_version_file = os .path .join (TRANSFORMERS_CACHE , "version.txt" )
1277
- if not os .path .isfile (cache_version_file ):
1278
- cache_version = 0
1279
- else :
1280
- with open (cache_version_file ) as f :
1281
- try :
1282
- cache_version = int (f .read ())
1283
- except ValueError :
1284
- cache_version = 0
1285
-
1286
- cache_is_not_empty = os .path .isdir (TRANSFORMERS_CACHE ) and len (os .listdir (TRANSFORMERS_CACHE )) > 0
1287
-
1288
- if cache_version < 1 and cache_is_not_empty :
1289
- if is_offline_mode ():
1290
- logger .warning (
1291
- "You are offline and the cache for model files in Transformers v4.22.0 has been updated while your local "
1292
- "cache seems to be the one of a previous version. It is very likely that all your calls to any "
1293
- "`from_pretrained()` method will fail. Remove the offline mode and enable internet connection to have "
1294
- "your cache be updated automatically, then you can go back to offline mode."
1295
- )
1296
- else :
1297
- logger .warning (
1298
- "The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a "
1299
- "one-time only operation. You can interrupt this and resume the migration later on by calling "
1300
- "`transformers.utils.move_cache()`."
1301
- )
1302
- try :
1303
- if TRANSFORMERS_CACHE != constants .HF_HUB_CACHE :
1304
- # Users set some env variable to customize cache storage
1305
- move_cache (TRANSFORMERS_CACHE , TRANSFORMERS_CACHE )
1306
- else :
1307
- move_cache ()
1308
- except Exception as e :
1309
- trace = "\n " .join (traceback .format_tb (e .__traceback__ ))
1310
- logger .error (
1311
- f"There was a problem when trying to move your cache:\n \n { trace } \n { e .__class__ .__name__ } : { e } \n \n Please "
1312
- "file an issue at https://github.com/huggingface/transformers/issues/new/choose and copy paste this whole "
1313
- "message and we will do our best to help."
1314
- )
1315
-
1316
- if cache_version < 1 :
1317
- try :
1318
- os .makedirs (TRANSFORMERS_CACHE , exist_ok = True )
1319
- with open (cache_version_file , "w" ) as f :
1320
- f .write ("1" )
1321
- except Exception :
1322
- logger .warning (
1323
- f"There was a problem when trying to write in your cache folder ({ TRANSFORMERS_CACHE } ). You should set "
1324
- "the environment variable TRANSFORMERS_CACHE to a writable directory."
1325
- )
0 commit comments