# GitHub repository holding the OGGM sample data, pinned to an exact commit
# so that downloads are reproducible.
SAMPLE_DATA_GH_REPO = 'OGGM/oggm-sample-data'
SAMPLE_DATA_COMMIT = '9bfeb6dfea9513f790877819d9a6cbd2c7b61611'

# File listing the sha256 checksums of all downloadable objects, plus the
# checksum of that file itself (used to validate the list before trusting it).
CHECKSUM_URL = 'https://cluster.klima.uni-bremen.de/data/downloads.sha256.hdf'
CHECKSUM_VALIDATION_URL = CHECKSUM_URL + '.sha256'
# Re-validate the locally cached checksum file after one day (seconds).
CHECKSUM_LIFETIME = 24 * 60 * 60

# Recommended url for runs
DEFAULT_BASE_URL = ('https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.6/'
                    'L3-L5_files/2023.3/elev_bands/W5E5_spinup')
@@ -180,88 +176,6 @@ def get_lock():
180176 return lock
181177
182178
def get_dl_verify_data(section):
    """Returns a pandas DataFrame with all known download object hashes.

    The returned dictionary resolves str: cache_obj_name (without section)
    to a tuple int(size) and bytes(sha256)
    """

    cache_key = 'dl_verify_data_' + section
    if cache_key in cfg.DATA:
        # Already loaded earlier in this session
        return cfg.DATA[cache_key]

    hash_file = os.path.join(cfg.CACHE_DIR, 'downloads.sha256.hdf')

    def verify_file(force=False):
        """Check the hash file's own hash"""
        if not cfg.PARAMS['has_internet']:
            return

        # A recent enough local copy is trusted unless forced
        fresh = (os.path.isfile(hash_file) and
                 os.path.getmtime(hash_file) + CHECKSUM_LIFETIME > time.time())
        if fresh and not force:
            return

        logger.info('Checking the download verification file checksum...')
        expected_digest = None
        try:
            with requests.get(CHECKSUM_VALIDATION_URL) as req:
                req.raise_for_status()
                hex_digest = req.text.split(maxsplit=1)[0]
                expected_digest = bytearray.fromhex(hex_digest)
        except Exception as e:
            logger.warning('Failed getting verification checksum: ' + repr(e))

        if os.path.isfile(hash_file) and expected_digest:
            digest = hashlib.sha256()
            with open(hash_file, 'rb') as f:
                chunk = f.read(0xFFFF)
                while chunk:
                    digest.update(chunk)
                    chunk = f.read(0xFFFF)
            if digest.digest() != expected_digest:
                logger.warning('%s changed or invalid, deleting.'
                               % (hash_file))
                os.remove(hash_file)
            else:
                # Touch the file so the freshness check above passes next time
                os.utime(hash_file)

    if not any('dl_verify_data_' in k for k in cfg.DATA.keys()):
        # We check the hash file only once per session
        # no need to do it at each call
        verify_file()

    if not os.path.isfile(hash_file):
        if not cfg.PARAMS['has_internet']:
            return pd.DataFrame()

        logger.info('Downloading %s to %s...'
                    % (CHECKSUM_URL, hash_file))

        with requests.get(CHECKSUM_URL, stream=True) as req:
            if req.status_code == 200:
                mkdir(os.path.dirname(hash_file))
                with open(hash_file, 'wb') as f:
                    for chunk in req.iter_content(chunk_size=0xFFFF):
                        if chunk:
                            f.write(chunk)

        logger.info('Done downloading.')

        # The freshly downloaded list must itself be validated
        verify_file(force=True)

    if not os.path.isfile(hash_file):
        logger.warning('Downloading and verifying checksums failed.')
        return pd.DataFrame()

    try:
        data = pd.read_hdf(hash_file, key=section)
    except KeyError:
        # No entries for this section in the hash file
        data = pd.DataFrame()

    cfg.DATA[cache_key] = data

    return data
264-
265179def _call_dl_func (dl_func , cache_path ):
266180 """Helper so the actual call to downloads can be overridden
267181 """
@@ -332,44 +246,6 @@ def _cached_download_helper(cache_obj_name, dl_func, reset=False):
332246 return cache_path
333247
334248
def _verified_download_helper(cache_obj_name, dl_func, reset=False):
    """Helper function for downloads.

    Verifies the size and hash of the downloaded file against the included
    list of known static files.
    Uses _cached_download_helper to perform the actual download.

    Parameters
    ----------
    cache_obj_name : str
        cache object name of the form 'section/path/to/file'
    dl_func : callable
        performs the actual download to the cache path
    reset : bool
        force a fresh download even if a cached copy exists

    Raises
    ------
    DownloadVerificationFailedException
        if the file's size or sha256 does not match the known values
    """
    path = _cached_download_helper(cache_obj_name, dl_func, reset)

    dl_verify = cfg.PARAMS.get('dl_verify', False)

    # Each object is verified at most once per session (cfg.DL_VERIFIED)
    if dl_verify and path and cache_obj_name not in cfg.DL_VERIFIED:
        cache_section, cache_path = cache_obj_name.split('/', 1)
        data = get_dl_verify_data(cache_section)
        if cache_path not in data.index:
            logger.info('No known hash for %s' % cache_obj_name)
            cfg.DL_VERIFIED[cache_obj_name] = True
        else:
            # compute the hash
            sha256 = hashlib.sha256()
            with open(path, 'rb') as f:
                for b in iter(lambda: f.read(0xFFFF), b''):
                    sha256.update(b)
            sha256 = sha256.digest()
            size = os.path.getsize(path)

            # check
            data = data.loc[cache_path]
            if data['size'] != size or bytes(data['sha256']) != sha256:
                # FIX: the message previously reported the expected values via
                # positional access (data.iloc[0], data.iloc[1]), which prints
                # the wrong fields if the column order ever changes. Use the
                # same named columns as the comparison above.
                err = '%s failed to verify!\nis: %s %s\nexpected: %s %s' % (
                    path, size, sha256.hex(), data['size'],
                    bytes(data['sha256']).hex())
                raise DownloadVerificationFailedException(msg=err, path=path)
            logger.info('%s verified successfully.' % path)
            cfg.DL_VERIFIED[cache_obj_name] = True

    return path
372-
373249def _requests_urlretrieve (url , path , reporthook , auth = None , timeout = None ):
374250 """Implements the required features of urlretrieve on top of requests
375251 """
@@ -512,7 +388,7 @@ def oggm_urlretrieve(url, cache_obj_name=None, reset=False,
512388 reporthook = None , auth = None , timeout = None ):
513389 """Wrapper around urlretrieve, to implement our caching logic.
514390
515- Instead of accepting a destination path, it decided where to store the file
391+ Instead of accepting a destination path, it decides where to store the file
516392 and returns the local path.
517393
518394 auth is expected to be either a tuple of ('username', 'password') or None.
@@ -533,7 +409,7 @@ def _dlf(cache_path):
533409 timeout )
534410 return cache_path
535411
536- return _verified_download_helper (cache_obj_name , _dlf , reset )
412+ return _cached_download_helper (cache_obj_name , _dlf , reset )
537413
538414
539415def _progress_urlretrieve (url , cache_name = None , reset = False ,
@@ -595,7 +471,7 @@ def _aws_file_download_unlocked(aws_path, cache_name=None, reset=False):
595471 def _dlf (cache_path ):
596472 raise NotImplementedError ("Downloads from AWS are no longer supported" )
597473
598- return _verified_download_helper (cache_obj_name , _dlf , reset )
474+ return _cached_download_helper (cache_obj_name , _dlf , reset )
599475
600476
601477def file_downloader (www_path , retry_max = 3 , sleep_on_retry = 5 ,
@@ -793,7 +669,7 @@ def _always_none(foo):
793669 return None
794670
795671 cache_obj_name = _get_url_cache_name (wwwfile )
796- dest_file = _verified_download_helper (cache_obj_name , _always_none )
672+ dest_file = _cached_download_helper (cache_obj_name , _always_none )
797673
798674 # Grab auth parameters
799675 if not dest_file :
@@ -1317,14 +1193,14 @@ def get_geodetic_mb_dataframe(file_path=None):
13171193
13181194def get_temp_bias_dataframe (dataset = 'w5e5' ):
13191195 """Fetches the temperature bias dataframe created by the OGGM>=v16 pre-calibration
1320- (further explained in the OGGM mass balance tutorial:
1196+ (further explained in the OGGM mass balance tutorial:
13211197 https:// tutorials.oggm.org/stable/notebooks/tutorials/massbalance_calibration.html).
1322- The data preparation script is available at
1198+ The data preparation script is available at
13231199 https://nbviewer.jupyter.org/urls/cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.6/calibration/1.6.1/prepare_bias_map.ipynb
13241200
13251201 The file differs between climate datasets and OGGM versions. For W5E5 and OGGM v162, it is e.g.
13261202 https://cluster.klima.uni-bremen.de/~oggm/ref_mb_params/oggm_v1.6/w5e5_temp_bias_v2023.4.csv
1327-
1203+
13281204 Parameters
13291205 ----------
13301206 dataset : str
0 commit comments