33import datetime
44import json
55import logging
6+ import re
67import time
78import typing
8- import urllib .error
99from pathlib import Path
1010from typing import Dict , List , Optional , Union
11- from urllib .error import HTTPError
1211
1312import requests
1413import shutil
3534from openeo .rest .models .logs import LogEntry , log_level_name , normalize_log_level
3635from openeo .util import ensure_dir
3736
38- MAX_RETRIES_DOWNLOAD = 3
39-
4037if typing .TYPE_CHECKING :
4138 # Imports for type checking only (circular import issue at runtime).
4239 from openeo .rest .connection import Connection
4542
4643
4744DEFAULT_JOB_RESULTS_FILENAME = "job-results.json"
48-
# Number of attempts made for each byte-range chunk before the chunked download gives up.
45+ MAX_RETRIES_PER_CHUNK = 3
# HTTP status codes for which a failed chunk request is attempted again (while attempts remain).
46+ RETRIABLE_STATUSCODES = [408 , 429 , 500 , 501 , 502 , 503 , 504 ]
4947
5048class BatchJob :
5149 """
@@ -407,40 +405,9 @@ def download(
407405 target = target / self .name
408406 ensure_dir (target .parent )
409407 logger .info ("Downloading Job result asset {n!r} from {h!s} to {t!s}" .format (n = self .name , h = self .href , t = target ))
410- self . _download_chunked (target , chunk_size )
408+ _download_chunked (self . href , target , chunk_size )
411409 return target
412410
413- def _download_chunked (self , target : Path , chunk_size : int ):
414- file_size = None
415- try :
416- head = requests .head (self .href , stream = True )
417- if head .ok :
418- file_size = int (head .headers ['Content-Length' ])
419- else :
420- head .raise_for_status ()
421- with target .open ('wb' ) as f :
422- for from_byte_index in range (0 , file_size , chunk_size ):
423- to_byte_index = min (from_byte_index + chunk_size - 1 , file_size - 1 )
424- tries_left = MAX_RETRIES_DOWNLOAD
425- while tries_left > 0 :
426- try :
427- range_headers = {"Range" : f"bytes={ from_byte_index } -{ to_byte_index } " }
428- with requests .get (self .href , headers = range_headers , stream = True ) as r :
429- if r .ok :
430- shutil .copyfileobj (r .raw , f )
431- break
432- else :
433- r .raise_for_status ()
434- except requests .exceptions .HTTPError as error :
435- tries_left -= 1
436- if tries_left < 1 :
437- raise error
438- else :
439- logger .warning (f"Failed to retrieve chunk { from_byte_index } -{ to_byte_index } from { self .href } (status { error .response .status_code } ) - retrying" )
440- continue
441- except requests .exceptions .HTTPError as http_error :
442- raise OpenEoApiPlainError (message = f"Failed to download { self .href } " , http_status_code = http_error .response .status_code , error_message = http_error .response .text )
443-
def _get_response(self, stream=True) -> requests.Response:
    """
    Request this asset's ``href`` through the connection of the job it belongs to.

    :param stream: passed through as the ``stream`` argument of the connection's ``get`` call
    :return: whatever that ``get`` call returns
    """
    connection = self.job.connection
    return connection.get(self.href, stream=stream)
446413
@@ -457,6 +424,51 @@ def load_bytes(self) -> bytes:
457424 # TODO: more `load` methods e.g.: load GTiff asset directly as numpy array
458425
459426
def _download_chunked(url: str, target: Path, chunk_size: int):
    """
    Download ``url`` to ``target`` using HTTP range requests of at most ``chunk_size`` bytes each.

    Each chunk is attempted up to ``MAX_RETRIES_PER_CHUNK`` times; a failed attempt is only
    repeated when the response status code is listed in ``RETRIABLE_STATUSCODES``.

    :param url: URL of the resource to download
    :param target: path of the file to write (created or overwritten)
    :param chunk_size: maximum number of bytes requested per range request (must be positive)
    :raises ValueError: if ``chunk_size`` is not positive
    :raises OpenEoApiPlainError: if a request fails with an HTTP error status
        (after exhausting the retries, or immediately for a non-retriable status)
    """
    if chunk_size < 1:
        # A zero step makes `range()` fail and a negative one silently yields an empty file.
        raise ValueError(f"chunk_size must be a positive number of bytes, got {chunk_size!r}")

    try:
        file_size = _determine_content_length(url)
        with target.open("wb") as f:
            for from_byte_index in range(0, file_size, chunk_size):
                to_byte_index = min(from_byte_index + chunk_size - 1, file_size - 1)
                range_headers = {"Range": f"bytes={from_byte_index}-{to_byte_index}"}
                for attempt in range(1, MAX_RETRIES_PER_CHUNK + 1):
                    try:
                        with requests.get(url, headers=range_headers, stream=True) as r:
                            r.raise_for_status()
                            if r.status_code == 200:
                                # The server ignored the Range header and is sending the complete
                                # resource: store that body once instead of appending a full copy
                                # for every chunk.
                                f.seek(0)
                                f.truncate()
                                shutil.copyfileobj(r.raw, f)
                                return
                            shutil.copyfileobj(r.raw, f)
                        break
                    except requests.exceptions.HTTPError as error:
                        status_code = error.response.status_code
                        if attempt >= MAX_RETRIES_PER_CHUNK or status_code not in RETRIABLE_STATUSCODES:
                            # Out of attempts, or a status for which retrying is pointless.
                            raise
                        logger.warning(f"Failed to retrieve chunk {from_byte_index}-{to_byte_index} from {url} (status {error.response.status_code}) - retrying")
    except requests.exceptions.HTTPError as http_error:
        raise OpenEoApiPlainError(
            message=f"Failed to download {url}",
            http_status_code=http_error.response.status_code,
            error_message=http_error.response.text,
        ) from http_error
452+
453+
def _determine_content_length(url: str) -> int:
    """
    Determine the total size in bytes of the resource at ``url``.

    A single byte is requested first (``Range: bytes=0-0``): a ``206`` response carries the
    total size in its ``Content-Range`` header (``bytes 0-0/<size>``). When that does not yield
    a numeric size, the ``Content-Length`` header of a ``HEAD`` request is used instead.

    :param url: URL of the resource to inspect
    :return: total size of the resource in bytes
    :raises requests.exceptions.HTTPError: if the fallback ``HEAD`` request returns an error status
    :raises KeyError: if the fallback ``HEAD`` response has no ``Content-Length`` header
    """
    # Stream and close the probe: if the server ignores the Range header, the full body
    # must not be pulled into memory just to read the response headers.
    with requests.get(url, headers={"Range": "bytes=0-0"}, stream=True) as range_0_0_response:
        if range_0_0_response.status_code == 206:
            # The header may be absent, or report an unknown size ("bytes 0-0/*");
            # in both cases there is no match and the HEAD fallback below is used.
            content_range_header = range_0_0_response.headers.get("Content-Range") or ""
            match = re.match(r"^bytes \d+-\d+/(\d+)$", content_range_header)
            if match:
                return int(match.group(1))

    head = requests.head(url, stream=True)
    head.raise_for_status()
    return int(head.headers["Content-Length"])
470+
471+
class MultipleAssetException(OpenEoClientException):
    """
    Specialization of :class:`OpenEoClientException` that adds no behavior of its own.

    NOTE(review): presumably raised when a single asset is expected but several are available
    -- confirm against the raising code.
    """
462474
0 commit comments