77import xarray as xr
88import os
99
10- from .utils .io import download_http , check_download , handle_missing , download_ftp , download_sftp
10+ import paramiko
11+ import ftplib
12+ import requests
13+
14+ from .utils .io import check_download , handle_missing
1115
1216from d3tools import spatial as sp
1317from d3tools import timestepping as ts
@@ -164,13 +168,13 @@ def _loop_timesteps_and_save_data(self, timesteps: list[ts.TimeStep]) -> None:
164168 # the latter is more space efficient, but at times you have to download the data for several timesteps at once
165169 # so it is better to have the option to download all the data in a single folder
166170 if self .single_temp_folder :
167- with tempfile .TemporaryDirectory (ignore_cleanup_errors = True , prefix = os .getenv ('TMP' )) as tmp_path :
171+ with tempfile .TemporaryDirectory (ignore_cleanup_errors = True , dir = os .getenv ('TMP' )) as tmp_path :
168172 for timestep in timesteps :
169173 self ._get_and_save_data_ts (timestep , tmp_path )
170174 rm_at_exit (tmp_path )
171175 else :
172176 for timestep in timesteps :
173- with tempfile .TemporaryDirectory (ignore_cleanup_errors = True , prefix = os .getenv ('TMP' )) as tmp_path :
177+ with tempfile .TemporaryDirectory (ignore_cleanup_errors = True , dir = os .getenv ('TMP' )) as tmp_path :
174178 self ._get_and_save_data_ts (timestep , tmp_path )
175179 rm_at_exit (tmp_path )
176180
@@ -349,20 +353,14 @@ class URLDownloader(DOORDownloader):
349353
350354 name = "URL_Downloader"
351355
def __init__(self, url_blank: str, protocol: str = 'http') -> None:
    """
    Store the URL template and the (lower-cased) protocol, then run the base initialiser.

    Args:
        url_blank: URL template kept on the instance as ``self.url_blank``.
        protocol: Either 'http' or 'https' (case-insensitive).

    Raises:
        ValueError: If ``protocol`` is neither 'http' nor 'https'.
    """
    self.url_blank = url_blank

    # Guard clause: reject anything that is not plain HTTP(S) before going further.
    normalized_protocol = protocol.lower()
    if normalized_protocol not in ('http', 'https'):
        raise ValueError(f'Protocol {protocol} not supported')
    self.protocol = normalized_protocol

    super().__init__()
367365
368366 def format_url (self , ** kwargs ) -> str :
@@ -382,14 +380,15 @@ def download(self, destination: str, min_size: float = None, missing_action: str
382380
383381 url = self .format_url (** kwargs )
384382 try :
385- if self .protocol == 'http' or self .protocol == 'https' :
386- download_http (url , destination , kwargs ["auth" ])
387- elif self .protocol == 'ftp' :
388- download_ftp (self .host , url , destination , kwargs ["auth" ])
389- elif self .protocol == 'sftp' :
390- download_sftp (self .host , url , destination , kwargs ["auth" ])
391- else :
392- raise ValueError (f'Protocol { self .protocol } not supported' )
383+ r = requests .get (url , kwargs ["auth" ])
384+ if r .status_code != 200 :
385+ raise FileNotFoundError (r .text )
386+
387+ os .makedirs (os .path .dirname (destination ), exist_ok = True )
388+
389+ with open (destination , 'wb' ) as f :
390+ f .write (r .content )
391+
393392 except Exception as e :
394393 handle_missing (missing_action , kwargs )
395394 self .log .debug (f'Error downloading { url } : { e } ' )
@@ -403,6 +402,98 @@ def download(self, destination: str, min_size: float = None, missing_action: str
403402
404403 return True
405404
class FTPDownloader(DOORDownloader):
    """
    Downloader for data from an FTP server via FTP or SFTP.

    This type of downloader is useful for data that can be downloaded from an FTP server.
    The connection is opened once in the constructor and reused by ``download`` and
    ``check_data``. Remote paths are passed as templates whose ``str.format``
    placeholders are filled from keyword arguments.
    """

    name = "FTP_Downloader"

    def __init__(self, host: str, port: int = 21, protocol: str = 'ftp', user: str = 'anonymous', password: str = 'anonymous') -> None:
        """
        Validate the protocol, store the connection settings and open the connection.

        Args:
            host: Server host name or address.
            port: Server port. The default (21) is used for both protocols.
                NOTE(review): SFTP servers commonly listen on 22 - callers using
                ``protocol='sftp'`` probably need to pass the port explicitly.
            protocol: 'ftp' or 'sftp' (case-insensitive).
            user: Login name.
            password: Login password.

        Raises:
            ValueError: If ``protocol`` is neither 'ftp' nor 'sftp'.
        """
        if protocol.lower() not in ('ftp', 'sftp'):
            raise ValueError(f'Protocol {protocol} not supported')
        self.protocol = protocol.lower()

        self.host = host
        self.port = port
        self.user = user
        self.password = password
        super().__init__()

        if self.protocol == 'sftp':
            self.transport = paramiko.Transport((host, port))
            self.transport.connect(username=user, password=password)
            self.client = paramiko.SFTPClient.from_transport(self.transport)
        elif self.protocol == 'ftp':
            self.client = ftplib.FTP()
            self.client.connect(host, port)
            self.client.login(user, password)

    def __del__(self):
        """
        Close the FTP or SFTP client connection when the downloader is deleted.
        """
        # The client is closed before the transport it runs on. Either attribute
        # may be missing if __init__ failed part-way through.
        for attr in ('client', 'transport'):
            connection = getattr(self, attr, None)
            if connection is None:
                continue
            try:
                connection.close()
            except Exception as e:
                # Best effort only: a failure to close must never propagate out of __del__.
                # self.log is assumed to be provided by the base class; guard in case it is not.
                log = getattr(self, 'log', None)
                if log is not None:
                    log.debug(f'Error closing {self.protocol} {attr}: {e}')

    def download(self, blank_path: str, destination: str, min_size: float = None, missing_action: str = 'error', **kwargs) -> bool:
        """
        Download one file from the FTP or SFTP server.

        Args:
            blank_path: Remote path template, formatted with ``kwargs``.
            destination: Local path the file is written to. Its parent directory
                is created when missing.
            min_size: Passed to ``check_download`` to reject files that are too small.
            missing_action: Passed to ``handle_missing`` / ``check_download`` to decide
                how a failed download is reported.
            **kwargs: Values for the placeholders in ``blank_path``.

        Returns:
            True when the file was downloaded and passed ``check_download``,
            False otherwise (unless ``handle_missing`` raises).
        """
        url = blank_path.format(**kwargs)

        try:
            # Same behaviour as URLDownloader.download: make sure the target folder exists.
            destination_dir = os.path.dirname(destination)
            if destination_dir:
                os.makedirs(destination_dir, exist_ok=True)

            if self.protocol == 'sftp':
                self.client.get(url, destination)
            elif self.protocol == 'ftp':
                with open(destination, 'wb') as f:
                    self.client.retrbinary(f'RETR {url}', f.write)
        except Exception as e:
            # Log first so the cause is recorded even if handle_missing raises.
            self.log.debug(f'Error downloading {url} via {self.protocol}: {e}')
            # The local file is opened before the transfer starts, so a failed
            # transfer leaves an empty or truncated file behind: remove it.
            try:
                os.remove(destination)
            except OSError:
                pass  # nothing was written, or it cannot be removed; nothing more to do
            handle_missing(missing_action, kwargs)
            return False

        success_flag, success_msg = check_download(destination, min_size, missing_action)
        if success_flag > 0:
            self.log.debug(f'Error downloading file from {url}: {success_msg}')
            handle_missing(missing_action, kwargs)
            return False

        return True

    def check_data(self, blank_path: str, **kwargs) -> bool:
        """
        Check if the data is available on the FTP or SFTP server.

        This method can be used to check if the data is available before downloading it.

        Args:
            blank_path: Remote path template, formatted with ``kwargs``.
            **kwargs: Values for the placeholders in ``blank_path``.

        Returns:
            True if the server reports the path, False if it does not or if the
            check itself fails.
        """
        url = blank_path.format(**kwargs)

        if self.protocol == 'ftp':
            # NLST lists the path; servers may answer a missing path with an
            # error reply instead of an empty listing, which ftplib raises.
            try:
                return len(self.client.nlst(url)) > 0
            except ftplib.all_errors as e:
                self.log.debug(f'Error checking {url} via {self.protocol}: {e}')
                return False

        if self.protocol == 'sftp':
            try:
                self.client.stat(url)
                return True
            except Exception as e:
                # Kept broad on purpose: any failure of stat means "not available".
                self.log.debug(f'Error checking {url} via {self.protocol}: {e}')

        return False
494+
495+
496+
406497class APIDownloader (DOORDownloader ):
407498 """
408499 Downloader for data from an API.
@@ -443,4 +534,4 @@ def download(self, destination: str, min_size: float = None, missing_action: str
def retrieve(self, **kwargs):
    """Forward all keyword arguments to ``self.client.retrieve`` and return its result."""
    response = self.client.retrieve(**kwargs)
    return response
445536
446- Downloader = DOORDownloader
537+ Downloader = DOORDownloader
0 commit comments