11import datetime
2- import os
32import shutil
43import traceback
54from functools import lru_cache
@@ -443,7 +442,7 @@ def _open_urls(
443442 def get (
444443 self ,
445444 granules : Union [List [DataGranule ], List [str ]],
446- local_path : Optional [str ] = None ,
445+ local_path : Optional [Path ] = None ,
447446 provider : Optional [str ] = None ,
448447 threads : int = 8 ,
449448 ) -> List [str ]:
@@ -466,11 +465,10 @@ def get(
466465 List of downloaded files
467466 """
468467 if local_path is None :
469- local_path = os .path .join (
470- "." ,
471- "data" ,
472- f"{ datetime .datetime .today ().strftime ('%Y-%m-%d' )} -{ uuid4 ().hex [:6 ]} " ,
473- )
468+ today = datetime .datetime .today ().strftime ("%Y-%m-%d" )
469+ uuid = uuid4 ().hex [:6 ]
470+ local_path = Path .cwd () / "data" / f"{ today } -{ uuid } "
471+
474472 if len (granules ):
475473 files = self ._get (granules , local_path , provider , threads )
476474 return files
@@ -481,7 +479,7 @@ def get(
481479 def _get (
482480 self ,
483481 granules : Union [List [DataGranule ], List [str ]],
484- local_path : str ,
482+ local_path : Path ,
485483 provider : Optional [str ] = None ,
486484 threads : int = 8 ,
487485 ) -> List [str ]:
@@ -509,7 +507,7 @@ def _get(
509507 def _get_urls (
510508 self ,
511509 granules : List [str ],
512- local_path : str ,
510+ local_path : Path ,
513511 provider : Optional [str ] = None ,
514512 threads : int = 8 ,
515513 ) -> List [str ]:
@@ -525,8 +523,8 @@ def _get_urls(
525523 s3_fs = self .get_s3fs_session (provider = provider )
526524 # TODO: make this parallel or concurrent
527525 for file in data_links :
528- s3_fs .get (file , local_path )
529- file_name = os . path . join ( local_path , os . path . basename (file ))
526+ s3_fs .get (file , str ( local_path ) )
527+ file_name = local_path / Path (file ). name
530528 print (f"Downloaded: { file_name } " )
531529 downloaded_files .append (file_name )
532530 return downloaded_files
@@ -539,7 +537,7 @@ def _get_urls(
539537 def _get_granules (
540538 self ,
541539 granules : List [DataGranule ],
542- local_path : str ,
540+ local_path : Path ,
543541 provider : Optional [str ] = None ,
544542 threads : int = 8 ,
545543 ) -> List [str ]:
@@ -571,8 +569,8 @@ def _get_granules(
571569 s3_fs = self .get_s3fs_session (provider = provider )
572570 # TODO: make this async
573571 for file in data_links :
574- s3_fs .get (file , local_path )
575- file_name = os . path . join ( local_path , os . path . basename (file ))
572+ s3_fs .get (file , str ( local_path ) )
573+ file_name = local_path / Path (file ). name
576574 print (f"Downloaded: { file_name } " )
577575 downloaded_files .append (file_name )
578576 return downloaded_files
@@ -581,7 +579,7 @@ def _get_granules(
581579 # it will be downloaded as if it was on prem
582580 return self ._download_onprem_granules (data_links , local_path , threads )
583581
584- def _download_file (self , url : str , directory : str ) -> str :
582+ def _download_file (self , url : str , directory : Path ) -> str :
585583 """Download a single file from an on-prem location, a DAAC data center.
586584
587585 Parameters:
@@ -595,9 +593,8 @@ def _download_file(self, url: str, directory: str) -> str:
595593 if "opendap" in url and url .endswith (".html" ):
596594 url = url .replace (".html" , "" )
597595 local_filename = url .split ("/" )[- 1 ]
598- path = Path (directory ) / Path (local_filename )
599- local_path = str (path )
600- if not os .path .exists (local_path ):
596+ path = directory / Path (local_filename )
597+ if not path .exists ():
601598 try :
602599 session = self .auth .get_session ()
603600 with session .get (
@@ -606,7 +603,7 @@ def _download_file(self, url: str, directory: str) -> str:
606603 allow_redirects = True ,
607604 ) as r :
608605 r .raise_for_status ()
609- with open (local_path , "wb" ) as f :
606+ with open (path , "wb" ) as f :
610607 # This is to cap memory usage for large files at 1MB per write to disk per thread
611608 # https://docs.python-requests.org/en/latest/user/quickstart/#raw-response-content
612609 shutil .copyfileobj (r .raw , f , length = 1024 * 1024 )
@@ -616,10 +613,10 @@ def _download_file(self, url: str, directory: str) -> str:
616613 raise Exception
617614 else :
618615 print (f"File { local_filename } already downloaded" )
619- return local_path
616+ return str ( path )
620617
621618 def _download_onprem_granules (
622- self , urls : List [str ], directory : str , threads : int = 8
619+ self , urls : List [str ], directory : Path , threads : int = 8
623620 ) -> List [Any ]:
624621 """Downloads a list of URLS into the data directory.
625622
@@ -638,8 +635,7 @@ def _download_onprem_granules(
638635 raise ValueError (
639636 "We need to be logged into NASA EDL in order to download data granules"
640637 )
641- if not os .path .exists (directory ):
642- os .makedirs (directory )
638+ directory .mkdir (parents = True , exist_ok = True )
643639
644640 arguments = [(url , directory ) for url in urls ]
645641 results = pqdm (
0 commit comments