55from pathlib import Path
66from enum import Enum
77from logging import getLogger
8- from typing import Optional , Tuple , Union , List , Dict
8+ from typing import Optional , Tuple , Union , List , Dict , Iterable
99from urllib .parse import urlparse , quote
1010from uuid import UUID
1111
@@ -182,8 +182,8 @@ def __init__(self, project_id: UUID, dataset_id: UUID, session: Session):
182182
183183 def _get_path (self ,
184184 uid : Optional [Union [UUID , str ]] = None ,
185- ignore_dataset : Optional [bool ] = False ,
186185 * ,
186+ ignore_dataset : Optional [bool ] = False ,
187187 version : Union [str , UUID ] = None ,
188188 action : str = None ) -> str :
189189 """Build the path for taking an action with a particular file version."""
@@ -248,7 +248,7 @@ def get(self,
248248 * ,
249249 version : Optional [Union [UUID , str , int ]] = None ) -> FileLink :
250250 """
251- Get an element of the collection by its id .
251+ Retrieve an on-platform FileLink from its filename or file uuid .
252252
253253 Parameters
254254 ----------
@@ -359,7 +359,7 @@ def _make_upload_request(self, file_path: Path, dest_name: str):
359359 aws_session_token, bucket, object_key, & upload_id.
360360
361361 """
362- path = self ._get_path () + "/ uploads"
362+ path = self ._get_path (action = " uploads")
363363 mime_type = self ._mime_type (file_path )
364364 file_size = file_path .stat ().st_size
365365 assert isinstance (file_size , int )
@@ -423,7 +423,7 @@ def _search_by_file_name(self,
423423 All the data needed for a file.
424424
425425 """
426- path = self ._get_path () + "/ search"
426+ path = self ._get_path (action = " search")
427427
428428 search_json = {
429429 'fileSearchFilter' :
@@ -456,7 +456,7 @@ def _search_by_file_version_id(self,
456456 All the data needed for a file.
457457
458458 """
459- path = self ._get_path () + "/ search"
459+ path = self ._get_path (action = " search")
460460
461461 search_json = {
462462 'fileSearchFilter' : {
@@ -495,7 +495,7 @@ def _search_by_dataset_file_id(self,
495495 All the data needed for a file.
496496
497497 """
498- path = self ._get_path () + "/ search"
498+ path = self ._get_path (action = " search")
499499
500500 search_json = {
501501 'fileSearchFilter' : {
@@ -644,14 +644,12 @@ def read(self, *, file_link: Union[str, UUID, FileLink]):
644644
645645 if self ._is_external_url (file_link .url ): # Pull it from where ever it lives
646646 final_url = file_link .url
647- elif self . _validate_local_url ( file_link . url ) :
647+ else :
648648 # The "/content-link" route returns a pre-signed url to download the file.
649649 content_link = self ._get_path_from_file_link (file_link , action = 'content-link' )
650650 content_link_response = self .session .get_resource (content_link )
651651 pre_signed_url = content_link_response ['pre_signed_read_link' ]
652652 final_url = rewrite_s3_links_locally (pre_signed_url , self .session .s3_endpoint_url )
653- else : # Unrecognized
654- raise ValueError (f"URL was malformed for a local file resource ({ file_link .url } )." )
655653
656654 download_response = requests .get (final_url )
657655 return download_response .content
@@ -690,10 +688,10 @@ def process(self, *, file_link: Union[FileLink, str, UUID],
690688 A JobSubmissionResponse which can be used to poll for the result.
691689
692690 """
693- file_link = self ._resolve_file_link (file_link )
694- if not self ._validate_local_url (file_link .url ):
691+ if self ._is_external_url (file_link .url ):
695692 raise ValueError (f"Only on-platform resources can be processed. "
696693 f"Passed URL { file_link .url } ." )
694+ file_link = self ._resolve_file_link (file_link )
697695
698696 params = {"processing_type" : processing_type .value }
699697 response = self .session .put_resource (
@@ -797,6 +795,38 @@ def file_processing_result(self, *,
797795
798796 return results
799797
def ingest(self, files: Iterable[FileLink]):
    """
    [ALPHA] Ingest a set of CSVs and/or Excel Workbooks formatted per the gemd-ingest protocol.

    Parameters
    ----------
    files: Iterable[FileLink]
        The files, already on platform, from which GEMD objects should be built.
        Every entry is passed through ``_resolve_file_link`` before it is used.

    Returns
    -------
    The response of the POST made to the new ingestion's ``gemd-objects`` route.

    Raises
    ------
    ValueError
        If any resolved file link points at an external URL.

    """
    resolved = [self._resolve_file_link(link) for link in files]

    # Gather every off-platform URL in one pass so the error can list them all.
    externals = [link.url for link in resolved if self._is_external_url(link.url)]
    if externals:
        raise ValueError(f"All files must be on-platform to load them. "
                         f"The following are not: {externals}")

    payload = {
        "project_id": str(self.project_id),
        "dataset_id": str(self.dataset_id),
        "files": [
            {"dataset_file_id": str(link.uid), "file_version_uuid": str(link.version)}
            for link in resolved
        ],
    }

    # Two POSTs: create the ingestion, then post to that ingestion's gemd-objects route.
    base_url = format_escaped_url("/projects/{}/ingestions", self.project_id)
    ingestion_id = self.session.post_resource(path=base_url, json=payload)["ingestion_id"]
    job_url = base_url + format_escaped_url("/{}/gemd-objects", ingestion_id)
    return self.session.post_resource(path=job_url, json={})
800830 def delete (self , file_link : FileLink ):
801831 """
802832 Delete the file associated with a given FileLink from the database.
@@ -817,8 +847,25 @@ def delete(self, file_link: FileLink):
817847
818848 def _resolve_file_link (self , identifier : Union [str , UUID , FileLink ]) -> FileLink :
819849 """Generate the FileLink object referenced by the passed argument."""
820- if isinstance (identifier , FileLink ): # Passthrough for convenience
821- return identifier
850+ if isinstance (identifier , GEMDFileLink ):
851+ if isinstance (identifier , FileLink ) and identifier .uid is not None :
852+ # Passthrough since it's as full as it can get
853+ return identifier
854+ if self ._is_external_url (identifier .url ):
855+ # Up-convert type with existing info
856+ return FileLink (filename = identifier .filename , url = identifier .url )
857+ # Resolve on-platform uid and possibly up-convert
858+ file_id , version_id = self ._get_ids_from_url (identifier .url )
859+ if file_id is None :
860+ raise ValueError (f"URL was malformed for local resources; "
861+ f"passed URL { identifier .url } " )
862+ platform_link = self .get (uid = file_id , version = version_id )
863+ if platform_link .filename != identifier .filename :
864+ raise ValueError (
865+ f"Name mismatch between link ({ identifier .filename } ) "
866+ f"and platform ({ platform_link .filename } )"
867+ )
868+ return platform_link
822869 elif isinstance (identifier , str ) and self ._is_external_url (identifier ):
823870 # Assume it's an absolute URL
824871 filename = urlparse (identifier ).path .split ('/' )[- 1 ]
@@ -848,10 +895,3 @@ def _is_external_url(self, url: str):
848895 return False
849896
850897 return urlparse (self ._get_path ()).netloc != parsed .netloc
851-
def _validate_local_url(self, url):
    """Return True only for a non-external URL that yields a version id."""
    # Element 1 of _get_ids_from_url's result is the version id (element 0 is
    # the file id).
    # NOTE(review): presumably a missing version id means the file id is
    # missing as well -- confirm against _get_ids_from_url.
    return (not self._is_external_url(url)
            and self._get_ids_from_url(url)[1] is not None)
0 commit comments