@@ -415,19 +415,20 @@ def __download_file__(url, filename, vault_token_file=None, auth_url=None, clien
415415
416416 # --- 1. Get redirect URL by requesting HEAD ---
417417 response = requests .head (url , stream = True )
418- url = response .headers .get ("Location" ) # update URL to the final one after redirects
419- # print(f"Status code: {response.status_code} \nResponse code: {response.status_code}\nHeaders: {response.headers}")
420- print ("URL after redirects:" , url )
418+
419+ # Check for redirect and update URL if necessary
420+ if response .headers .get ("Location" ) and response .status_code in [301 , 302 , 303 , 307 , 308 ]:
421+ url = response .headers .get ("Location" )
422+ print ("Redirects url: " , url )
421423
422424 # --- 2. Try direct GET ---
423425 response = requests .get (url , stream = True , allow_redirects = False ) # no redirects here, we want to see if auth is required
424426 www = response .headers .get ('WWW-Authenticate' , '' ) # get WWW-Authenticate header if present to check for Bearer auth
425- # print(f"Status code: {response.status_code} \nResponse code: {response.status_code}\nHeaders: {response.headers}")
426427
427428 if (response .status_code == 401 or "bearer" in www .lower ()):
428429 print (f"Authentication required for { url } " )
429430 if not (vault_token_file ):
430- raise RuntimeError ( "Authentication required but no vault_token provided " )
431+ raise ValueError ( "Vault token file not given for protected download " )
431432
432433 # --- 3. Fetch Vault token ---
433434 vault_token = __get_vault_access__ (url , vault_token_file , auth_url , client_id )
@@ -449,7 +450,7 @@ def __download_file__(url, filename, vault_token_file=None, auth_url=None, clien
449450 progress_bar .close ()
450451
451452 if total_size_in_bytes != 0 and progress_bar .n != total_size_in_bytes :
452- print ( "ERROR, something went wrong" )
453+ raise
453454
454455
455456def __get_vault_access__ (download_url : str ,
@@ -575,7 +576,9 @@ def __download_list__(urls: List[str],
575576
576577 file = url .split ("/" )[- 1 ]
577578 filename = os .path .join (localDir , file )
579+ print ("\n " )
578580 __download_file__ (url = url , filename = filename , vault_token_file = vault_token_file , auth_url = auth_url , client_id = client_id )
581+ print ("\n " )
579582
580583
581584def __get_databus_id_parts__ (uri : str ) -> Tuple [Optional [str ], Optional [str ], Optional [str ], Optional [str ], Optional [str ], Optional [str ]]:
@@ -607,29 +610,33 @@ def download(
607610 for databusURI in databusURIs :
608611 host , account , group , artifact , version , file = __get_databus_id_parts__ (databusURI )
609612
610- # Auto-detect sparql endpoint from databusURI if not given -> no need to specify endpoint (--databus)
611- if endpoint is None :
612- endpoint = f"https://{ host } /sparql"
613- print (f"SPARQL endpoint { endpoint } " )
614-
615613 # dataID or databus collection
616614 if databusURI .startswith ("http://" ) or databusURI .startswith ("https://" ):
615+ # Auto-detect sparql endpoint from databusURI if not given -> no need to specify endpoint (--databus)
616+ if endpoint is None :
617+ endpoint = f"https://{ host } /sparql"
618+ print (f"SPARQL endpoint { endpoint } " )
619+
617620 # databus collection
618621 if "/collections/" in databusURI : # TODO "in" is not safe! there could be an artifact named collections, need to check for the correct part position in the URI
619622 query = __handle_databus_collection__ (databusURI )
620623 res = __handle_databus_file_query__ (endpoint , query )
621624 __download_list__ (res , localDir )
622- # databus artifact version // https://(databus.dbpedia.org|databus.dev.dbpedia.link)/$ACCOUNT/$GROUP/$ARTIFACT/$VERSION
625+ # databus file
623626 elif file is not None :
624- print ("fileId not supported yet" ) # TODO
627+ __download_list__ ([databusURI ], localDir , vault_token_file = vault_token_file , auth_url = auth_url , client_id = client_id )
628+ # databus artifact version
625629 elif version is not None :
626630 json_str = __handle_databus_artifact_version__ (databusURI )
627631 res = __handle_databus_file_json__ (json_str )
628632 __download_list__ (res , localDir , vault_token_file = vault_token_file , auth_url = auth_url , client_id = client_id )
633+ # databus artifact
629634 elif artifact is not None :
630635 print ("artifactId not supported yet" ) # TODO
636+ # databus group
631637 elif group is not None :
632638 print ("groupId not supported yet" ) # TODO
639+ # databus account
633640 elif account is not None :
634641 print ("accountId not supported yet" ) # TODO
635642 else :
@@ -640,5 +647,7 @@ def download(
640647 # query as argument
641648 else :
642649 print ("QUERY {}" , databusURI .replace ("\n " , " " ))
650+ if endpoint is None : # endpoint is required for queries (--databus)
651+ raise ValueError ("No endpoint given for query" )
643652 res = __handle_databus_file_query__ (endpoint , databusURI )
644653 __download_list__ (res , localDir )
0 commit comments