@@ -413,26 +413,27 @@ def __download_file__(url, filename, vault_token_file=None, auth_url=None, clien
413413 print ("Download file: " + url )
414414 os .makedirs (os .path .dirname (filename ), exist_ok = True ) # Create the necessary directories
415415
416- # --- 1. Try without token ---
417- response = requests .get (url , stream = True , allow_redirects = False )
418- # print("Response code:", response.status_code)
419- # print(f"Status code: {response.status_code}")
420- # print(f"Headers: {response.headers}")
416+ # --- 1. Get redirect URL by requesting HEAD ---
417+ response = requests .head (url , stream = True )
421418 url = response .headers .get ("Location" ) # update URL to the final one after redirects
419+ # print(f"Status code: {response.status_code} \nResponse code: {response.status_code}\nHeaders: {response.headers}")
422420 print ("URL after redirects:" , url )
423- # print(f"Full response: {response}")
424- # exit(0)
425421
426- if (response .status_code == 401 or "WWW-Authenticate" in response .headers or url .startswith ("https://data.dbpedia.io/databus.dbpedia.org" )):
422+ # --- 2. Try direct GET ---
423+ response = requests .get (url , stream = True , allow_redirects = False ) # no redirects here, we want to see if auth is required
424+ www = response .headers .get ('WWW-Authenticate' , '' ) # get WWW-Authenticate header if present to check for Bearer auth
425+ # print(f"Status code: {response.status_code} \nResponse code: {response.status_code}\nHeaders: {response.headers}")
426+
427+ if (response .status_code == 401 or "bearer" in www .lower ()):
427428 print (f"Authentication required for { url } " )
428429 if not (vault_token_file ):
429430 raise RuntimeError ("Authentication required but no vault_token provided" )
430431
431- # --- 2 . Fetch Vault token ---
432+ # --- 3 . Fetch Vault token ---
432433 vault_token = __get_vault_access__ (url , vault_token_file , auth_url , client_id )
433434 headers = {"Authorization" : f"Bearer { vault_token } " }
434435
435- # --- 3 . Retry with token ---
436+ # --- 4 . Retry with token ---
436437 response = requests .get (url , headers = headers , stream = True )
437438
438439 response .raise_for_status () # Raise if still failing
@@ -567,11 +568,23 @@ def __download_list__(urls: List[str],
567568 auth_url : str = None ,
568569 client_id : str = None ) -> None :
569570 for url in urls :
571+ if localDir is None :
572+ host , account , group , artifact , version , file = __get_databus_id_parts__ (url )
573+ localDir = os .path .join (os .getcwd (), account , group , artifact , version if version is not None else "latest" )
574+ print (f"Local directory not given, using { localDir } " )
575+
570576 file = url .split ("/" )[- 1 ]
571577 filename = os .path .join (localDir , file )
572578 __download_file__ (url = url , filename = filename , vault_token_file = vault_token_file , auth_url = auth_url , client_id = client_id )
573579
574580
def __get_databus_id_parts__(uri: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str], Optional[str], Optional[str]]:
    """Split a Databus URI into its six positional identifier parts.

    The leading ``https://`` or ``http://`` scheme and any surrounding
    slashes are removed, then the remainder is split on ``/``.

    Args:
        uri: Databus identifier, e.g.
            ``https://<host>/<account>/<group>/<artifact>/<version>/<file>``.

    Returns:
        A 6-tuple ``(host, account, group, artifact, version, file)``.
        Segments that are missing or empty are ``None``; segments beyond
        the sixth are dropped.
    """
    path = uri.removeprefix("https://").removeprefix("http://").strip("/")
    # "".split("/") yields [""], so an empty or scheme-only URI would report
    # an empty-string host. Map empty segments to None so that callers'
    # ``is not None`` checks see them as absent.
    parts = [segment or None for segment in path.split("/")][:6]
    parts += [None] * (6 - len(parts))  # pad with None if fewer than 6 parts
    return tuple(parts)
586+
587+
575588def download (
576589 localDir : str ,
577590 endpoint : str ,
@@ -591,15 +604,14 @@ def download(
591604 client_id: Client ID for token exchange
592605 """
593606
594- # Auto-detect sparql endpoint from first databusURI if not given -> no need to specify endpoint (--databus)
595- if endpoint is None :
596- host = databusURIs [0 ].split ("/" )[2 ]
597- endpoint = f"https://{ host } /sparql"
598- print (f"SPARQL endpoint { endpoint } " )
607+ for databusURI in databusURIs :
608+ host , account , group , artifact , version , file = __get_databus_id_parts__ (databusURI )
599609
600- databusVersionPattern = re .compile (r"^https://(databus\.dbpedia\.org|databus\.dev\.dbpedia\.link)/[^/]+/[^/]+/[^/]+/[^/]+/?$" )
610+ # Auto-detect sparql endpoint from databusURI if not given -> no need to specify endpoint (--databus)
611+ if endpoint is None :
612+ endpoint = f"https://{ host } /sparql"
613+ print (f"SPARQL endpoint { endpoint } " )
601614
602- for databusURI in databusURIs :
603615 # dataID or databus collection
604616 if databusURI .startswith ("http://" ) or databusURI .startswith ("https://" ):
605617 # databus collection
@@ -608,12 +620,20 @@ def download(
608620 res = __handle_databus_file_query__ (endpoint , query )
609621 __download_list__ (res , localDir )
610622 # databus artifact version // https://(databus.dbpedia.org|databus.dev.dbpedia.link)/$ACCOUNT/$GROUP/$ARTIFACT/$VERSION
611- elif databusVersionPattern .match (databusURI ):
623+ elif file is not None :
624+ print ("fileId not supported yet" ) # TODO
625+ elif version is not None :
612626 json_str = __handle_databus_artifact_version__ (databusURI )
613627 res = __handle_databus_file_json__ (json_str )
614628 __download_list__ (res , localDir , vault_token_file = vault_token_file , auth_url = auth_url , client_id = client_id )
629+ elif artifact is not None :
630+ print ("artifactId not supported yet" ) # TODO
631+ elif group is not None :
632+ print ("groupId not supported yet" ) # TODO
633+ elif account is not None :
634+ print ("accountId not supported yet" ) # TODO
615635 else :
616- print ("dataId not supported yet" ) # TODO add support for other DatabusIds here (artifact, group, etc.)
636+ print ("dataId not supported yet" ) # TODO add support for other DatabusIds
617637 # query in local file
618638 elif databusURI .startswith ("file://" ):
619639 print ("query in file not supported yet" )
0 commit comments