@@ -443,26 +443,29 @@ def find_dataset_file(metadata, url, data_formats):
443443 for tag in soup .find_all ("a" ):
444444 try :
445445 url_link = tag .get ("href" )
446- response = requests .head (url_link , timeout = 3 , verify = False )
446+ # TODO
447+ if "http" not in url_link :
448+ response = requests .head (url_link , timeout = 3 , verify = False )
447449 except Exception as e :
448450 logging .debug (e )
449451
450- try :
451- cut_index = url .find (urllib .parse .urlparse (url ).netloc ) + len (
452- urllib .parse .urlparse (url ).netloc
453- )
454- url_link = url [:cut_index ] + url_link
455- logging .debug ("Trying: " + url_link )
456- response = requests .head (url_link , timeout = 3 , verify = False )
457- content_type = response .headers .get ("Content-Type" )
458- if content_type in data_formats :
459- data_files .append (url_link )
460- else :
461- for f in data_formats :
462- if f in url_link :
463- data_files .append (url_link )
464- except Exception as e :
465- logging .error (e )
452+ if "http" not in url_link :
453+ try :
454+ cut_index = url .find (urllib .parse .urlparse (url ).netloc ) + len (
455+ urllib .parse .urlparse (url ).netloc
456+ )
457+ url_link = url [:cut_index ] + url_link
458+ logging .debug ("Trying: " + url_link )
459+ response = requests .head (url_link , timeout = 3 , verify = False )
460+ content_type = response .headers .get ("Content-Type" )
461+ if content_type in data_formats :
462+ data_files .append (url_link )
463+ else :
464+ for f in data_formats :
465+ if f in url_link :
466+ data_files .append (url_link )
467+ except Exception as e :
468+ logging .error (e )
466469
467470 if len (data_files ) > 0 :
468471 points = 100
0 commit comments