Skip to content

Commit 6589545

Browse files
authored
Merge pull request #268 from IFCA-Advanced-Computing/ferag-patch-8
Update utils.py
2 parents 4b3ab54 + 4f08c56 commit 6589545

File tree

1 file changed

+20
-17
lines changed

1 file changed

+20
-17
lines changed

fair_eva/api/utils.py

Lines changed: 20 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -443,26 +443,29 @@ def find_dataset_file(metadata, url, data_formats):
443443
for tag in soup.find_all("a"):
444444
try:
445445
url_link = tag.get("href")
446-
response = requests.head(url_link, timeout=3, verify=False)
446+
# TODO
447+
if "http" not in url_link:
448+
response = requests.head(url_link, timeout=3, verify=False)
447449
except Exception as e:
448450
logging.debug(e)
449451

450-
try:
451-
cut_index = url.find(urllib.parse.urlparse(url).netloc) + len(
452-
urllib.parse.urlparse(url).netloc
453-
)
454-
url_link = url[:cut_index] + url_link
455-
logging.debug("Trying: " + url_link)
456-
response = requests.head(url_link, timeout=3, verify=False)
457-
content_type = response.headers.get("Content-Type")
458-
if content_type in data_formats:
459-
data_files.append(url_link)
460-
else:
461-
for f in data_formats:
462-
if f in url_link:
463-
data_files.append(url_link)
464-
except Exception as e:
465-
logging.error(e)
452+
if "http" not in url_link:
453+
try:
454+
cut_index = url.find(urllib.parse.urlparse(url).netloc) + len(
455+
urllib.parse.urlparse(url).netloc
456+
)
457+
url_link = url[:cut_index] + url_link
458+
logging.debug("Trying: " + url_link)
459+
response = requests.head(url_link, timeout=3, verify=False)
460+
content_type = response.headers.get("Content-Type")
461+
if content_type in data_formats:
462+
data_files.append(url_link)
463+
else:
464+
for f in data_formats:
465+
if f in url_link:
466+
data_files.append(url_link)
467+
except Exception as e:
468+
logging.error(e)
466469

467470
if len(data_files) > 0:
468471
points = 100

0 commit comments

Comments (0)