Skip to content

Commit e6250fa

Browse files
authored
Provide clearer error when server provides bad data description XML (#1178)
1 parent 5cd6973 commit e6250fa

File tree

3 files changed

+19
-10
lines changed

3 files changed

+19
-10
lines changed

openml/_api_calls.py

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,14 @@
2323
)
2424

2525

26+
def _create_url_from_endpoint(endpoint: str) -> str:
27+
url = config.server
28+
if not url.endswith("/"):
29+
url += "/"
30+
url += endpoint
31+
return url.replace("=", "%3d")
32+
33+
2634
def _perform_api_call(call, request_method, data=None, file_elements=None):
2735
"""
2836
Perform an API call at the OpenML server.
@@ -50,12 +58,7 @@ def _perform_api_call(call, request_method, data=None, file_elements=None):
5058
return_value : str
5159
Return value of the OpenML server
5260
"""
53-
url = config.server
54-
if not url.endswith("/"):
55-
url += "/"
56-
url += call
57-
58-
url = url.replace("=", "%3d")
61+
url = _create_url_from_endpoint(call)
5962
logging.info("Starting [%s] request for the URL %s", request_method, url)
6063
start = time.time()
6164

openml/datasets/functions.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import io
44
import logging
55
import os
6+
from pyexpat import ExpatError
67
from typing import List, Dict, Union, Optional, cast
78

89
import numpy as np
@@ -19,6 +20,7 @@
1920
from .dataset import OpenMLDataset
2021
from ..exceptions import (
2122
OpenMLHashException,
23+
OpenMLServerError,
2224
OpenMLServerException,
2325
OpenMLPrivateDatasetError,
2426
)
@@ -437,7 +439,7 @@ def get_dataset(
437439
parquet_file = None
438440
remove_dataset_cache = False
439441
except OpenMLServerException as e:
440-
# if there was an exception,
442+
# if there was an exception
441443
# check if the user had access to the dataset
442444
if e.code == 112:
443445
raise OpenMLPrivateDatasetError(e.message) from None
@@ -949,14 +951,18 @@ def _get_dataset_description(did_cache_dir, dataset_id):
949951
try:
950952
with io.open(description_file, encoding="utf8") as fh:
951953
dataset_xml = fh.read()
954+
description = xmltodict.parse(dataset_xml)["oml:data_set_description"]
952955
except Exception:
953956
url_extension = "data/{}".format(dataset_id)
954957
dataset_xml = openml._api_calls._perform_api_call(url_extension, "get")
958+
try:
959+
description = xmltodict.parse(dataset_xml)["oml:data_set_description"]
960+
except ExpatError as e:
961+
url = openml._api_calls._create_url_from_endpoint(url_extension)
962+
raise OpenMLServerError(f"Dataset description XML at '{url}' is malformed.") from e
955963
with io.open(description_file, "w", encoding="utf8") as fh:
956964
fh.write(dataset_xml)
957965

958-
description = xmltodict.parse(dataset_xml)["oml:data_set_description"]
959-
960966
return description
961967

962968

tests/test_datasets/test_dataset_functions.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1240,7 +1240,7 @@ def _wait_for_dataset_being_processed(self, dataset_id):
12401240
try:
12411241
downloaded_dataset = openml.datasets.get_dataset(dataset_id)
12421242
break
1243-
except Exception as e:
1243+
except OpenMLServerException as e:
12441244
# returned code 273: Dataset not processed yet
12451245
# returned code 362: No qualities found
12461246
TestBase.logger.error(

0 commit comments

Comments
 (0)