Skip to content

Commit 3232d0d

Browse files
authored
Improve error logging (#428)
* ADD hash checks to datasets * Split target on comma, which allows errors on multiple targets * FIX unit test
1 parent 6ac98aa commit 3232d0d

File tree

4 files changed

+18
-7
lines changed

4 files changed

+18
-7
lines changed

openml/datasets/dataset.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,10 @@ def get_data(self, target=None,
270270
rval.append(data)
271271
else:
272272
if isinstance(target, six.string_types):
273-
target = [target]
273+
if ',' in target:
274+
target = target.split(',')
275+
else:
276+
target = [target]
274277
targets = np.array([True if column in target else False
275278
for column in attribute_names])
276279
if np.sum(targets) > 1:

openml/datasets/functions.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010

1111
import openml.utils
1212
from .dataset import OpenMLDataset
13-
from ..exceptions import OpenMLCacheException, OpenMLServerNoResult
13+
from ..exceptions import OpenMLCacheException, OpenMLServerNoResult, \
14+
OpenMLHashException
1415
from .. import config
1516
from .._api_calls import _perform_api_call, _read_url
1617

@@ -404,12 +405,14 @@ def _get_dataset_arff(did_cache_dir, description):
404405
url = description['oml:url']
405406
arff_string = _read_url(url)
406407
md5 = hashlib.md5()
407-
md5.update(arff_string.encode('utf8'))
408+
md5.update(arff_string.encode('utf-8'))
408409
md5_checksum = md5.hexdigest()
409410
if md5_checksum != md5_checksum_fixture:
410-
raise ValueError(
411+
raise OpenMLHashException(
411412
'Checksum %s of downloaded dataset %d is unequal to the checksum '
412-
'%s sent by the server.' % (md5_checksum, did, md5_checksum_fixture)
413+
'%s sent by the server.' % (
414+
md5_checksum, int(did), md5_checksum_fixture
415+
)
413416
)
414417

415418
with io.open(output_file_path, "w", encoding='utf8') as fh:

openml/exceptions.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,8 @@ class OpenMLCacheException(PyOpenMLError):
3535
"""Dataset / task etc not found in cache"""
3636
def __init__(self, message):
3737
super(OpenMLCacheException, self).__init__(message)
38+
39+
40+
class OpenMLHashException(PyOpenMLError):
41+
"""Locally computed hash is different than hash announced by the server."""
42+
pass

tests/test_datasets/test_dataset_functions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
import openml
1919
from openml import OpenMLDataset
20-
from openml.exceptions import OpenMLCacheException, PyOpenMLError
20+
from openml.exceptions import OpenMLCacheException, PyOpenMLError, OpenMLHashException
2121
from openml.testing import TestBase
2222
from openml.utils import _tag_entity
2323

@@ -268,7 +268,7 @@ def test__getarff_md5_issue(self):
268268
'oml:url': 'https://www.openml.org/data/download/61',
269269
}
270270
self.assertRaisesRegexp(
271-
ValueError,
271+
OpenMLHashException,
272272
'Checksum ad484452702105cbf3d30f8deaba39a9 of downloaded dataset 5 '
273273
'is unequal to the checksum abc sent by the server.',
274274
_get_dataset_arff,

0 commit comments

Comments
 (0)