Skip to content

Commit c626bde

Browse files
ArlindKadra authored and mfeurer committed
Solution which considers private datasets (#439)
* Basic solution for private datasets
* Faulty and not necessary implementation of finally
* Fixed assertRaises call in test_get_data
* Updated get_dataset
* Fixed typo
1 parent 2b2f8a2 commit c626bde

File tree

4 files changed

+30
-10
lines changed

4 files changed

+30
-10
lines changed

openml/datasets/functions.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@
44
import os
55
import re
66
import shutil
7+
import six
78

89
from oslo_concurrency import lockutils
910
import xmltodict
1011

1112
import openml.utils
1213
import openml._api_calls
1314
from .dataset import OpenMLDataset
14-
from ..exceptions import OpenMLCacheException, OpenMLServerNoResult, \
15-
OpenMLHashException
15+
from ..exceptions import OpenMLCacheException, OpenMLServerException, \
16+
OpenMLHashException, PrivateDatasetError
1617
from .. import config
1718
from .._api_calls import _read_url
1819

@@ -315,13 +316,21 @@ def get_dataset(dataset_id):
315316
did_cache_dir = _create_dataset_cache_directory(dataset_id)
316317

317318
try:
319+
remove_dataset_cache = True
318320
description = _get_dataset_description(did_cache_dir, dataset_id)
319321
arff_file = _get_dataset_arff(did_cache_dir, description)
320322
features = _get_dataset_features(did_cache_dir, dataset_id)
321323
qualities = _get_dataset_qualities(did_cache_dir, dataset_id)
322-
except Exception as e:
323-
_remove_dataset_cache_dir(did_cache_dir)
324-
raise e
324+
remove_dataset_cache = False
325+
except OpenMLServerException as e:
326+
# if there was an exception, check if the user had access to the dataset
327+
if e.code == 112:
328+
six.raise_from(PrivateDatasetError(e.message), None)
329+
else:
330+
raise e
331+
finally:
332+
if remove_dataset_cache:
333+
_remove_dataset_cache_dir(did_cache_dir)
325334

326335
dataset = _create_dataset_from_description(
327336
description, features, qualities, arff_file
@@ -357,9 +366,8 @@ def _get_dataset_description(did_cache_dir, dataset_id):
357366

358367
try:
359368
return _get_cached_dataset_description(dataset_id)
360-
except (OpenMLCacheException):
369+
except OpenMLCacheException:
361370
dataset_xml = openml._api_calls._perform_api_call("data/%d" % dataset_id)
362-
363371
with io.open(description_file, "w", encoding='utf8') as fh:
364372
fh.write(dataset_xml)
365373

openml/exceptions.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,10 @@ def __init__(self, message):
4343

4444
class OpenMLHashException(PyOpenMLError):
4545
"""Locally computed hash is different than hash announced by the server."""
46-
pass
46+
pass
47+
48+
49+
class PrivateDatasetError(PyOpenMLError):
50+
"Exception thrown when the user has no rights to access the dataset"
51+
def __init__(self, message):
52+
super(PrivateDatasetError, self).__init__(message)

openml/tasks/functions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from oslo_concurrency import lockutils
88
import xmltodict
99

10-
from ..exceptions import OpenMLCacheException, OpenMLServerNoResult
10+
from ..exceptions import OpenMLCacheException
1111
from ..datasets import get_dataset
1212
from .task import OpenMLTask, _create_task_cache_dir
1313
from .. import config

tests/test_datasets/test_dataset_functions.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717

1818
import openml
1919
from openml import OpenMLDataset
20-
from openml.exceptions import OpenMLCacheException, PyOpenMLError, OpenMLHashException
20+
from openml.exceptions import OpenMLCacheException, PyOpenMLError, \
21+
OpenMLHashException, PrivateDatasetError
2122
from openml.testing import TestBase
2223
from openml.utils import _tag_entity
2324

@@ -231,6 +232,11 @@ def test_get_dataset(self):
231232
self.assertGreater(len(dataset.features), 1)
232233
self.assertGreater(len(dataset.qualities), 4)
233234

235+
# Issue324 Properly handle private datasets when trying to access them
236+
openml.config.server = self.production_server
237+
self.assertRaises(PrivateDatasetError, openml.datasets.get_dataset, 45)
238+
239+
234240
def test_get_dataset_with_string(self):
235241
dataset = openml.datasets.get_dataset(101)
236242
self.assertRaises(PyOpenMLError, dataset._get_arff, 'arff')

0 commit comments

Comments
 (0)