Skip to content

Commit d0b9cc3

Browse files
committed
Adding unit test for ignore_attribute
1 parent b16952c commit d0b9cc3

File tree

1 file changed

+78
-2
lines changed

1 file changed

+78
-2
lines changed

tests/test_datasets/test_dataset_functions.py

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1015,6 +1015,7 @@ def test_ignore_attributes_dataset(self):
10151015
self.assertEqual(dataset.ignore_attribute, ['outlook'])
10161016

10171017
# pass a list to ignore_attribute
1018+
ignore_attribute = ['outlook', 'windy']
10181019
dataset = openml.datasets.functions.create_dataset(
10191020
name=name,
10201021
description=description,
@@ -1025,15 +1026,15 @@ def test_ignore_attributes_dataset(self):
10251026
licence=licence,
10261027
default_target_attribute=default_target_attribute,
10271028
row_id_attribute=None,
1028-
ignore_attribute=['outlook', 'windy'],
1029+
ignore_attribute=ignore_attribute,
10291030
citation=citation,
10301031
attributes='auto',
10311032
data=df,
10321033
version_label='test',
10331034
original_data_url=original_data_url,
10341035
paper_url=paper_url
10351036
)
1036-
self.assertEqual(dataset.ignore_attribute, ['outlook', 'windy'])
1037+
self.assertEqual(dataset.ignore_attribute, ignore_attribute)
10371038

10381039
# raise an error if unknown type
10391040
err_msg = 'Wrong data type for ignore_attribute. Should be list.'
@@ -1057,6 +1058,81 @@ def test_ignore_attributes_dataset(self):
10571058
paper_url=paper_url
10581059
)
10591060

1061+
def test_publish_fetch_ignore_attribute(self):
    """Publish a dataset with ``ignore_attribute`` set and fetch it back.

    The dataset is built from a small pandas DataFrame, published, and
    then fetched again with ``openml.datasets.get_dataset``. Fetching is
    retried while the reported error is "Dataset not processed yet".
    The test passes when the fetched dataset reports the same
    ``ignore_attribute`` list that was supplied at creation time.

    Raises
    ------
    RuntimeError
        If fetching fails for any reason other than the dataset not
        being processed yet.
    ValueError
        If the dataset could not be fetched within the retry budget.
    """
    # Function-scope import so the block does not depend on the
    # module-level import list.
    import time

    data = [
        ['a', 'sunny', 85.0, 85.0, 'FALSE', 'no'],
        ['b', 'sunny', 80.0, 90.0, 'TRUE', 'no'],
        ['c', 'overcast', 83.0, 86.0, 'FALSE', 'yes'],
        ['d', 'rainy', 70.0, 96.0, 'FALSE', 'yes'],
        ['e', 'rainy', 68.0, 80.0, 'FALSE', 'yes']
    ]
    column_names = ['rnd_str', 'outlook', 'temperature', 'humidity',
                    'windy', 'play']
    df = pd.DataFrame(data, columns=column_names)
    # enforce the type of each column
    df['outlook'] = df['outlook'].astype('category')
    # Compare against the literal instead of astype('bool'): every
    # non-empty string is truthy, so astype('bool') would turn both
    # 'TRUE' and 'FALSE' into True.
    df['windy'] = df['windy'] == 'TRUE'
    df['play'] = df['play'].astype('category')
    # meta-information
    name = '%s-pandas_testing_dataset' % self._get_sentinel()
    description = 'Synthetic dataset created from a Pandas DataFrame'
    creator = 'OpenML tester'
    collection_date = '01-01-2018'
    language = 'English'
    licence = 'MIT'
    default_target_attribute = 'play'
    citation = 'None'
    original_data_url = 'http://openml.github.io/openml-python'
    paper_url = 'http://openml.github.io/openml-python'

    # pass a list to ignore_attribute
    ignore_attribute = ['outlook', 'windy']
    dataset = openml.datasets.functions.create_dataset(
        name=name,
        description=description,
        creator=creator,
        contributor=None,
        collection_date=collection_date,
        language=language,
        licence=licence,
        default_target_attribute=default_target_attribute,
        row_id_attribute=None,
        ignore_attribute=ignore_attribute,
        citation=citation,
        attributes='auto',
        data=df,
        version_label='test',
        original_data_url=original_data_url,
        paper_url=paper_url
    )

    # publish dataset
    upload_did = dataset.publish()
    # test if publish was successful
    self.assertIsInstance(dataset.dataset_id, int)

    max_trials = 100
    # Pause between attempts so the retry budget is not exhausted in a
    # burst of back-to-back requests while the dataset is still being
    # processed.
    retry_delay_seconds = 1
    dataset = None
    # fetching from server: retry until success or the budget runs out
    for _ in range(max_trials):
        try:
            dataset = openml.datasets.get_dataset(upload_did)
            break
        except Exception as e:
            reason = str(e).split(':')[-1].strip()
            if reason != "Dataset not processed yet":
                # Any other failure is unexpected; keep the original
                # exception chained for debugging.
                raise RuntimeError(str(e)) from e
            # returned code 273: Dataset not processed yet
            time.sleep(retry_delay_seconds)
    if dataset is None:
        raise ValueError(
            "Failed to fetch uploaded dataset: {}".format(upload_did)
        )
    self.assertEqual(dataset.ignore_attribute, ignore_attribute)
1135+
10601136
def test_create_dataset_row_id_attribute_error(self):
10611137
# meta-information
10621138
name = '%s-pandas_testing_dataset' % self._get_sentinel()

0 commit comments

Comments
 (0)