Skip to content

Commit cda9200

Browse files
committed
Refactoring 'ignore_attributes' to 'ignore_attribute'
1 parent da11ea0 commit cda9200

File tree

4 files changed

+30
-32
lines changed

4 files changed

+30
-32
lines changed

openml/datasets/dataset.py

Lines changed: 17 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -132,9 +132,9 @@ def __init__(self, name, description, format=None,
132132
self.default_target_attribute = default_target_attribute
133133
self.row_id_attribute = row_id_attribute
134134
if isinstance(ignore_attribute, str):
135-
self.ignore_attributes = [ignore_attribute]
135+
self.ignore_attribute = [ignore_attribute]
136136
elif isinstance(ignore_attribute, list) or ignore_attribute is None:
137-
self.ignore_attributes = ignore_attribute
137+
self.ignore_attribute = ignore_attribute
138138
else:
139139
raise ValueError('Wrong data type for ignore_attribute. '
140140
'Should be list.')
@@ -423,7 +423,7 @@ def get_data(
423423
self,
424424
target: Optional[Union[List[str], str]] = None,
425425
include_row_id: bool = False,
426-
include_ignore_attributes: bool = False,
426+
include_ignore_attribute: bool = False,
427427
dataset_format: str = "dataframe",
428428
) -> Tuple[
429429
Union[np.ndarray, pd.DataFrame, scipy.sparse.csr_matrix],
@@ -440,7 +440,7 @@ def get_data(
440440
Splitting multiple columns is currently not supported.
441441
include_row_id : boolean (default=False)
442442
Whether to include row ids in the returned dataset.
443-
include_ignore_attributes : boolean (default=False)
443+
include_ignore_attribute : boolean (default=False)
444444
Whether to include columns that are marked as "ignore"
445445
on the server in the dataset.
446446
dataset_format : string (default='dataframe')
@@ -479,11 +479,11 @@ def get_data(
479479
elif isinstance(self.row_id_attribute, Iterable):
480480
to_exclude.extend(self.row_id_attribute)
481481

482-
if not include_ignore_attributes and self.ignore_attributes is not None:
483-
if isinstance(self.ignore_attributes, str):
484-
to_exclude.append(self.ignore_attributes)
485-
elif isinstance(self.ignore_attributes, Iterable):
486-
to_exclude.extend(self.ignore_attributes)
482+
if not include_ignore_attribute and self.ignore_attribute is not None:
483+
if isinstance(self.ignore_attribute, str):
484+
to_exclude.append(self.ignore_attribute)
485+
elif isinstance(self.ignore_attribute, Iterable):
486+
to_exclude.extend(self.ignore_attribute)
487487

488488
if len(to_exclude) > 0:
489489
logger.info("Going to remove the following attributes:"
@@ -566,7 +566,7 @@ def retrieve_class_labels(self, target_name: str = 'class') -> Union[None, List[
566566
return None
567567

568568
def get_features_by_type(self, data_type, exclude=None,
569-
exclude_ignore_attributes=True,
569+
exclude_ignore_attribute=True,
570570
exclude_row_id_attribute=True):
571571
"""
572572
Return indices of features of a given type, e.g. all nominal features.
@@ -579,7 +579,7 @@ def get_features_by_type(self, data_type, exclude=None,
579579
exclude : list(int)
580580
Indices to exclude (and adapt the return values as if these indices
581581
are not present)
582-
exclude_ignore_attributes : bool
582+
exclude_ignore_attribute : bool
583583
Whether to exclude the defined ignore attributes (and adapt the
584584
return values as if these indices are not present)
585585
exclude_row_id_attribute : bool
@@ -593,9 +593,9 @@ def get_features_by_type(self, data_type, exclude=None,
593593
"""
594594
if data_type not in OpenMLDataFeature.LEGAL_DATA_TYPES:
595595
raise TypeError("Illegal feature type requested")
596-
if self.ignore_attributes is not None:
597-
if not isinstance(self.ignore_attributes, list):
598-
raise TypeError("ignore_attributes should be a list")
596+
if self.ignore_attribute is not None:
597+
if not isinstance(self.ignore_attribute, list):
598+
raise TypeError("ignore_attribute should be a list")
599599
if self.row_id_attribute is not None:
600600
if not isinstance(self.row_id_attribute, str):
601601
raise TypeError("row id attribute should be a str")
@@ -607,8 +607,8 @@ def get_features_by_type(self, data_type, exclude=None,
607607
to_exclude = []
608608
if exclude is not None:
609609
to_exclude.extend(exclude)
610-
if exclude_ignore_attributes and self.ignore_attributes is not None:
611-
to_exclude.extend(self.ignore_attributes)
610+
if exclude_ignore_attribute and self.ignore_attribute is not None:
611+
to_exclude.extend(self.ignore_attribute)
612612
if exclude_row_id_attribute and self.row_id_attribute is not None:
613613
to_exclude.append(self.row_id_attribute)
614614

@@ -680,7 +680,7 @@ def _to_xml(self):
680680
props = ['id', 'name', 'version', 'description', 'format', 'creator',
681681
'contributor', 'collection_date', 'upload_date', 'language',
682682
'licence', 'url', 'default_target_attribute',
683-
'row_id_attribute', 'ignore_attributes', 'version_label',
683+
'row_id_attribute', 'ignore_attribute', 'version_label',
684684
'citation', 'tag', 'visibility', 'original_data_url',
685685
'paper_url', 'update_comment', 'md5_checksum']
686686

@@ -690,8 +690,6 @@ def _to_xml(self):
690690

691691
for prop in props:
692692
content = getattr(self, prop, None)
693-
if prop == 'ignore_attributes':
694-
prop = "ignore_attribute"
695693
if content is not None:
696694
data_dict["oml:" + prop] = content
697695

openml/datasets/functions.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -277,10 +277,10 @@ def __list_datasets(api_call, output_format='dict'):
277277

278278
datasets = dict()
279279
for dataset_ in datasets_dict['oml:data']['oml:dataset']:
280-
ignore_attributes = ['oml:file_id', 'oml:quality']
280+
ignore_attribute = ['oml:file_id', 'oml:quality']
281281
dataset = {k.replace('oml:', ''): v
282282
for (k, v) in dataset_.items()
283-
if k not in ignore_attributes}
283+
if k not in ignore_attribute}
284284
dataset['did'] = int(dataset['did'])
285285
dataset['version'] = int(dataset['version'])
286286

tests/test_datasets/test_dataset.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -141,7 +141,7 @@ def test_get_data_with_target_pandas(self):
141141
self.assertNotIn("class", attribute_names)
142142

143143
def test_get_data_rowid_and_ignore_and_target(self):
144-
self.dataset.ignore_attributes = ["condition"]
144+
self.dataset.ignore_attribute = ["condition"]
145145
self.dataset.row_id_attribute = ["hardness"]
146146
X, y, categorical, names = self.dataset.get_data(target="class")
147147
self.assertEqual(X.shape, (898, 36))
@@ -151,15 +151,15 @@ def test_get_data_rowid_and_ignore_and_target(self):
151151
self.assertEqual(y.shape, (898, ))
152152

153153
def test_get_data_with_ignore_attributes(self):
154-
self.dataset.ignore_attributes = ["condition"]
155-
rval, _, categorical, _ = self.dataset.get_data(include_ignore_attributes=True)
154+
self.dataset.ignore_attribute = ["condition"]
155+
rval, _, categorical, _ = self.dataset.get_data(include_ignore_attribute=True)
156156
for (dtype, is_cat) in zip(rval.dtypes, categorical):
157157
expected_type = 'category' if is_cat else 'float64'
158158
self.assertEqual(dtype.name, expected_type)
159159
self.assertEqual(rval.shape, (898, 39))
160160
self.assertEqual(len(categorical), 39)
161161

162-
rval, _, categorical, _ = self.dataset.get_data(include_ignore_attributes=False)
162+
rval, _, categorical, _ = self.dataset.get_data(include_ignore_attribute=False)
163163
for (dtype, is_cat) in zip(rval.dtypes, categorical):
164164
expected_type = 'category' if is_cat else 'float64'
165165
self.assertEqual(dtype.name, expected_type)
@@ -271,17 +271,17 @@ def test_get_sparse_dataset_with_rowid(self):
271271
self.assertEqual(len(categorical), 20000)
272272

273273
def test_get_sparse_dataset_with_ignore_attributes(self):
274-
self.sparse_dataset.ignore_attributes = ["V256"]
274+
self.sparse_dataset.ignore_attribute = ["V256"]
275275
rval, _, categorical, _ = self.sparse_dataset.get_data(
276-
dataset_format='array', include_ignore_attributes=True
276+
dataset_format='array', include_ignore_attribute=True
277277
)
278278
self.assertTrue(sparse.issparse(rval))
279279
self.assertEqual(rval.dtype, np.float32)
280280
self.assertEqual(rval.shape, (600, 20001))
281281

282282
self.assertEqual(len(categorical), 20001)
283283
rval, _, categorical, _ = self.sparse_dataset.get_data(
284-
dataset_format='array', include_ignore_attributes=False
284+
dataset_format='array', include_ignore_attribute=False
285285
)
286286
self.assertTrue(sparse.issparse(rval))
287287
self.assertEqual(rval.dtype, np.float32)
@@ -290,13 +290,13 @@ def test_get_sparse_dataset_with_ignore_attributes(self):
290290

291291
def test_get_sparse_dataset_rowid_and_ignore_and_target(self):
292292
# TODO: re-add row_id and ignore attributes
293-
self.sparse_dataset.ignore_attributes = ["V256"]
293+
self.sparse_dataset.ignore_attribute = ["V256"]
294294
self.sparse_dataset.row_id_attribute = ["V512"]
295295
X, y, categorical, _ = self.sparse_dataset.get_data(
296296
dataset_format='array',
297297
target="class",
298298
include_row_id=False,
299-
include_ignore_attributes=False,
299+
include_ignore_attribute=False,
300300
)
301301
self.assertTrue(sparse.issparse(X))
302302
self.assertEqual(X.dtype, np.float32)

tests/test_datasets/test_dataset_functions.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1012,7 +1012,7 @@ def test_ignore_attributes_dataset(self):
10121012
original_data_url=original_data_url,
10131013
paper_url=paper_url
10141014
)
1015-
self.assertEqual(dataset.ignore_attributes, ['outlook'])
1015+
self.assertEqual(dataset.ignore_attribute, ['outlook'])
10161016

10171017
# pass a list to ignore_attribute
10181018
dataset = openml.datasets.functions.create_dataset(
@@ -1033,7 +1033,7 @@ def test_ignore_attributes_dataset(self):
10331033
original_data_url=original_data_url,
10341034
paper_url=paper_url
10351035
)
1036-
self.assertEqual(dataset.ignore_attributes, ['outlook', 'windy'])
1036+
self.assertEqual(dataset.ignore_attribute, ['outlook', 'windy'])
10371037

10381038
# raise an error if unknown type
10391039
err_msg = 'Wrong data type for ignore_attribute. Should be list.'

0 commit comments

Comments (0)