Rename 'ignore_attributes' to 'ignore_attribute' (dataset attribute, related keyword arguments, and tests)

Neeratyoy · Neeratyoy · commit cda9200ccbab · 2019-06-14T15:30:41.000+02:00
diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py
@@ -132,9 +132,9 @@ def __init__(self, name, description, format=None,
         self.default_target_attribute = default_target_attribute
         self.row_id_attribute = row_id_attribute
         if isinstance(ignore_attribute, str):
-            self.ignore_attributes = [ignore_attribute]
+            self.ignore_attribute = [ignore_attribute]
         elif isinstance(ignore_attribute, list) or ignore_attribute is None:
-            self.ignore_attributes = ignore_attribute
+            self.ignore_attribute = ignore_attribute
         else:
             raise ValueError('Wrong data type for ignore_attribute. '
                              'Should be list.')
@@ -423,7 +423,7 @@ def get_data(
             self,
             target: Optional[Union[List[str], str]] = None,
             include_row_id: bool = False,
-            include_ignore_attributes: bool = False,
+            include_ignore_attribute: bool = False,
             dataset_format: str = "dataframe",
     ) -> Tuple[
             Union[np.ndarray, pd.DataFrame, scipy.sparse.csr_matrix],
@@ -440,7 +440,7 @@ def get_data(
             Splitting multiple columns is currently not supported.
         include_row_id : boolean (default=False)
             Whether to include row ids in the returned dataset.
-        include_ignore_attributes : boolean (default=False)
+        include_ignore_attribute : boolean (default=False)
             Whether to include columns that are marked as "ignore"
             on the server in the dataset.
         dataset_format : string (default='dataframe')
@@ -479,11 +479,11 @@ def get_data(
             elif isinstance(self.row_id_attribute, Iterable):
                 to_exclude.extend(self.row_id_attribute)
 
-        if not include_ignore_attributes and self.ignore_attributes is not None:
-            if isinstance(self.ignore_attributes, str):
-                to_exclude.append(self.ignore_attributes)
-            elif isinstance(self.ignore_attributes, Iterable):
-                to_exclude.extend(self.ignore_attributes)
+        if not include_ignore_attribute and self.ignore_attribute is not None:
+            if isinstance(self.ignore_attribute, str):
+                to_exclude.append(self.ignore_attribute)
+            elif isinstance(self.ignore_attribute, Iterable):
+                to_exclude.extend(self.ignore_attribute)
 
         if len(to_exclude) > 0:
             logger.info("Going to remove the following attributes:"
@@ -566,7 +566,7 @@ def retrieve_class_labels(self, target_name: str = 'class') -> Union[None, List[
         return None
 
     def get_features_by_type(self, data_type, exclude=None,
-                             exclude_ignore_attributes=True,
+                             exclude_ignore_attribute=True,
                              exclude_row_id_attribute=True):
         """
         Return indices of features of a given type, e.g. all nominal features.
@@ -579,7 +579,7 @@ def get_features_by_type(self, data_type, exclude=None,
         exclude : list(int)
             Indices to exclude (and adapt the return values as if these indices
                         are not present)
-        exclude_ignore_attributes : bool
+        exclude_ignore_attribute : bool
             Whether to exclude the defined ignore attributes (and adapt the
             return values as if these indices are not present)
         exclude_row_id_attribute : bool
@@ -593,9 +593,9 @@ def get_features_by_type(self, data_type, exclude=None,
         """
         if data_type not in OpenMLDataFeature.LEGAL_DATA_TYPES:
             raise TypeError("Illegal feature type requested")
-        if self.ignore_attributes is not None:
-            if not isinstance(self.ignore_attributes, list):
-                raise TypeError("ignore_attributes should be a list")
+        if self.ignore_attribute is not None:
+            if not isinstance(self.ignore_attribute, list):
+                raise TypeError("ignore_attribute should be a list")
         if self.row_id_attribute is not None:
             if not isinstance(self.row_id_attribute, str):
                 raise TypeError("row id attribute should be a str")
@@ -607,8 +607,8 @@ def get_features_by_type(self, data_type, exclude=None,
         to_exclude = []
         if exclude is not None:
             to_exclude.extend(exclude)
-        if exclude_ignore_attributes and self.ignore_attributes is not None:
-            to_exclude.extend(self.ignore_attributes)
+        if exclude_ignore_attribute and self.ignore_attribute is not None:
+            to_exclude.extend(self.ignore_attribute)
         if exclude_row_id_attribute and self.row_id_attribute is not None:
             to_exclude.append(self.row_id_attribute)
 
@@ -680,7 +680,7 @@ def _to_xml(self):
         props = ['id', 'name', 'version', 'description', 'format', 'creator',
                  'contributor', 'collection_date', 'upload_date', 'language',
                  'licence', 'url', 'default_target_attribute',
-                 'row_id_attribute', 'ignore_attributes', 'version_label',
+                 'row_id_attribute', 'ignore_attribute', 'version_label',
                  'citation', 'tag', 'visibility', 'original_data_url',
                  'paper_url', 'update_comment', 'md5_checksum']
 
@@ -690,8 +690,6 @@ def _to_xml(self):
 
         for prop in props:
             content = getattr(self, prop, None)
-            if prop == 'ignore_attributes':
-                prop = "ignore_attribute"
             if content is not None:
                 data_dict["oml:" + prop] = content
 
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
@@ -277,10 +277,10 @@ def __list_datasets(api_call, output_format='dict'):
 
     datasets = dict()
     for dataset_ in datasets_dict['oml:data']['oml:dataset']:
-        ignore_attributes = ['oml:file_id', 'oml:quality']
+        ignore_attribute = ['oml:file_id', 'oml:quality']
         dataset = {k.replace('oml:', ''): v
                    for (k, v) in dataset_.items()
-                   if k not in ignore_attributes}
+                   if k not in ignore_attribute}
         dataset['did'] = int(dataset['did'])
         dataset['version'] = int(dataset['version'])
 
diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py
@@ -141,7 +141,7 @@ def test_get_data_with_target_pandas(self):
         self.assertNotIn("class", attribute_names)
 
     def test_get_data_rowid_and_ignore_and_target(self):
-        self.dataset.ignore_attributes = ["condition"]
+        self.dataset.ignore_attribute = ["condition"]
         self.dataset.row_id_attribute = ["hardness"]
         X, y, categorical, names = self.dataset.get_data(target="class")
         self.assertEqual(X.shape, (898, 36))
@@ -151,15 +151,15 @@ def test_get_data_rowid_and_ignore_and_target(self):
         self.assertEqual(y.shape, (898, ))
 
     def test_get_data_with_ignore_attributes(self):
-        self.dataset.ignore_attributes = ["condition"]
-        rval, _, categorical, _ = self.dataset.get_data(include_ignore_attributes=True)
+        self.dataset.ignore_attribute = ["condition"]
+        rval, _, categorical, _ = self.dataset.get_data(include_ignore_attribute=True)
         for (dtype, is_cat) in zip(rval.dtypes, categorical):
             expected_type = 'category' if is_cat else 'float64'
             self.assertEqual(dtype.name, expected_type)
         self.assertEqual(rval.shape, (898, 39))
         self.assertEqual(len(categorical), 39)
 
-        rval, _, categorical, _ = self.dataset.get_data(include_ignore_attributes=False)
+        rval, _, categorical, _ = self.dataset.get_data(include_ignore_attribute=False)
         for (dtype, is_cat) in zip(rval.dtypes, categorical):
             expected_type = 'category' if is_cat else 'float64'
             self.assertEqual(dtype.name, expected_type)
@@ -271,17 +271,17 @@ def test_get_sparse_dataset_with_rowid(self):
         self.assertEqual(len(categorical), 20000)
 
     def test_get_sparse_dataset_with_ignore_attributes(self):
-        self.sparse_dataset.ignore_attributes = ["V256"]
+        self.sparse_dataset.ignore_attribute = ["V256"]
         rval, _, categorical, _ = self.sparse_dataset.get_data(
-            dataset_format='array', include_ignore_attributes=True
+            dataset_format='array', include_ignore_attribute=True
         )
         self.assertTrue(sparse.issparse(rval))
         self.assertEqual(rval.dtype, np.float32)
         self.assertEqual(rval.shape, (600, 20001))
 
         self.assertEqual(len(categorical), 20001)
         rval, _, categorical, _ = self.sparse_dataset.get_data(
-            dataset_format='array', include_ignore_attributes=False
+            dataset_format='array', include_ignore_attribute=False
         )
         self.assertTrue(sparse.issparse(rval))
         self.assertEqual(rval.dtype, np.float32)
@@ -290,13 +290,13 @@ def test_get_sparse_dataset_with_ignore_attributes(self):
 
     def test_get_sparse_dataset_rowid_and_ignore_and_target(self):
         # TODO: re-add row_id and ignore attributes
-        self.sparse_dataset.ignore_attributes = ["V256"]
+        self.sparse_dataset.ignore_attribute = ["V256"]
         self.sparse_dataset.row_id_attribute = ["V512"]
         X, y, categorical, _ = self.sparse_dataset.get_data(
             dataset_format='array',
             target="class",
             include_row_id=False,
-            include_ignore_attributes=False,
+            include_ignore_attribute=False,
         )
         self.assertTrue(sparse.issparse(X))
         self.assertEqual(X.dtype, np.float32)
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
@@ -1012,7 +1012,7 @@ def test_ignore_attributes_dataset(self):
             original_data_url=original_data_url,
             paper_url=paper_url
         )
-        self.assertEqual(dataset.ignore_attributes, ['outlook'])
+        self.assertEqual(dataset.ignore_attribute, ['outlook'])
 
         # pass a list to ignore_attribute
         dataset = openml.datasets.functions.create_dataset(
@@ -1033,7 +1033,7 @@ def test_ignore_attributes_dataset(self):
             original_data_url=original_data_url,
             paper_url=paper_url
         )
-        self.assertEqual(dataset.ignore_attributes, ['outlook', 'windy'])
+        self.assertEqual(dataset.ignore_attribute, ['outlook', 'windy'])
 
         # raise an error if unknown type
         err_msg = 'Wrong data type for ignore_attribute. Should be list.'