Skip to content

Commit 876be65

Browse files
glemaitre authored and mfeurer committed
[MRG] DEPR: remove the format parameter from create_dataset (#592)
* DEPR: remove the format parameter from create_dataset
* EHN: check the type of dataframe before the conversion
* TST: remove the format parameter
1 parent 57d61c4 commit 876be65

File tree

2 files changed

+24
-51
lines changed

2 files changed

+24
-51
lines changed

openml/datasets/functions.py

Lines changed: 24 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -419,7 +419,7 @@ def create_dataset(name, description, creator, contributor,
419419
licence, attributes, data,
420420
default_target_attribute,
421421
ignore_attribute, citation,
422-
row_id_attribute=None, format=None,
422+
row_id_attribute=None,
423423
original_data_url=None, paper_url=None,
424424
update_comment=None, version_label=None):
425425
"""Create a dataset.
@@ -473,11 +473,6 @@ def create_dataset(name, description, creator, contributor,
473473
be discarded.
474474
.. versionadded: 0.8
475475
Inference of ``row_id_attribute`` from a dataframe.
476-
format : str, optional
477-
Format of the dataset which can be either 'arff' or 'sparse_arff'.
478-
By default, the format is automatically inferred.
479-
.. deprecated: 0.8
480-
``format`` is deprecated in 0.8 and will be removed in 0.10.
481476
original_data_url : str, optional
482477
For derived data, the url to the original dataset.
483478
paper_url : str, optional
@@ -536,34 +531,29 @@ def create_dataset(name, description, creator, contributor,
536531
else:
537532
data = data.values
538533

539-
if format is not None:
540-
warn("The format parameter will be deprecated in the future,"
541-
" the method will determine the format of the ARFF "
542-
"based on the given data.", DeprecationWarning)
543-
d_format = format
544-
545-
# Determine ARFF format from the dataset
546-
else:
547-
if isinstance(data, (list, np.ndarray)):
548-
if isinstance(data[0], (list, np.ndarray)):
549-
d_format = 'arff'
550-
elif isinstance(data[0], dict):
551-
d_format = 'sparse_arff'
552-
else:
553-
raise ValueError(
554-
'When giving a list or a numpy.ndarray, '
555-
'they should contain a list/ numpy.ndarray '
556-
'for dense data or a dictionary for sparse '
557-
'data. Got {!r} instead.'
558-
.format(data[0])
559-
)
560-
elif isinstance(data, coo_matrix):
561-
d_format = 'sparse_arff'
534+
if isinstance(data, (list, np.ndarray)):
535+
if isinstance(data[0], (list, np.ndarray)):
536+
data_format = 'arff'
537+
elif isinstance(data[0], dict):
538+
data_format = 'sparse_arff'
562539
else:
563540
raise ValueError(
564-
'Invalid data type. The data type can be a list, '
565-
'a numpy ndarray or a scipy.sparse.coo_matrix'
541+
'When giving a list or a numpy.ndarray, '
542+
'they should contain a list/ numpy.ndarray '
543+
'for dense data or a dictionary for sparse '
544+
'data. Got {!r} instead.'
545+
.format(data[0])
566546
)
547+
elif isinstance(data, coo_matrix):
548+
data_format = 'sparse_arff'
549+
else:
550+
raise ValueError(
551+
'When giving a list or a numpy.ndarray, '
552+
'they should contain a list/ numpy.ndarray '
553+
'for dense data or a dictionary for sparse '
554+
'data. Got {!r} instead.'
555+
.format(data[0])
556+
)
567557

568558
arff_object = {
569559
'relation': name,
@@ -577,10 +567,11 @@ def create_dataset(name, description, creator, contributor,
577567
try:
578568
# check if ARFF is valid
579569
decoder = arff.ArffDecoder()
570+
return_type = arff.COO if data_format == 'sparse_arff' else arff.DENSE
580571
decoder.decode(
581572
arff_dataset,
582573
encode_nominal=True,
583-
return_type=arff.COO if d_format == 'sparse_arff' else arff.DENSE
574+
return_type=return_type
584575
)
585576
except arff.ArffException:
586577
raise ValueError("The arguments you have provided \
@@ -589,7 +580,7 @@ def create_dataset(name, description, creator, contributor,
589580
return OpenMLDataset(
590581
name,
591582
description,
592-
data_format=d_format,
583+
data_format=data_format,
593584
creator=creator,
594585
contributor=contributor,
595586
collection_date=collection_date,

tests/test_datasets/test_dataset_functions.py

Lines changed: 0 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -683,18 +683,6 @@ def test_create_invalid_dataset(self):
683683
**param
684684
)
685685

686-
def test_create_dataset_warning(self):
687-
688-
parameters = self._get_empty_param_for_dataset()
689-
parameters['format'] = 'arff'
690-
with catch_warnings():
691-
filterwarnings('error')
692-
self.assertRaises(
693-
DeprecationWarning,
694-
create_dataset,
695-
**parameters
696-
)
697-
698686
def test_get_online_dataset_arff(self):
699687

700688
# Australian dataset
@@ -768,7 +756,6 @@ def test_create_dataset_pandas(self):
768756
citation=citation,
769757
attributes='auto',
770758
data=df,
771-
format=None,
772759
version_label='test',
773760
original_data_url=original_data_url,
774761
paper_url=paper_url
@@ -803,7 +790,6 @@ def test_create_dataset_pandas(self):
803790
citation=citation,
804791
attributes='auto',
805792
data=df,
806-
format=None,
807793
version_label='test',
808794
original_data_url=original_data_url,
809795
paper_url=paper_url
@@ -840,7 +826,6 @@ def test_create_dataset_pandas(self):
840826
citation=citation,
841827
attributes=attributes,
842828
data=df,
843-
format=None,
844829
version_label='test',
845830
original_data_url=original_data_url,
846831
paper_url=paper_url
@@ -892,7 +877,6 @@ def test_create_dataset_row_id_attribute_error(self):
892877
attributes='auto',
893878
data=df,
894879
row_id_attribute='unknown_row_id',
895-
format=None,
896880
version_label='test',
897881
original_data_url=original_data_url,
898882
paper_url=paper_url
@@ -939,7 +923,6 @@ def test_create_dataset_row_id_attribute_inference(self):
939923
attributes='auto',
940924
data=df,
941925
row_id_attribute=row_id,
942-
format=None,
943926
version_label='test',
944927
original_data_url=original_data_url,
945928
paper_url=paper_url
@@ -986,7 +969,6 @@ def test_create_dataset_attributes_auto_without_df(self):
986969
citation=citation,
987970
attributes=attributes,
988971
data=data,
989-
format=None,
990972
version_label='test',
991973
original_data_url=original_data_url,
992974
paper_url=paper_url

0 commit comments

Comments
 (0)