Skip to content

Commit f70c720

Browse files
authored
change edit_api to reflect server (#941)
* change edit_api to reflect server
* change test and example to reflect rest API changes
* tutorial comments
* Update datasets_tutorial.py
1 parent 5d2e0ce commit f70c720

File tree

3 files changed

+64
-125
lines changed

3 files changed

+64
-125
lines changed

examples/30_extended/datasets_tutorial.py

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
#
2222
# * Use the output_format parameter to select output type
2323
# * Default gives 'dict' (other option: 'dataframe', see below)
24-
24+
#
2525
openml_list = openml.datasets.list_datasets() # returns a dict
2626

2727
# Show a nice table with some key data properties
@@ -117,31 +117,33 @@
117117
# This example uses the test server, to avoid editing a dataset on the main server.
118118
openml.config.start_using_configuration_for_example()
119119
############################################################################
120-
# Changes to these field edits existing version: allowed only for dataset owner
120+
# Edit non-critical fields, allowed for all authorized users:
121+
# description, creator, contributor, collection_date, language, citation,
122+
# original_data_url, paper_url
123+
desc = (
124+
"This data sets consists of 3 different types of irises' "
125+
"(Setosa, Versicolour, and Virginica) petal and sepal length,"
126+
" stored in a 150x4 numpy.ndarray"
127+
)
128+
did = 128
121129
data_id = edit_dataset(
122-
564,
123-
description="xor dataset represents XOR operation",
124-
contributor="",
125-
collection_date="2019-10-29 17:06:18",
126-
original_data_url="https://www.kaggle.com/ancientaxe/and-or-xor",
127-
paper_url="",
128-
citation="kaggle",
130+
did,
131+
description=desc,
132+
creator="R.A.Fisher",
133+
collection_date="1937",
134+
citation="The use of multiple measurements in taxonomic problems",
129135
language="English",
130136
)
131137
edited_dataset = get_dataset(data_id)
132138
print(f"Edited dataset ID: {data_id}")
133139

134140

135141
############################################################################
136-
# Changes to these fields: attributes, default_target_attribute,
137-
# row_id_attribute, ignore_attribute generates a new edited version: allowed for anyone
138-
139-
new_attributes = [
140-
("x0", "REAL"),
141-
("x1", "REAL"),
142-
("y", "REAL"),
143-
]
144-
data_id = edit_dataset(564, attributes=new_attributes)
142+
# Edit critical fields, allowed only for owners of the dataset:
143+
# default_target_attribute, row_id_attribute, ignore_attribute
144+
# To edit critical fields of a dataset owned by you, configure the API key:
145+
# openml.config.apikey = 'FILL_IN_OPENML_API_KEY'
146+
data_id = edit_dataset(564, default_target_attribute="y")
145147
print(f"Edited dataset ID: {data_id}")
146148

147149
openml.config.stop_using_configuration_for_example()

openml/datasets/functions.py

Lines changed: 3 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -806,8 +806,6 @@ def edit_dataset(
806806
contributor=None,
807807
collection_date=None,
808808
language=None,
809-
attributes=None,
810-
data=None,
811809
default_target_attribute=None,
812810
ignore_attribute=None,
813811
citation=None,
@@ -839,17 +837,6 @@ def edit_dataset(
839837
language : str
840838
Language in which the data is represented.
841839
Starts with 1 upper case letter, rest lower case, e.g. 'English'.
842-
attributes : list, dict, or 'auto'
843-
A list of tuples. Each tuple consists of the attribute name and type.
844-
If passing a pandas DataFrame, the attributes can be automatically
845-
inferred by passing ``'auto'``. Specific attributes can be manually
846-
specified by a passing a dictionary where the key is the name of the
847-
attribute and the value is the data type of the attribute.
848-
data : ndarray, list, dataframe, coo_matrix, shape (n_samples, n_features)
849-
An array that contains both the attributes and the targets. When
850-
providing a dataframe, the attribute names and type can be inferred by
851-
passing ``attributes='auto'``.
852-
The target feature is indicated as meta-data of the dataset.
853840
default_target_attribute : str
854841
The default target attribute, if it exists.
855842
Can have multiple values, comma separated.
@@ -879,54 +866,6 @@ def edit_dataset(
879866
if not isinstance(data_id, int):
880867
raise TypeError("`data_id` must be of type `int`, not {}.".format(type(data_id)))
881868

882-
# case 1, changing these fields creates a new version of the dataset with changed field
883-
if any(
884-
field is not None
885-
for field in [
886-
data,
887-
attributes,
888-
default_target_attribute,
889-
row_id_attribute,
890-
ignore_attribute,
891-
]
892-
):
893-
logger.warning("Creating a new version of dataset, cannot edit existing version")
894-
895-
# Get old dataset and features
896-
dataset = get_dataset(data_id)
897-
df, y, categorical, attribute_names = dataset.get_data(dataset_format="dataframe")
898-
attributes_old = attributes_arff_from_df(df)
899-
900-
# Sparse data needs to be provided in a different format from dense data
901-
if dataset.format == "sparse_arff":
902-
df, y, categorical, attribute_names = dataset.get_data(dataset_format="array")
903-
data_old = coo_matrix(df)
904-
else:
905-
data_old = df
906-
data_new = data if data is not None else data_old
907-
dataset_new = create_dataset(
908-
name=dataset.name,
909-
description=description or dataset.description,
910-
creator=creator or dataset.creator,
911-
contributor=contributor or dataset.contributor,
912-
collection_date=collection_date or dataset.collection_date,
913-
language=language or dataset.language,
914-
licence=dataset.licence,
915-
attributes=attributes or attributes_old,
916-
data=data_new,
917-
default_target_attribute=default_target_attribute or dataset.default_target_attribute,
918-
ignore_attribute=ignore_attribute or dataset.ignore_attribute,
919-
citation=citation or dataset.citation,
920-
row_id_attribute=row_id_attribute or dataset.row_id_attribute,
921-
original_data_url=original_data_url or dataset.original_data_url,
922-
paper_url=paper_url or dataset.paper_url,
923-
update_comment=dataset.update_comment,
924-
version_label=dataset.version_label,
925-
)
926-
dataset_new.publish()
927-
return dataset_new.dataset_id
928-
929-
# case 2, changing any of these fields will update existing dataset
930869
# compose data edit parameters as xml
931870
form_data = {"data_id": data_id}
932871
xml = OrderedDict() # type: 'OrderedDict[str, OrderedDict]'
@@ -937,6 +876,9 @@ def edit_dataset(
937876
xml["oml:data_edit_parameters"]["oml:contributor"] = contributor
938877
xml["oml:data_edit_parameters"]["oml:collection_date"] = collection_date
939878
xml["oml:data_edit_parameters"]["oml:language"] = language
879+
xml["oml:data_edit_parameters"]["oml:default_target_attribute"] = default_target_attribute
880+
xml["oml:data_edit_parameters"]["oml:row_id_attribute"] = row_id_attribute
881+
xml["oml:data_edit_parameters"]["oml:ignore_attribute"] = ignore_attribute
940882
xml["oml:data_edit_parameters"]["oml:citation"] = citation
941883
xml["oml:data_edit_parameters"]["oml:original_data_url"] = original_data_url
942884
xml["oml:data_edit_parameters"]["oml:paper_url"] = paper_url

tests/test_datasets/test_dataset_functions.py

Lines changed: 41 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1341,57 +1341,43 @@ def test_get_dataset_cache_format_feather(self):
13411341
self.assertEqual(len(attribute_names), X.shape[1])
13421342

13431343
def test_data_edit(self):
1344-
1345-
# admin key for test server (only admins or owners can edit datasets).
1346-
# all users can edit their own datasets)
1347-
openml.config.apikey = "d488d8afd93b32331cf6ea9d7003d4c3"
1348-
1349-
# case 1, editing description, creator, contributor, collection_date, original_data_url,
1350-
# paper_url, citation, language edits existing dataset.
1351-
did = 564
1352-
result = edit_dataset(
1353-
did,
1354-
description="xor dataset represents XOR operation",
1355-
contributor="",
1356-
collection_date="2019-10-29 17:06:18",
1357-
original_data_url="https://www.kaggle.com/ancientaxe/and-or-xor",
1358-
paper_url="",
1359-
citation="kaggle",
1360-
language="English",
1344+
# Case 1
1345+
# All users can edit non-critical fields of datasets
1346+
desc = (
1347+
"This data sets consists of 3 different types of irises' "
1348+
"(Setosa, Versicolour, and Virginica) petal and sepal length,"
1349+
" stored in a 150x4 numpy.ndarray"
13611350
)
1362-
self.assertEqual(result, did)
1363-
1364-
# case 2, editing data, attributes, default_target_attribute, row_id_attribute,
1365-
# ignore_attribute generates a new dataset
1366-
1367-
column_names = [
1368-
("input1", "REAL"),
1369-
("input2", "REAL"),
1370-
("y", "REAL"),
1371-
]
1372-
desc = "xor dataset represents XOR operation"
1351+
did = 128
13731352
result = edit_dataset(
1374-
564,
1353+
did,
13751354
description=desc,
1376-
contributor="",
1377-
collection_date="2019-10-29 17:06:18",
1378-
attributes=column_names,
1379-
original_data_url="https://www.kaggle.com/ancientaxe/and-or-xor",
1380-
paper_url="",
1381-
citation="kaggle",
1355+
creator="R.A.Fisher",
1356+
collection_date="1937",
1357+
citation="The use of multiple measurements in taxonomic problems",
13821358
language="English",
13831359
)
1384-
self.assertNotEqual(did, result)
1360+
self.assertEqual(did, result)
1361+
edited_dataset = openml.datasets.get_dataset(did)
1362+
self.assertEqual(edited_dataset.description, desc)
1363+
1364+
# Case 2
1365+
# only owners (or admin) can edit all critical fields of datasets
1366+
# this is a dataset created by CI, so it is editable by this test
1367+
did = 315
1368+
result = edit_dataset(did, default_target_attribute="col_1", ignore_attribute="col_2")
1369+
self.assertEqual(did, result)
1370+
edited_dataset = openml.datasets.get_dataset(did)
1371+
self.assertEqual(edited_dataset.ignore_attribute, ["col_2"])
13851372

13861373
def test_data_edit_errors(self):
1387-
1388-
# admin key for test server (only admins or owners can edit datasets).
1389-
openml.config.apikey = "d488d8afd93b32331cf6ea9d7003d4c3"
13901374
# Check server exception when no field to edit is provided
13911375
self.assertRaisesRegex(
13921376
OpenMLServerException,
1393-
"Please provide atleast one field among description, creator, contributor, "
1394-
"collection_date, language, citation, original_data_url or paper_url to edit.",
1377+
"Please provide atleast one field among description, creator, "
1378+
"contributor, collection_date, language, citation, "
1379+
"original_data_url, default_target_attribute, row_id_attribute, "
1380+
"ignore_attribute or paper_url to edit.",
13951381
edit_dataset,
13961382
data_id=564,
13971383
)
@@ -1403,12 +1389,21 @@ def test_data_edit_errors(self):
14031389
data_id=100000,
14041390
description="xor operation dataset",
14051391
)
1406-
# Check server exception when a non-owner or non-admin tries to edit existing dataset
1407-
openml.config.apikey = "5f0b74b33503e4ad4a7181a91e28719f"
1392+
# Check server exception when owner/admin edits critical features of dataset with tasks
14081393
self.assertRaisesRegex(
14091394
OpenMLServerException,
1410-
"Dataset is not owned by you",
1395+
"Critical features default_target_attribute, row_id_attribute and ignore_attribute "
1396+
"can only be edited for datasets without any tasks.",
14111397
edit_dataset,
1412-
data_id=564,
1413-
description="xor data",
1398+
data_id=223,
1399+
default_target_attribute="y",
1400+
)
1401+
# Check server exception when a non-owner or non-admin tries to edit critical features
1402+
self.assertRaisesRegex(
1403+
OpenMLServerException,
1404+
"Critical features default_target_attribute, row_id_attribute and ignore_attribute "
1405+
"can be edited only by the owner. Fork the dataset if changes are required.",
1406+
edit_dataset,
1407+
data_id=128,
1408+
default_target_attribute="y",
14141409
)

0 commit comments

Comments
 (0)