@@ -799,6 +799,154 @@ def status_update(data_id, status):
799799 raise ValueError ("Data id/status does not collide" )
800800
801801
def edit_dataset(
    data_id,
    description=None,
    creator=None,
    contributor=None,
    collection_date=None,
    language=None,
    attributes=None,
    data=None,
    default_target_attribute=None,
    ignore_attribute=None,
    citation=None,
    row_id_attribute=None,
    original_data_url=None,
    paper_url=None,
) -> int:
    """
    Edits an OpenMLDataset.

    Specify at least one field to edit, apart from data_id.

    - For certain fields, a new dataset version is created: attributes, data,
      default_target_attribute, ignore_attribute, row_id_attribute.

    - For other fields, the uploader can edit the existing version.
      No one except the uploader can edit the existing version.

    Parameters
    ----------
    data_id : int
        ID of the dataset.
    description : str
        Description of the dataset.
    creator : str
        The person who created the dataset.
    contributor : str
        People who contributed to the current version of the dataset.
    collection_date : str
        The date the data was originally collected, given by the uploader.
    language : str
        Language in which the data is represented.
        Starts with 1 upper case letter, rest lower case, e.g. 'English'.
    attributes : list, dict, or 'auto'
        A list of tuples. Each tuple consists of the attribute name and type.
        If passing a pandas DataFrame, the attributes can be automatically
        inferred by passing ``'auto'``. Specific attributes can be manually
        specified by a passing a dictionary where the key is the name of the
        attribute and the value is the data type of the attribute.
    data : ndarray, list, dataframe, coo_matrix, shape (n_samples, n_features)
        An array that contains both the attributes and the targets. When
        providing a dataframe, the attribute names and type can be inferred by
        passing ``attributes='auto'``.
        The target feature is indicated as meta-data of the dataset.
    default_target_attribute : str
        The default target attribute, if it exists.
        Can have multiple values, comma separated.
    ignore_attribute : str | list
        Attributes that should be excluded in modelling,
        such as identifiers and indexes.
    citation : str
        Reference(s) that should be cited when building on this data.
    row_id_attribute : str, optional
        The attribute that represents the row-id column, if present in the
        dataset. If ``data`` is a dataframe and ``row_id_attribute`` is not
        specified, the index of the dataframe will be used as the
        ``row_id_attribute``. If the name of the index is ``None``, it will
        be discarded.

        .. versionadded: 0.8
            Inference of ``row_id_attribute`` from a dataframe.
    original_data_url : str, optional
        For derived data, the url to the original dataset.
    paper_url : str, optional
        Link to a paper describing the dataset.

    Returns
    -------
    int
        data_id of the existing edited version or the new version created
        and published.

    Raises
    ------
    TypeError
        If ``data_id`` is not an int.
    """
    if not isinstance(data_id, int):
        raise TypeError("`data_id` must be of type `int`, not {}.".format(type(data_id)))

    # Case 1: editing any of these fields requires creating and publishing a
    # new version of the dataset with the changed field(s).
    if any(
        field is not None
        for field in [
            data,
            attributes,
            default_target_attribute,
            row_id_attribute,
            ignore_attribute,
        ]
    ):
        logger.warning("Creating a new version of dataset, cannot edit existing version")
        dataset = get_dataset(data_id)

        # Reuse the existing ARFF payload for any field the caller did not
        # supply; `or` falls back to the current dataset's value.
        decoded_arff = dataset._get_arff(format="arff")
        data_old = decoded_arff["data"]
        data_new = data if data is not None else data_old
        dataset_new = create_dataset(
            name=dataset.name,
            description=description or dataset.description,
            creator=creator or dataset.creator,
            contributor=contributor or dataset.contributor,
            collection_date=collection_date or dataset.collection_date,
            language=language or dataset.language,
            licence=dataset.licence,
            attributes=attributes or decoded_arff["attributes"],
            data=data_new,
            default_target_attribute=default_target_attribute or dataset.default_target_attribute,
            ignore_attribute=ignore_attribute or dataset.ignore_attribute,
            citation=citation or dataset.citation,
            row_id_attribute=row_id_attribute or dataset.row_id_attribute,
            original_data_url=original_data_url or dataset.original_data_url,
            paper_url=paper_url or dataset.paper_url,
            update_comment=dataset.update_comment,
            version_label=dataset.version_label,
        )
        dataset_new.publish()
        return dataset_new.dataset_id

    # Case 2: the remaining fields can be edited in place on the existing
    # version. Compose the data edit parameters as xml.
    form_data = {"data_id": data_id}
    xml = OrderedDict()  # type: 'OrderedDict[str, OrderedDict]'
    xml["oml:data_edit_parameters"] = OrderedDict()
    xml["oml:data_edit_parameters"]["@xmlns:oml"] = "http://openml.org/openml"
    xml["oml:data_edit_parameters"]["oml:description"] = description
    xml["oml:data_edit_parameters"]["oml:creator"] = creator
    xml["oml:data_edit_parameters"]["oml:contributor"] = contributor
    xml["oml:data_edit_parameters"]["oml:collection_date"] = collection_date
    xml["oml:data_edit_parameters"]["oml:language"] = language
    xml["oml:data_edit_parameters"]["oml:citation"] = citation
    xml["oml:data_edit_parameters"]["oml:original_data_url"] = original_data_url
    xml["oml:data_edit_parameters"]["oml:paper_url"] = paper_url

    # Drop fields the caller did not supply. NOTE: this filters on truthiness,
    # so an explicitly passed empty string is also dropped, not just None.
    for k in list(xml["oml:data_edit_parameters"]):
        if not xml["oml:data_edit_parameters"][k]:
            del xml["oml:data_edit_parameters"][k]

    file_elements = {"edit_parameters": ("description.xml", xmltodict.unparse(xml))}
    result_xml = openml._api_calls._perform_api_call(
        "data/edit", "post", data=form_data, file_elements=file_elements
    )
    result = xmltodict.parse(result_xml)
    data_id = result["oml:data_edit"]["oml:id"]
    return int(data_id)
948+
949+
802950def _get_dataset_description (did_cache_dir , dataset_id ):
803951 """Get the dataset description as xml dictionary.
804952
0 commit comments