@@ -813,56 +813,63 @@ def edit_dataset(
813813 original_data_url = None ,
814814 paper_url = None ,
815815) -> int :
816+ """ Edits an OpenMLDataset.
817+
818+ In addition to providing the dataset id of the dataset to edit (through data_id),
819+ you must specify a value for at least one of the optional function arguments,
820+ i.e. one value for a field to edit.
821+
822+ This function allows editing of both non-critical and critical fields.
823+ Critical fields are default_target_attribute, ignore_attribute, row_id_attribute.
824+
825+ - Editing non-critical data fields is allowed for all authenticated users.
826+ - Editing critical fields is allowed only for the owner, provided there are no tasks
827+ associated with this dataset.
828+
829+ If dataset has tasks or if the user is not the owner, the only way
830+ to edit critical fields is to use fork_dataset followed by edit_dataset.
831+
832+ Parameters
833+ ----------
834+ data_id : int
835+ ID of the dataset.
836+ description : str
837+ Description of the dataset.
838+ creator : str
839+ The person who created the dataset.
840+ contributor : str
841+ People who contributed to the current version of the dataset.
842+ collection_date : str
843+ The date the data was originally collected, given by the uploader.
844+ language : str
845+ Language in which the data is represented.
846+ Starts with 1 upper case letter, rest lower case, e.g. 'English'.
847+ default_target_attribute : str
848+ The default target attribute, if it exists.
849+ Can have multiple values, comma separated.
850+ ignore_attribute : str | list
851+ Attributes that should be excluded in modelling,
852+ such as identifiers and indexes.
853+ citation : str
854+ Reference(s) that should be cited when building on this data.
855+ row_id_attribute : str, optional
856+ The attribute that represents the row-id column, if present in the
857+ dataset. If ``data`` is a dataframe and ``row_id_attribute`` is not
858+ specified, the index of the dataframe will be used as the
859+ ``row_id_attribute``. If the name of the index is ``None``, it will
860+ be discarded.
861+
862+ .. versionadded:: 0.8
863+ Inference of ``row_id_attribute`` from a dataframe.
864+ original_data_url : str, optional
865+ For derived data, the url to the original dataset.
866+ paper_url : str, optional
867+ Link to a paper describing the dataset.
868+
869+ Returns
870+ -------
871+ Dataset id
816872 """
817- Edits an OpenMLDataset.
818- Specify at least one field to edit, apart from data_id
819- - For certain fields, a new dataset version is created : attributes, data,
820- default_target_attribute, ignore_attribute, row_id_attribute.
821-
822- - For other fields, the uploader can edit the existing version.
823- No one except the uploader can edit the existing version.
824-
825- Parameters
826- ----------
827- data_id : int
828- ID of the dataset.
829- description : str
830- Description of the dataset.
831- creator : str
832- The person who created the dataset.
833- contributor : str
834- People who contributed to the current version of the dataset.
835- collection_date : str
836- The date the data was originally collected, given by the uploader.
837- language : str
838- Language in which the data is represented.
839- Starts with 1 upper case letter, rest lower case, e.g. 'English'.
840- default_target_attribute : str
841- The default target attribute, if it exists.
842- Can have multiple values, comma separated.
843- ignore_attribute : str | list
844- Attributes that should be excluded in modelling,
845- such as identifiers and indexes.
846- citation : str
847- Reference(s) that should be cited when building on this data.
848- row_id_attribute : str, optional
849- The attribute that represents the row-id column, if present in the
850- dataset. If ``data`` is a dataframe and ``row_id_attribute`` is not
851- specified, the index of the dataframe will be used as the
852- ``row_id_attribute``. If the name of the index is ``None``, it will
853- be discarded.
854-
855- .. versionadded: 0.8
856- Inference of ``row_id_attribute`` from a dataframe.
857- original_data_url : str, optional
858- For derived data, the url to the original dataset.
859- paper_url : str, optional
860- Link to a paper describing the dataset.
861-
862-
863- Returns
864- -------
865- data_id of the existing edited version or the new version created and published"""
866873 if not isinstance (data_id , int ):
867874 raise TypeError ("`data_id` must be of type `int`, not {}." .format (type (data_id )))
868875
@@ -897,6 +904,45 @@ def edit_dataset(
897904 return int (data_id )
898905
899906
def fork_dataset(data_id: int) -> int:
    """Fork a dataset, making the authenticated user the owner of the new version.

    The fork can carry its own dataset meta-data, while the actual data is
    shared with the original version.

    Forking exists for users who cannot change the critical fields of a
    dataset (default_target_attribute, ignore_attribute, row_id_attribute)
    through edit_dataset. That is the case when:

    - the user is not the owner of the dataset, or
    - the user is the owner, but the dataset already has tasks.

    In both situations the critical fields can only be edited by:

    1. forking the dataset with fork_dataset, and then
    2. calling edit_dataset on the forked version.

    Parameters
    ----------
    data_id : int
        Id of the dataset to be forked.

    Returns
    -------
    int
        Dataset id of the forked dataset.

    Raises
    ------
    TypeError
        If ``data_id`` is not an ``int``.
    """
    # Reject wrongly typed ids before any request is sent.
    if not isinstance(data_id, int):
        message = "`data_id` must be of type `int`, not {}.".format(type(data_id))
        raise TypeError(message)

    # The fork endpoint only needs the id of the dataset to copy.
    payload = {"data_id": data_id}
    response_xml = openml._api_calls._perform_api_call("data/fork", "post", data=payload)

    # The id of the newly created dataset is read from the XML response.
    parsed = xmltodict.parse(response_xml)
    forked_id = parsed["oml:data_fork"]["oml:id"]
    return int(forked_id)
944+
945+
900946def _get_dataset_description (did_cache_dir , dataset_id ):
901947 """Get the dataset description as xml dictionary.
902948
0 commit comments