@@ -1015,6 +1015,7 @@ def test_ignore_attributes_dataset(self):
10151015 self .assertEqual (dataset .ignore_attribute , ['outlook' ])
10161016
10171017 # pass a list to ignore_attribute
1018+ ignore_attribute = ['outlook' , 'windy' ]
10181019 dataset = openml .datasets .functions .create_dataset (
10191020 name = name ,
10201021 description = description ,
@@ -1025,15 +1026,15 @@ def test_ignore_attributes_dataset(self):
10251026 licence = licence ,
10261027 default_target_attribute = default_target_attribute ,
10271028 row_id_attribute = None ,
1028- ignore_attribute = [ 'outlook' , 'windy' ] ,
1029+ ignore_attribute = ignore_attribute ,
10291030 citation = citation ,
10301031 attributes = 'auto' ,
10311032 data = df ,
10321033 version_label = 'test' ,
10331034 original_data_url = original_data_url ,
10341035 paper_url = paper_url
10351036 )
1036- self .assertEqual (dataset .ignore_attribute , [ 'outlook' , 'windy' ] )
1037+ self .assertEqual (dataset .ignore_attribute , ignore_attribute )
10371038
10381039 # raise an error if unknown type
10391040 err_msg = 'Wrong data type for ignore_attribute. Should be list.'
@@ -1057,6 +1058,81 @@ def test_ignore_attributes_dataset(self):
10571058 paper_url = paper_url
10581059 )
10591060
def test_publish_fetch_ignore_attribute(self):
    """Publish a dataset and check ignore_attribute after fetching it back.

    A small dataset is built from a pandas DataFrame with a list passed as
    ``ignore_attribute``, published, and then fetched again with
    ``openml.datasets.get_dataset``. Fetching is retried, with a pause
    between attempts, for as long as the error message reports
    "Dataset not processed yet".

    Raises
    ------
    RuntimeError
        If fetching fails for any reason other than the dataset not
        having been processed yet.
    ValueError
        If the dataset could not be fetched within the allowed attempts.
    """
    # Local import keeps this block self-contained; only used for the
    # pause between fetch attempts.
    import time

    data = [
        ['a', 'sunny', 85.0, 85.0, 'FALSE', 'no'],
        ['b', 'sunny', 80.0, 90.0, 'TRUE', 'no'],
        ['c', 'overcast', 83.0, 86.0, 'FALSE', 'yes'],
        ['d', 'rainy', 70.0, 96.0, 'FALSE', 'yes'],
        ['e', 'rainy', 68.0, 80.0, 'FALSE', 'yes']
    ]
    column_names = ['rnd_str', 'outlook', 'temperature', 'humidity',
                    'windy', 'play']
    df = pd.DataFrame(data, columns=column_names)
    # enforce the type of each column
    df['outlook'] = df['outlook'].astype('category')
    # NOTE(review): astype('bool') on non-empty strings yields True for
    # both 'TRUE' and 'FALSE'. This does not matter for what this test
    # checks, but confirm it is intended.
    df['windy'] = df['windy'].astype('bool')
    df['play'] = df['play'].astype('category')
    # meta-information
    name = '%s-pandas_testing_dataset' % self._get_sentinel()
    description = 'Synthetic dataset created from a Pandas DataFrame'
    creator = 'OpenML tester'
    collection_date = '01-01-2018'
    language = 'English'
    licence = 'MIT'
    default_target_attribute = 'play'
    citation = 'None'
    original_data_url = 'http://openml.github.io/openml-python'
    paper_url = 'http://openml.github.io/openml-python'

    # pass a list to ignore_attribute
    ignore_attribute = ['outlook', 'windy']
    dataset = openml.datasets.functions.create_dataset(
        name=name,
        description=description,
        creator=creator,
        contributor=None,
        collection_date=collection_date,
        language=language,
        licence=licence,
        default_target_attribute=default_target_attribute,
        row_id_attribute=None,
        ignore_attribute=ignore_attribute,
        citation=citation,
        attributes='auto',
        data=df,
        version_label='test',
        original_data_url=original_data_url,
        paper_url=paper_url
    )

    # publish dataset
    upload_did = dataset.publish()
    # test if publish was successful
    self.assertIsInstance(dataset.dataset_id, int)

    # Fetch the dataset back, retrying while it is reported as not yet
    # processed. Pausing between attempts avoids hammering the server
    # with back-to-back requests and gives it time to finish.
    max_attempts = 100
    poll_delay_seconds = 1
    for _ in range(max_attempts):
        try:
            fetched_dataset = openml.datasets.get_dataset(upload_did)
            break
        except Exception as e:
            # The only retryable failure is the "not processed yet"
            # message (the original comment attributes it to code 273).
            if str(e).split(':')[-1].strip() != "Dataset not processed yet":
                raise RuntimeError(str(e))
            time.sleep(poll_delay_seconds)
    else:
        # Every attempt hit the "not processed yet" response.
        raise ValueError("Failed to fetch uploaded dataset: {}".format(upload_did))
    self.assertEqual(fetched_dataset.ignore_attribute, ignore_attribute)
1135+
10601136 def test_create_dataset_row_id_attribute_error (self ):
10611137 # meta-information
10621138 name = '%s-pandas_testing_dataset' % self ._get_sentinel ()
0 commit comments