
import xmltodict
from sklearn.dummy import DummyClassifier
+from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
+from sklearn.base import clone

from openml import OpenMLRun
from openml.testing import TestBase, SimpleImputer
@@ -39,6 +41,25 @@ def test_tagging(self):
        run_list = openml.runs.list_runs(tag=tag)
        self.assertEqual(len(run_list), 0)

+    @staticmethod
+    def _test_prediction_data_equal(run, run_prime):
+        # Determine which attributes are numeric and which are not
+        num_cols = np.array(
+            [d_type == "NUMERIC" for _, d_type in run._generate_arff_dict()["attributes"]]
+        )
+        # Get the run data consistently
+        # (for a run fetched from the server, .data_content does not exist)
+        run_data_content = run.predictions.values
+        run_prime_data_content = run_prime.predictions.values
+
+        # Assert the numeric and string parts separately
+        numeric_part = np.array(run_data_content[:, num_cols], dtype=float)
+        numeric_part_prime = np.array(run_prime_data_content[:, num_cols], dtype=float)
+        string_part = run_data_content[:, ~num_cols]
+        string_part_prime = run_prime_data_content[:, ~num_cols]
+        np.testing.assert_array_almost_equal(numeric_part, numeric_part_prime)
+        np.testing.assert_array_equal(string_part, string_part_prime)
+
    def _test_run_obj_equals(self, run, run_prime):
        for dictionary in ["evaluations", "fold_evaluations", "sample_evaluations"]:
            if getattr(run, dictionary) is not None:
@@ -49,14 +70,9 @@ def _test_run_obj_equals(self, run, run_prime):
                if other is not None:
                    self.assertDictEqual(other, dict())
        self.assertEqual(run._to_xml(), run_prime._to_xml())
+        self._test_prediction_data_equal(run, run_prime)

-        numeric_part = np.array(np.array(run.data_content)[:, 0:-2], dtype=float)
-        numeric_part_prime = np.array(np.array(run_prime.data_content)[:, 0:-2], dtype=float)
-        string_part = np.array(run.data_content)[:, -2:]
-        string_part_prime = np.array(run_prime.data_content)[:, -2:]
-        np.testing.assert_array_almost_equal(numeric_part, numeric_part_prime)
-        np.testing.assert_array_equal(string_part, string_part_prime)
-
+        # Test trace
        if run.trace is not None:
            run_trace_content = run.trace.trace_to_arff()["data"]
        else:
@@ -192,6 +208,73 @@ def test_to_from_filesystem_no_model(self):
        with self.assertRaises(ValueError, msg="Could not find model.pkl"):
            openml.runs.OpenMLRun.from_filesystem(cache_path)

+    @staticmethod
+    def _get_models_tasks_for_tests():
+        model_clf = Pipeline(
+            [
+                ("imputer", SimpleImputer(strategy="mean")),
+                ("classifier", DummyClassifier(strategy="prior")),
+            ]
+        )
+        model_reg = Pipeline(
+            [
+                ("imputer", SimpleImputer(strategy="mean")),
+                (
+                    "regressor",
+                    # LinearRegression because dummy does not produce enough float-like values
+                    LinearRegression(),
+                ),
+            ]
+        )
+
+        task_clf = openml.tasks.get_task(119)  # diabetes; hold out validation
+        task_reg = openml.tasks.get_task(733)  # quake; crossvalidation
+
+        return [(model_clf, task_clf), (model_reg, task_reg)]
+
+    @staticmethod
+    def assert_run_prediction_data(task, run, model):
+        # -- Get y_pred and y_true as they should be stored in the run
+        n_repeats, n_folds, n_samples = task.get_split_dimensions()
+        if (n_repeats > 1) or (n_samples > 1):
+            raise ValueError("Test does not support this task type's split dimensions.")
+
+        X, y = task.get_X_and_y()
+
+        # Check the correctness of y_true and y_pred in the run
+        for fold_id in range(n_folds):
+            # Get the data for this fold
+            _, test_indices = task.get_train_test_split_indices(repeat=0, fold=fold_id, sample=0)
+            train_mask = np.full(len(X), True)
+            train_mask[test_indices] = False
+
+            # Get train / test split
+            X_train = X[train_mask]
+            y_train = y[train_mask]
+            X_test = X[~train_mask]
+            y_test = y[~train_mask]
+
+            # Get y_pred
+            y_pred = model.fit(X_train, y_train).predict(X_test)
+
+            # Get the stored data for this fold
+            saved_fold_data = run.predictions[run.predictions["fold"] == fold_id].sort_values(
+                by="row_id"
+            )
+            saved_y_pred = saved_fold_data["prediction"].values
+            gt_key = "truth" if "truth" in list(saved_fold_data) else "correct"
+            saved_y_test = saved_fold_data[gt_key].values
+
+            assert_method = np.testing.assert_array_almost_equal
+            if task.task_type == "Supervised Classification":
+                y_pred = np.take(task.class_labels, y_pred)
+                y_test = np.take(task.class_labels, y_test)
+                assert_method = np.testing.assert_array_equal
+
+            # Assert correctness
+            assert_method(y_pred, saved_y_pred)
+            assert_method(y_test, saved_y_test)
+
    @pytest.mark.sklearn
    def test_publish_with_local_loaded_flow(self):
        """
@@ -200,40 +283,85 @@ def test_publish_with_local_loaded_flow(self):
        """
        extension = openml.extensions.sklearn.SklearnExtension()

-        model = Pipeline(
-            [("imputer", SimpleImputer(strategy="mean")), ("classifier", DummyClassifier())]
-        )
-        task = openml.tasks.get_task(119)  # diabetes; crossvalidation
+        for model, task in self._get_models_tasks_for_tests():
+            # Make sure the flow does not exist on the server yet.
+            flow = extension.model_to_flow(model)
+            self._add_sentinel_to_flow_name(flow)
+            self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version))
+
+            run = openml.runs.run_flow_on_task(
+                flow=flow,
+                task=task,
+                add_local_measures=False,
+                avoid_duplicate_runs=False,
+                upload_flow=False,
+            )

-        # Make sure the flow does not exist on the server yet.
-        flow = extension.model_to_flow(model)
-        self._add_sentinel_to_flow_name(flow)
-        self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version))
+            # Make sure that the flow has not been uploaded, as requested.
+            self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version))

-        run = openml.runs.run_flow_on_task(
-            flow=flow,
-            task=task,
-            add_local_measures=False,
-            avoid_duplicate_runs=False,
-            upload_flow=False,
-        )
+            # Make sure that the prediction data stored in the run is correct.
+            self.assert_run_prediction_data(task, run, clone(model))

-        # Make sure that the flow has not been uploaded as requested.
-        self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version))
+            cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128)))
+            run.to_filesystem(cache_path)
+            # obtain run from filesystem
+            loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path)
+            loaded_run.publish()

-        cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128)))
-        run.to_filesystem(cache_path)
-        # obtain run from filesystem
-        loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path)
-        loaded_run.publish()
-        TestBase._mark_entity_for_removal("run", loaded_run.run_id)
-        TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], loaded_run.run_id)
-        )
+            # Clean up
+            TestBase._mark_entity_for_removal("run", loaded_run.run_id)
+            TestBase.logger.info(
+                "collected from {}: {}".format(__file__.split("/")[-1], loaded_run.run_id)
+            )
+
+            # make sure the flow is published as part of publishing the run.
+            self.assertTrue(openml.flows.flow_exists(flow.name, flow.external_version))
+            openml.runs.get_run(loaded_run.run_id)
+
+    @pytest.mark.sklearn
+    def test_offline_and_online_run_identical(self):
+
+        extension = openml.extensions.sklearn.SklearnExtension()
+
+        for model, task in self._get_models_tasks_for_tests():
+            # Make sure the flow does not exist on the server yet.
+            flow = extension.model_to_flow(model)
+            self._add_sentinel_to_flow_name(flow)
+            self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version))
+
+            run = openml.runs.run_flow_on_task(
+                flow=flow,
+                task=task,
+                add_local_measures=False,
+                avoid_duplicate_runs=False,
+                upload_flow=False,
+            )

-        # make sure the flow is published as part of publishing the run.
-        self.assertTrue(openml.flows.flow_exists(flow.name, flow.external_version))
-        openml.runs.get_run(loaded_run.run_id)
+            # Make sure that the flow has not been uploaded, as requested.
+            self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version))
+
+            # Load the run back from the filesystem
+            cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128)))
+            run.to_filesystem(cache_path)
+            loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path)
+
+            # Assert that the in-memory and filesystem runs are identical (offline - offline)
+            self._test_run_obj_equals(run, loaded_run)
+
+            # Publish and compare offline - online
+            run.publish()
+            self.assertTrue(openml.flows.flow_exists(flow.name, flow.external_version))
+
+            try:
+                online_run = openml.runs.get_run(run.run_id, ignore_cache=True)
+                self._test_prediction_data_equal(run, online_run)
+            finally:
+                # Clean up
+                TestBase._mark_entity_for_removal("run", run.run_id)
+                TestBase.logger.info(
363+ "collected from {}: {}" .format (__file__ .split ("/" )[- 1 ], loaded_run .run_id )
+                )

    def test_run_setup_string_included_in_xml(self):
        SETUP_STRING = "setup-string"