Skip to content

Commit 957dca1

Browse files
committed
Add tests and make a proposal for changes
1 parent 116b271 commit 957dca1

File tree

5 files changed

+98
-157
lines changed

5 files changed

+98
-157
lines changed

R/convertOMLSetupParamsToDT.R

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# https://test.openml.org/api/v1/evaluation/setup/list/flow/2117/task/403/limit/100 works, while https://test.openml.org/api/v1/evaluation/setup/list/flow/6794 returns nothing.
2+
3+
# Result size limits are okay, as long as I can somehow reliably iterate with the offset, i.e.
4+
# https://test.openml.org/api/v1/evaluation/setup/list/flow/2117/task/403/function/predictive_accuracy/limit/1/offset/2
5+
6+
7+
8+
#' @title Extract Parameters from OpenML run evaluations into a flat structure
#'
#' @description
#' Expands the \code{setup_parameters} list column of \code{run.evals} into one
#' column per parameter (values are converted via \code{convertValueByType})
#' and binds these columns to the remaining columns of \code{run.evals}.
#' Boolean parameters named \code{verbose} are removed before the conversion.
#'
#' @param run.evals [\code{data.frame}]\cr
#'   Result of calling \code{listOMLRunEvaluations(..., setup = TRUE)}.
#'   Must contain a \code{setup_parameters} column.
#' @param drop.constant [\code{logical(1)}]\cr
#'   Should constant parameter columns be dropped before returning the result?
#'
#' @return [\code{\link{data.table}}] with the columns of \code{run.evals}
#'   (minus \code{setup_parameters}) followed by one column per setup parameter.
#' @family run-related functions
#' @export
convertOMLRunEvalsToDT = function(run.evals, drop.constant = TRUE) {
  assert_data_frame(run.evals)
  assert_true(!is.null(run.evals$setup_parameters))
  assert_flag(drop.constant)

  # Build exactly one row per run so that the final cbind() stays aligned.
  rows = lapply(run.evals$setup_parameters, function(params) {
    params = as.data.table(params)
    if (nrow(params) > 0L) {
      # Previously this filter was evaluated but its result was thrown away.
      # %in% (instead of ==) keeps rows whose name or type is NA.
      params = params[!(parameter_name %in% "verbose" & data_type %in% "boolean"), ]
    }
    if (nrow(params) == 0L) {
      # rbindlist() skips empty tables, which would shift all following runs
      # by one row; emit a placeholder row that is removed again below.
      return(as.data.table(list(.no.params = TRUE)))
    }
    params[, convertValueByType(parameter_name, value, data_type)]
  })

  dt = rbindlist(rows, fill = TRUE)
  if (".no.params" %in% names(dt)) {
    dt = dt[, setdiff(names(dt), ".no.params"), with = FALSE]
  }
  if (drop.constant && ncol(dt) > 0L) {
    dt = dt[, vlapply(dt, function(x) length(unique(x)) > 1), with = FALSE]
  }

  run.evals$setup_parameters = NULL
  # Nothing left to append (no parameters, or all of them were constant).
  if (ncol(dt) == 0L) return(as.data.table(run.evals))
  return(cbind(run.evals, dt))
}
32+
33+
34+
# Convert raw setup parameter values according to each parameter's declared type.
# Note that this is very unreliable: the declared type is free text, so only a
# few known spellings are recognised and everything else stays a character.
#
# @param parameter_name [character]
#   Parameter names; used (with quotes stripped) as the column names of the result.
# @param value [character]
#   Raw parameter values, parallel to parameter_name.
# @param type [character]
#   Declared data types, parallel to parameter_name.
# @return [data.table] with one column per parameter.
convertValueByType = function(parameter_name, value, type) {
  na.strings = c("None", "none", "Null", "null")
  # Quote characters wrapping names/values, either literal or HTML-encoded.
  # NOTE(review): the pattern in the reviewed source was an invalid string
  # literal; confirm which of the two encodings the server actually sends.
  quote.pattern = "\"|&quot;"

  value = Map(function(v, t) {
    v[v %in% na.strings] = NA
    v = gsub(quote.pattern, "", v)
    if (t %in% c("boolean", "bool")) {
      v = as.logical(v)
    } else if (t %in% c("float", "number")) {
      v = as.numeric(v)
    } else if (t %in% c("int", "integer", "int or None", "integer or None")) {
      # values that are not integers silently become NA
      v = suppressWarnings(as.integer(v))
    }
    return(v)
  }, value, type)
  names(value) = gsub(quote.pattern, "", parameter_name)
  return(as.data.table(value))
}

R/getOMLRunEvaluations.R

Lines changed: 0 additions & 129 deletions
This file was deleted.

R/listOMLRunEvaluations.R

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,15 @@
1515

1616
content = doAPICall(api.call, file = NULL, method = "GET", verbosity = verbosity)
1717
if (is.null(content)) return(data.frame())
18-
browser()
1918
lst_content = fromJSON(txt = content, simplifyVector = FALSE)
2019
evals = lst_content$evaluations$evaluation
2120

21+
if (setup) {
22+
param_list = lapply(evals, function(x) {
23+
parameters = as.data.table(cleanupSetupParameters(x$parameters))[-25,]
24+
})
25+
}
26+
2227
evals = rbindlist(lapply(evals, function(x) {
2328
if (is.null(x$value)) x$value = NA
2429
if (is.null(x$array_data)) x$array_data = NA else x$array_data = collapse(x$array_data)
@@ -79,7 +84,7 @@
7984
values = list(flow.version = flow.version, flow.source = flow.source, learner.name = learner.name)),
8085
stringsAsFactors = FALSE)
8186
}
82-
87+
if (setup) evals$setup_parameters = param_list
8388
return(evals)
8489
}
8590

R/listOMLSetup.R

Lines changed: 31 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -11,32 +11,7 @@
1111
setup = fromJSON(txt = content)$setups$setup
1212
sid = data.frame(join_id = 1:length(setup$setup_id), setup_id = setup$setup_id)
1313

14-
# Get parameters and clean them up
15-
param = setup$parameter
16-
if (!is.null(names(param))) {
17-
# if elements have a name, it refers to parameter
18-
param = param[!vlapply(param, function(x) length(x) == 0)]
19-
param = as.data.frame(param, stringsAsFactors = FALSE)
20-
param = cbind(param, join_id = 1, stringsAsFactors = FALSE)
21-
} else {
22-
# add names
23-
param = setNames(param, 1:length(param))
24-
# filter out NULL or empty elements
25-
param = param[!vlapply(param, function(x) length(x) == 0)]
26-
# inside each element, replace empty values with NA
27-
param = lapply(param, function(x) {
28-
replace(x, which(vlapply(x, function(i) length(i) == 0)), NA_character_)
29-
})
30-
param = rbindlist(param, fill = TRUE, idcol = "join_id")
31-
param = as.data.frame(param, stringsAsFactors = FALSE)
32-
}
33-
34-
list.cols = colnames(param)[vlapply(param, is.list)]
35-
for (col in list.cols) {
36-
ind = which(vlapply(param[[col]], function(i) length(i) == 0))
37-
param[[col]][ind] = NA_character_
38-
param[[col]] = unlist(param[[col]], recursive = FALSE)
39-
}
14+
param = cleanupSetupParameters(setup$parameter)
4015

4116
ret = merge(param, sid)
4217
ret$id = ret$join_id = NULL
@@ -66,3 +41,33 @@
6641
#' @export
6742
#' @example inst/examples/listOMLSetup.R
6843
listOMLSetup = memoise(.listOMLSetup)
44+
45+
46+
# Get parameters and clean them up.
#
# Flattens the parsed 'parameter' element of a setup listing into a data.frame.
#
# @param param [list | data.frame]
#   If it has names, it is treated as the parameters of a single setup and gets
#   join_id = 1. Otherwise it is treated as a list with one element per setup,
#   and join_id is taken from the position of the element in that list.
# @return [data.frame] in which empty entries are replaced by NA and list
#   columns are unlisted.
cleanupSetupParameters = function(param) {
  is.empty = function(x) length(x) == 0

  if (!is.null(names(param))) {
    # if elements have a name, it refers to parameter
    param = param[!vlapply(param, is.empty)]
    param = as.data.frame(param, stringsAsFactors = FALSE)
    param = cbind(param, join_id = 1, stringsAsFactors = FALSE)
  } else {
    # add names; seq_along() (not 1:length()) so that an empty list does not
    # get the two names c(1, 0), which makes setNames() fail
    param = setNames(param, seq_along(param))
    # filter out NULL or empty elements
    param = param[!vlapply(param, is.empty)]
    # inside each element, replace empty values with NA
    param = lapply(param, function(x) {
      replace(x, which(vlapply(x, is.empty)), NA_character_)
    })
    param = rbindlist(param, fill = TRUE, idcol = "join_id")
    param = as.data.frame(param, stringsAsFactors = FALSE)
  }

  # columns that are still lists: fill empty cells with NA, then flatten
  list.cols = colnames(param)[vlapply(param, is.list)]
  for (col in list.cols) {
    ind = which(vlapply(param[[col]], is.empty))
    param[[col]][ind] = NA_character_
    param[[col]] = unlist(param[[col]], recursive = FALSE)
  }
  return(param)
}

tests/testthat/test_server_listOMLRunEvaluations.R

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,16 @@ test_that("listOMLRunEvaluations", {
3030
expect_error(listOMLRunEvaluations(task.id = task.id, evaluation.measure = "m"))
3131
})
3232
})
33+
34+
# Distinct description: the preceding test in this file is already called
# "listOMLRunEvaluations", which made failures of the two indistinguishable.
test_that("listOMLRunEvaluations returns setup parameters when setup = TRUE", {
  with_main_server({
    setOMLConfig(server = "https://test.openml.org/api/v1")
    task.id = 6L

    # Calls .listOMLRunEvaluations directly, presumably the un-memoised
    # implementation (cf. listOMLSetup = memoise(.listOMLSetup)) - TODO confirm.
    run.evals = .listOMLRunEvaluations(task.id = task.id, evaluation.measure = "area_under_roc_curve",
      setup = TRUE, limit = 20)
    expect_data_frame(run.evals, min.rows = 1L, col.names = "unique")
    expect_subset(
      c("run.id", "task.id", "setup.id", "flow.id", "flow.name", "flow.source", "data.name", "setup_parameters"),
      names(run.evals))
  })
})

0 commit comments

Comments
 (0)