Skip to content

Commit 677dddf

Browse files
authored
Merge pull request #4 from JuliaAI/runsearch
Runsearch
2 parents 81263c0 + 12db7ed commit 677dddf

File tree

5 files changed

+218
-64
lines changed

5 files changed

+218
-64
lines changed

docs/src/reference.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ MLFlowExperiment
1212
MLFlowRun
1313
MLFlowRunInfo
1414
MLFlowRunData
15+
MLFlowRunDataMetric
1516
MLFlowRunStatus
1617
```
1718

@@ -31,6 +32,7 @@ createrun
3132
getrun
3233
updaterun
3334
deleterun
35+
searchruns
3436
```
3537

3638
# Logging

src/MLFlowClient.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ export
2525
MLFlowRunStatus,
2626
MLFlowRunInfo,
2727
MLFlowRunData,
28+
MLFlowRunDataMetric,
2829
MLFlowRun
2930

3031
include("utils.jl")
@@ -41,7 +42,8 @@ export
4142
createrun,
4243
getrun,
4344
updaterun,
44-
deleterun
45+
deleterun,
46+
searchruns
4547

4648
include("logging.jl")
4749
export

src/runs.jl

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,3 +95,65 @@ end
9595
deleterun(mlf::MLFlow, run_info::MLFlowRunInfo) = deleterun(mlf, run_info.run_id)
9696
deleterun(mlf::MLFlow, run::MLFlowRun) = deleterun(mlf, run.info)
9797

"""
    searchruns(mlf::MLFlow, experiment_ids)

Searches for runs in one or more experiments.

# Arguments
- `mlf`: [`MLFlow`](@ref) configuration.
- `experiment_ids::AbstractVector{<:Integer}`: `experiment_id`s in which to search for runs. Can also be a single `Integer`.

# Keywords
- `filter::String`: filter as defined in [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-runs)
- `run_view_type::String`: one of `ACTIVE_ONLY`, `DELETED_ONLY`, or `ALL`.
- `max_results::Integer`: 50,000 by default.
- `order_by::AbstractVector{<:AbstractString}`: list of ordering expressions as defined in [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-runs). Defaults to `["attribute.start_time"]`.
- `page_token::String`: paging functionality, handled automatically. Not meant to be passed by the user.

# Returns
- vector of [`MLFlowRun`](@ref) runs that were found in the list of experiments.

# Throws
- `ErrorException` if `run_view_type` is not one of the supported values.

"""
function searchruns(mlf::MLFlow, experiment_ids::AbstractVector{<:Integer};
    filter::String="",
    run_view_type::String="ACTIVE_ONLY",
    max_results::Integer=50000,
    order_by::AbstractVector{<:AbstractString}=["attribute.start_time"],
    page_token::String=""
)
    endpoint = "runs/search"
    run_view_type ∈ ["ACTIVE_ONLY", "DELETED_ONLY", "ALL"] ||
        error("Unsupported run_view_type = $run_view_type")

    # Request fields that stay the same for every page.
    base_kwargs = (
        experiment_ids=experiment_ids,
        filter=filter,
        run_view_type=run_view_type,
        max_results=max_results,
        order_by=order_by
    )

    runs = MLFlowRun[]
    token = page_token
    # Paging: keep requesting while the response carries a non-empty
    # `next_page_token`, accumulating into a single typed vector.
    while true
        # `page_token` is only sent when there is one to send.
        kwargs = isempty(token) ? base_kwargs : (; base_kwargs..., page_token=token)

        result = mlfpost(mlf, endpoint; kwargs...)
        # A response without a "runs" entry ends the search.
        haskey(result, "runs") || break
        for run in result["runs"]
            push!(runs, MLFlowRun(run["info"], run["data"]))
        end

        token = get(result, "next_page_token", "")
        isempty(token) && break
    end

    return runs
end
# Convenience method: search a single experiment by wrapping its identifier
# in a one-element vector and forwarding all keyword arguments unchanged.
function searchruns(mlf::MLFlow, experiment_id::Integer; kwargs...)
    return searchruns(mlf, [experiment_id]; kwargs...)
end

src/types.jl

Lines changed: 90 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
11
"""
2-
MLFlow(baseuri; apiversion)
2+
MLFlow
33
44
Base type which defines location and version for MLFlow API service.
55
66
# Fields
77
- `baseuri::String`: base MLFlow tracking URI, e.g. `http://localhost:5000`
88
- `apiversion`: used API version, e.g. `2.0`
99
10+
# Constructors
11+
12+
- `MLFlow(baseuri; apiversion=2.0)`
1013
# Examples
1114
``` julia-repl
1215
julia> mlf = MLFlow("http://localhost:5000")
1316
MLFlow("http://localhost:5000", 2.0)
1417
```
18+
1519
"""
1620
struct MLFlow
1721
baseuri::String
@@ -30,6 +34,12 @@ Represents an MLFlow experiment.
3034
- `experiment_id::Integer`: experiment identifier.
3135
- `tags::Any`: list of tags.
3236
- `artifact_location::String`: where are experiment artifacts stored.
37+
38+
# Constructors
39+
40+
- `MLFlowExperiment(name, lifecycle_stage, experiment_id, tags, artifact_location)`
41+
- `MLFlowExperiment(exp::Dict{String,Any})`
42+
3343
"""
3444
struct MLFlowExperiment
3545
name::String
@@ -60,11 +70,14 @@ Represents the status of an MLFlow Run.
6070
# Fields
6171
- `status::String`: one of RUNNING/SCHEDULED/FINISHED/FAILED/KILLED
6272
73+
# Constructors
74+
75+
- `MLFlowRunStatus(status::String)`
6376
"""
6477
struct MLFlowRunStatus
6578
status::String
6679

67-
function MLFlowRunStatus(status)
80+
function MLFlowRunStatus(status::String)
6881
acceptable_statuses = ["RUNNING", "SCHEDULED", "FINISHED", "FAILED", "KILLED"]
6982
status ∈ acceptable_statuses || error("Invalid status $status - choose one of $acceptable_statuses")
7083
new(status)
@@ -77,13 +90,18 @@ end
7790
Represents run metadata.
7891
7992
# Fields
80-
- `run_id::String`
81-
- `experiment_id::Integer`
82-
- `status::MLFlowRunStatus`
83-
- `start_time::Union{Int64,Missing}`
84-
- `end_time::Union{Int64,Missing}`
85-
- `artifact_uri::String`
86-
- `lifecycle_stage::String`
93+
- `run_id::String`: run identifier.
94+
- `experiment_id::Integer`: experiment identifier.
95+
- `status::MLFlowRunStatus`: run status.
96+
- `start_time::Union{Int64,Missing}`: when was the run started, UNIX time in milliseconds.
97+
- `end_time::Union{Int64,Missing}`: when did the run end, UNIX time in milliseconds.
98+
- `artifact_uri::String`: where are artifacts from this run stored.
99+
- `lifecycle_stage::String`: one of `active` or `deleted`.
100+
101+
# Constructors
102+
103+
- `MLFlowRunInfo(run_id, experiment_id, status, start_time, end_time, artifact_uri, lifecycle_stage)`
104+
- `MLFlowRunInfo(info::Dict{String,Any})`
87105
"""
88106
struct MLFlowRunInfo
89107
run_id::String
@@ -127,26 +145,68 @@ struct MLFlowRunInfo
127145
end
128146
end
129147

"""
    MLFlowRunDataMetric

Represents a metric.

# Fields
- `key::String`: metric identifier.
- `value::Float64`: metric value.
- `step::Int64`: step.
- `timestamp::Int64`: timestamp in UNIX time in milliseconds.

# Constructors

- `MLFlowRunDataMetric(d::AbstractDict)`: builds a metric from a dictionary (e.g. a
  `Dict{String,Any}`) with the entries `"key"`, `"value"`, `"step"` and `"timestamp"`.
  Numeric entries may be given either as numbers or as their string representation.

"""
struct MLFlowRunDataMetric
    key::String
    value::Float64
    step::Int64
    timestamp::Int64
    function MLFlowRunDataMetric(d::AbstractDict)
        key = d["key"]
        # Numeric entries are accepted as numbers or as strings, so the
        # constructor does not depend on how the payload was decoded.
        value = _metricnumber(Float64, d["value"])
        step = _metricnumber(Int64, d["step"])
        timestamp = _metricnumber(Int64, d["timestamp"])
        return new(key, value, step, timestamp)
    end
end

# Private helper for `MLFlowRunDataMetric`: coerce a metric entry to the numeric type `T`.
# Strings are parsed, numbers are converted (throws if the conversion is inexact).
_metricnumber(::Type{T}, x::AbstractString) where {T<:Number} = parse(T, x)
_metricnumber(::Type{T}, x::Real) where {T<:Number} = convert(T, x)
177+
178+
"""
    MLFlowRunData

Represents run data.

# Fields
- `metrics::Vector{MLFlowRunDataMetric}`: run metrics; empty when the input has no `"metrics"` entry.
- `params::Union{Dict{String,String},Missing}`: run parameters keyed by parameter name; empty when the input has no `"params"` entry.
- `tags`: list of run tags; `missing` when the input has no `"tags"` entry.

# Constructors

- `MLFlowRunData(data::Dict{String,Any})`

"""
struct MLFlowRunData
    metrics::Vector{MLFlowRunDataMetric}
    params::Union{Dict{String,String},Missing}
    tags
    function MLFlowRunData(data::Dict{String,Any})
        # Absent entries fall back to an empty collection (metrics, params) or `missing` (tags).
        metrics = MLFlowRunDataMetric[
            MLFlowRunDataMetric(entry) for entry in get(data, "metrics", ())
        ]
        # Each parameter entry is a key/value record; flatten them into one dictionary.
        params = Dict{String,String}(
            entry["key"] => entry["value"] for entry in get(data, "params", ())
        )
        tags = get(data, "tags", missing)
        return new(metrics, params, tags)
    end
end
152212

@@ -158,11 +218,23 @@ Represents an MLFlow run.
158218
# Fields
159219
- `info::MLFlowRunInfo`: Run metadata.
160220
- `data::MLFlowRunData`: Run data.
221+
222+
# Constructors
223+
224+
- `MLFlowRun(rundata::MLFlowRunData)`
225+
- `MLFlowRun(runinfo::MLFlowRunInfo)`
226+
- `MLFlowRun(info::Dict{String,Any})`
227+
- `MLFlowRun(info::Dict{String,Any}, data::Dict{String,Any})`
228+
161229
"""
162230
struct MLFlowRun
163-
info::MLFlowRunInfo
231+
info::Union{MLFlowRunInfo,Missing}
164232
data::Union{MLFlowRunData,Missing}
165233

234+
function MLFlowRun(rundata::MLFlowRunData)
235+
info = missing
236+
new(info, rundata)
237+
end
166238
function MLFlowRun(runinfo::MLFlowRunInfo)
167239
data = missing
168240
new(runinfo, data)

0 commit comments

Comments
 (0)