Merge pull request #4 from JuliaAI/runsearch

deyandyankov · web-flow · commit 677dddf750ff · 2021-11-15T22:09:46.000Z
Runsearch
diff --git a/docs/src/reference.md b/docs/src/reference.md
@@ -12,6 +12,7 @@ MLFlowExperiment
 MLFlowRun
 MLFlowRunInfo
 MLFlowRunData
+MLFlowRunDataMetric
 MLFlowRunStatus
 ```
 
@@ -31,6 +32,7 @@ createrun
 getrun
 updaterun
 deleterun
+searchruns
 ```
 
 # Logging
diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl
@@ -25,6 +25,7 @@ export
     MLFlowRunStatus,
     MLFlowRunInfo,
     MLFlowRunData,
+    MLFlowRunDataMetric,
     MLFlowRun
 
 include("utils.jl")
@@ -41,7 +42,8 @@ export
     createrun,
     getrun,
     updaterun,
-    deleterun
+    deleterun,
+    searchruns
 
 include("logging.jl")
 export
diff --git a/src/runs.jl b/src/runs.jl
@@ -95,3 +95,65 @@ end
 deleterun(mlf::MLFlow, run_info::MLFlowRunInfo) = deleterun(mlf, run_info.run_id)
 deleterun(mlf::MLFlow, run::MLFlowRun) = deleterun(mlf, run.info)
 
+"""
+    searchruns(mlf::MLFlow, experiment_ids)
+
+Searches for runs in one or more experiments.
+
+# Arguments
+- `mlf`: [`MLFlow`](@ref) configuration.
+- `experiment_ids::AbstractVector{<:Integer}`: `experiment_id`s in which to search for runs. Can also be a single `Integer`.
+
+# Keywords
+- `filter::String`: filter as defined in [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-runs)
+- `run_view_type::String`: one of `ACTIVE_ONLY`, `DELETED_ONLY`, or `ALL`. Defaults to `ACTIVE_ONLY`.
+- `max_results::Integer`: 50,000 by default. Sent unchanged with every page request.
+- `order_by::AbstractVector{<:String}`: list of ordering expressions as defined in [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-runs). Defaults to `["attribute.start_time"]`.
+- `page_token::String`: paging functionality, handled automatically. Not meant to be passed by the user.
+
+# Returns
+- vector of [`MLFlowRun`](@ref) runs that were found in the list of experiments, with all pages concatenated in the order they were received.
+
+# Throws
+- `ErrorException` when `run_view_type` is not one of the supported values.
+
+"""
+function searchruns(mlf::MLFlow, experiment_ids::AbstractVector{<:Integer};
+                    filter::String="",
+                    run_view_type::String="ACTIVE_ONLY",
+                    max_results::Integer=50000,
+                    order_by::AbstractVector{<:String}=["attribute.start_time"],
+                    page_token::String=""
+                    )
+    endpoint = "runs/search"
+    run_view_type ∈ ("ACTIVE_ONLY", "DELETED_ONLY", "ALL") || error("Unsupported run_view_type = $run_view_type")
+
+    # Accumulate every page into one concretely typed vector; a loop avoids one
+    # stack frame and one full copy of the accumulated runs per page.
+    runs = MLFlowRun[]
+    token = page_token
+    while true
+        kwargs = (
+            experiment_ids=experiment_ids,
+            filter=filter,
+            run_view_type=run_view_type,
+            max_results=max_results,
+            order_by=order_by
+        )
+        # `page_token` is only sent when there is one to send
+        if !isempty(token)
+            kwargs = (; kwargs..., page_token=token)
+        end
+
+        result = mlfpost(mlf, endpoint; kwargs...)
+        # a response without a "runs" key ends the search
+        haskey(result, "runs") || break
+        append!(runs, (MLFlowRun(x["info"], x["data"]) for x in result["runs"]))
+
+        # keep requesting pages until the response stops providing a non-empty token
+        token = get(result, "next_page_token", "")
+        isempty(token) && break
+    end
+
+    return runs
+end
+# Convenience method: search a single experiment given by its integer identifier.
+function searchruns(mlf::MLFlow, experiment_id::Integer; kwargs...)
+    return searchruns(mlf, [experiment_id]; kwargs...)
+end
diff --git a/src/types.jl b/src/types.jl
@@ -1,17 +1,21 @@
 """
-    MLFlow(baseuri; apiversion)
+    MLFlow
 
 Base type which defines location and version for MLFlow API service.
 
 # Fields
 - `baseuri::String`: base MLFlow tracking URI, e.g. `http://localhost:5000`
 - `apiversion`: used API version, e.g. `2.0`
 
+# Constructors
+
+- `MLFlow(baseuri; apiversion=2.0)`
 # Examples
 ``` julia-repl
 julia> mlf = MLFlow("http://localhost:5000")
 MLFlow("http://localhost:5000", 2.0)
 ```
+
 """
 struct MLFlow
     baseuri::String
@@ -30,6 +34,12 @@ Represents an MLFlow experiment.
 - `experiment_id::Integer`: experiment identifier.
 - `tags::Any`: list of tags.
 - `artifact_location::String`: where are experiment artifacts stored.
+
+# Constructors
+
+- `MLFlowExperiment(name, lifecycle_stage, experiment_id, tags, artifact_location)`
+- `MLFlowExperiment(exp::Dict{String,Any})`
+
 """
 struct MLFlowExperiment
     name::String
@@ -60,11 +70,14 @@ Represents the status of an MLFlow Run.
 # Fields
 - `status::String`: one of RUNNING/SCHEDULED/FINISHED/FAILED/KILLED
 
+# Constructors
+
+- `MLFlowRunStatus(status::String)`
 """
 struct MLFlowRunStatus
     status::String
 
-    function MLFlowRunStatus(status)
+    function MLFlowRunStatus(status::String)
         acceptable_statuses = ["RUNNING", "SCHEDULED", "FINISHED", "FAILED", "KILLED"]
         status ∈ acceptable_statuses || error("Invalid status $status - choose one of $acceptable_statuses")
         new(status)
@@ -77,13 +90,18 @@ end
 Represents run metadata.
 
 # Fields
-- `run_id::String`
-- `experiment_id::Integer`
-- `status::MLFlowRunStatus` 
-- `start_time::Union{Int64,Missing}`
-- `end_time::Union{Int64,Missing}`
-- `artifact_uri::String`
-- `lifecycle_stage::String`
+- `run_id::String`: run identifier.
+- `experiment_id::Integer`: experiment identifier.
+- `status::MLFlowRunStatus`: run status.
+- `start_time::Union{Int64,Missing}`: when the run was started, UNIX time in milliseconds.
+- `end_time::Union{Int64,Missing}`: when the run ended, UNIX time in milliseconds.
+- `artifact_uri::String`: where artifacts from this run are stored.
+- `lifecycle_stage::String`: one of `active` or `deleted`.
+
+# Constructors
+
+- `MLFlowRunInfo(run_id, experiment_id, status, start_time, end_time, artifact_uri, lifecycle_stage)`
+- `MLFlowRunInfo(info::Dict{String,Any})`
 """
 struct MLFlowRunInfo
     run_id::String
@@ -127,26 +145,68 @@ struct MLFlowRunInfo
     end
 end
 
+"""
+    MLFlowRunDataMetric
+
+Represents a metric.
+
+# Fields
+- `key::String`: metric identifier.
+- `value::Float64`: metric value.
+- `step::Int64`: step.
+- `timestamp::Int64`: timestamp in UNIX time in milliseconds.
+
+# Constructors
+
+- `MLFlowRunDataMetric(d::Dict{String,Any})`: reads the `key`, `value`, `step` and `timestamp` entries of `d`. `step` and `timestamp` may be given either as integers or as strings holding an integer.
+
+"""
+struct MLFlowRunDataMetric
+    key::String
+    value::Float64
+    step::Int64
+    timestamp::Int64
+    function MLFlowRunDataMetric(d::Dict{String,Any})
+        # 64-bit integers may arrive either as JSON numbers or as strings;
+        # dispatch picks the conversion so both forms are accepted.
+        toint64(x::Integer) = Int64(x)
+        toint64(x::AbstractString) = parse(Int64, x)
+
+        key = d["key"]
+        value = d["value"]
+        step = toint64(d["step"])
+        timestamp = toint64(d["timestamp"])
+        new(key, value, step, timestamp)
+    end
+end
+
+
 """
     MLFlowRunData
 
 Represents run data.
 
 # Fields
-- `metrics`
-- `params`
-- `tags`
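+- `metrics::Vector{MLFlowRunDataMetric}`: run metrics; empty when `data` has no `metrics` entry.
+- `params::Union{Dict{String,String},Missing}`: run parameters keyed by name; the constructor stores an empty `Dict` (never `missing`) when `data` has no `params` entry.
+- `tags`: run tags as found in `data`, or `missing` when `data` has no `tags` entry.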
 
-# TODO
-Incomplete functionality.
+# Constructors
+
+- `MLFlowRunData(data::Dict{String,Any})`
 
 """
 struct MLFlowRunData
-    metrics
-    params
+    metrics::Vector{MLFlowRunDataMetric}
+    params::Union{Dict{String,String},Missing}  # the constructor below always stores a Dict, never `missing`
     tags
     function MLFlowRunData(data::Dict{String,Any})
-        new([], [], []) # TODO: add functionality
+        metrics = haskey(data, "metrics") ? MLFlowRunDataMetric.(data["metrics"]) : MLFlowRunDataMetric[]  # one metric per entry; empty vector when the key is absent
+        if haskey(data, "params")
+            params = Dict{String,String}()
+            for p in data["params"]  # each entry is read through its "key" and "value" fields
+                params[p["key"]] = p["value"]
+            end
+        else
+            params = Dict{String,String}()  # no "params" key: empty Dict
+        end
+        tags = haskey(data, "tags") ? data["tags"] : missing  # stored as-is, without conversion
+        new(metrics, params, tags)
     end
 end
 
@@ -158,11 +218,23 @@ Represents an MLFlow run.
 # Fields
 - `info::MLFlowRunInfo`: Run metadata.
 - `data::MLFlowRunData`: Run data.
+
+# Constructors
+
+- `MLFlowRun(rundata::MLFlowRunData)`
+- `MLFlowRun(runinfo::MLFlowRunInfo)`
+- `MLFlowRun(info::Dict{String,Any})`
+- `MLFlowRun(info::Dict{String,Any}, data::Dict{String,Any})`
+
 """
 struct MLFlowRun
-    info::MLFlowRunInfo
+    info::Union{MLFlowRunInfo,Missing}
     data::Union{MLFlowRunData,Missing}
 
+    function MLFlowRun(rundata::MLFlowRunData)
+        info = missing
+        new(info, rundata)
+    end
     function MLFlowRun(runinfo::MLFlowRunInfo)
         data = missing
         new(runinfo, data)
diff --git a/test/runtests.jl b/test/runtests.jl