basic run search. More tests needed.

deyandyankov · deyandyankov · commit b90da1c2735c · 2021-11-15T13:01:46.000Z
diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl
@@ -25,6 +25,7 @@ export
     MLFlowRunStatus,
     MLFlowRunInfo,
     MLFlowRunData,
+    MLFlowRunDataMetric,
     MLFlowRun
 
 include("utils.jl")
@@ -41,7 +42,8 @@ export
     createrun,
     getrun,
     updaterun,
-    deleterun
+    deleterun,
+    searchruns
 
 include("logging.jl")
 export
diff --git a/src/runs.jl b/src/runs.jl
@@ -95,3 +95,48 @@ end
 deleterun(mlf::MLFlow, run_info::MLFlowRunInfo) = deleterun(mlf, run_info.run_id)
 deleterun(mlf::MLFlow, run::MLFlowRun) = deleterun(mlf, run.info)
 
+"""
+    searchruns(mlf::MLFlow, experiment_ids; filter, run_view_type, max_results, order_by)
+
+Searches for runs in the given experiments, optionally restricted by `filter`.
+
+# Arguments
+- `mlf`: [`MLFlow`](@ref) configuration.
+- `experiment_ids::AbstractVector{<:Integer}`: `experiment_id`s in which to search for runs.
+
+# Keywords
+- `filter::String`: filter as defined in [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-runs)
+- `run_view_type::String`: one of `"ACTIVE_ONLY"` (default), `"DELETED_ONLY"` or `"ALL"`; anything else raises an error.
+- `max_results::Integer`: value sent as `max_results` in the request; defaults to `50000`.
+- `order_by::AbstractVector{<:String}`: sent as `order_by`; left out of the request when empty or `[""]`.
+
+# Returns
+- a vector of [`MLFlowRun`](@ref), one per entry under `"runs"` in the response; errors if that key is absent.
+
+"""
+function searchruns(mlf::MLFlow, experiment_ids::AbstractVector{<:Integer};
+                    filter::String="",
+                    run_view_type::String="ACTIVE_ONLY",
+                    max_results::Integer=50000,
+                    order_by::AbstractVector{<:String}=[""]
+                    )
+    endpoint = "runs/search"
+    run_view_type ∈ ["ACTIVE_ONLY", "DELETED_ONLY", "ALL"] || error("Unsupported run_view_type = $run_view_type")
+    kwargs = (
+        experiment_ids=experiment_ids,
+        filter=filter,
+        run_view_type=run_view_type,
+        max_results=max_results,
+    )
+    if !isempty(order_by) && order_by != [""]
+        kwargs = merge(kwargs, (order_by=order_by,))  # NamedTuples are immutable: build a new one
+    end
+
+    result = mlfpost(mlf, endpoint; kwargs...)
+    haskey(result, "runs") || error("Malformed result from MLFlow")
+
+    map(x -> MLFlowRun(x["info"], x["data"]), result["runs"])
+end
+# Single-experiment convenience method: wraps the id in a one-element vector and forwards all keywords.
+searchruns(mlf::MLFlow, experiment_id::Integer; kwargs...) =
+    searchruns(mlf, [experiment_id]; kwargs...)
diff --git a/src/types.jl b/src/types.jl
@@ -127,26 +127,59 @@ struct MLFlowRunInfo
     end
 end
 
+"""
+    MLFlowRunDataMetric
+
+Represents a single metric entry (key, value, step, timestamp) built from a `Dict{String,Any}`.
+
+# Fields
+- `key::String`: taken from `d["key"]` of the source dictionary.
+- `value`: stored as `Float64`; taken from `d["value"]`.
+- `step::Int64`: parsed from the string stored in `d["step"]`.
+- `timestamp::Int64`: parsed from the string stored in `d["timestamp"]`.
+"""
+struct MLFlowRunDataMetric
+    key::String
+    value::Float64
+    step::Int64
+    timestamp::Int64
+    function MLFlowRunDataMetric(d::Dict{String,Any})  # reads "key", "value", "step", "timestamp"
+        key = d["key"]
+        value = d["value"]
+        step = parse(Int64, d["step"])  # `parse` expects the stored value to be a string
+        timestamp = parse(Int64, d["timestamp"])  # likewise expected to be a string
+        new(key, value, step, timestamp)
+    end
+end
+
+
 """
     MLFlowRunData
 
 Represents run data.
 
 # Fields
-- `metrics`
-- `params`
+- `metrics::Vector{MLFlowRunDataMetric}`: run metrics.
+- `params::Dict{String,String}`: run parameters.
 - `tags`
 
-# TODO
-Incomplete functionality.
-
 """
 struct MLFlowRunData
-    metrics
-    params
+    metrics::Vector{MLFlowRunDataMetric}
+    params::Union{Dict{String,String},Missing}
     tags
     function MLFlowRunData(data::Dict{String,Any})
-        new([], [], []) # TODO: add functionality
+        # Absent keys fall back to: empty metric vector, empty params dict, `missing` tags.
+        metrics = if haskey(data, "metrics")
+            MLFlowRunDataMetric.(data["metrics"])
+        else
+            MLFlowRunDataMetric[]
+        end
+        params = Dict{String,String}()
+        for entry in get(data, "params", ())
+            params[entry["key"]] = entry["value"]
+        end
+        new(metrics, params, get(data, "tags", missing))
     end
 end
 
@@ -158,11 +191,16 @@ Represents an MLFlow run.
 # Fields
 - `info::MLFlowRunInfo`: Run metadata.
 - `data::MLFlowRunData`: Run data.
+
 """
 struct MLFlowRun
-    info::MLFlowRunInfo
+    info::Union{MLFlowRunInfo,Missing}
     data::Union{MLFlowRunData,Missing}
 
+    function MLFlowRun(rundata::MLFlowRunData)
+        info = missing
+        new(info, rundata)
+    end
     function MLFlowRun(runinfo::MLFlowRunInfo)
         data = missing
         new(runinfo, data)
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -18,63 +18,71 @@ end
     @test mlf.baseuri == mlflowbaseuri
     @test mlf.apiversion == 2.0
 
-    if mlflow_server_is_running(mlf)
-
-        exptags = [:key => "val"]
-        expname = "expname-$(UUIDs.uuid4())"
-
-        @test ismissing(getexperiment(mlf, "$(UUIDs.uuid4()) - $(UUIDs.uuid4())"))
-
-        experiment_id = createexperiment(mlf; name=expname, tags=exptags)
-        experiment = getexperiment(mlf, experiment_id)
-        @test experiment.experiment_id == experiment_id
-        experimentbyname = getexperiment(mlf, expname)
-        @test experimentbyname.name == experiment.name
-
-
-        exprun = createrun(mlf, experiment_id)
-        @test exprun.info.experiment_id == experiment_id
-        @test exprun.info.lifecycle_stage == "active"
-        @test exprun.info.status == MLFlowRunStatus("RUNNING")
-        exprunid = exprun.info.run_id
-
-        logparam(mlf, exprunid, "paramkey", "paramval")
-        logparam(mlf, exprunid, Dict("k" => "v", "k1" => "v1"))
-        logparam(mlf, exprun, Dict("test1" => "test2"))
-
-        logmetric(mlf, exprun, "metrickeyrun", 1.0)
-        logmetric(mlf, exprun.info, "metrickeyrun", 2.0)
-        logmetric(mlf, exprun.info, "metrickeyrun", [2.5, 3.5])
-        logmetric(mlf, exprunid, "metrickey", 1.0)
-        logmetric(mlf, exprunid, "metrickey2", [1.0, 1.5, 2.0])
-
-        retrieved_run = getrun(mlf, exprunid)
-        @test exprun.info == retrieved_run.info
-
-        tmpfiletoupload = tempname()
-        f = open(tmpfiletoupload, "w")
-        write(f, "samplecontents")
-        close(f)
-        logartifact(mlf, retrieved_run, tmpfiletoupload)
-        rm(tmpfiletoupload)
+    if !mlflow_server_is_running(mlf)
+        return nothing
+    end
 
-        running_run = updaterun(mlf, exprunid, "RUNNING")
-        @test running_run.info.experiment_id == experiment_id
-        @test running_run.info.status == MLFlowRunStatus("RUNNING")
-        finished_run = updaterun(mlf, exprun, MLFlowRunStatus("FINISHED")) 
-        finishedrun = getrun(mlf, finished_run.info.run_id)
+    exptags = [:key => "val"]
+    expname = "expname-$(UUIDs.uuid4())"
+
+    @test ismissing(getexperiment(mlf, "$(UUIDs.uuid4()) - $(UUIDs.uuid4())"))
+
+    experiment_id = createexperiment(mlf; name=expname, tags=exptags)
+    experiment = getexperiment(mlf, experiment_id)
+    @test experiment.experiment_id == experiment_id
+    experimentbyname = getexperiment(mlf, expname)
+    @test experimentbyname.name == experiment.name
+
+    exprun = createrun(mlf, experiment_id)
+    @test exprun.info.experiment_id == experiment_id
+    @test exprun.info.lifecycle_stage == "active"
+    @test exprun.info.status == MLFlowRunStatus("RUNNING")
+    exprunid = exprun.info.run_id
+
+    logparam(mlf, exprunid, "paramkey", "paramval")
+    logparam(mlf, exprunid, Dict("k" => "v", "k1" => "v1"))
+    logparam(mlf, exprun, Dict("test1" => "test2"))
+
+    logmetric(mlf, exprun, "metrickeyrun", 1.0)
+    logmetric(mlf, exprun.info, "metrickeyrun", 2.0)
+    logmetric(mlf, exprun.info, "metrickeyrun", [2.5, 3.5])
+    logmetric(mlf, exprunid, "metrickey", 1.0)
+    logmetric(mlf, exprunid, "metrickey2", [1.0, 1.5, 2.0])
+
+    retrieved_run = getrun(mlf, exprunid)
+    @test exprun.info == retrieved_run.info
+
+    tmpfiletoupload = tempname()
+    f = open(tmpfiletoupload, "w")
+    write(f, "samplecontents")
+    close(f)
+    logartifact(mlf, retrieved_run, tmpfiletoupload)
+    rm(tmpfiletoupload)
+
+    running_run = updaterun(mlf, exprunid, "RUNNING")
+    @test running_run.info.experiment_id == experiment_id
+    @test running_run.info.status == MLFlowRunStatus("RUNNING")
+    finished_run = updaterun(mlf, exprun, MLFlowRunStatus("FINISHED"))
+    finishedrun = getrun(mlf, finished_run.info.run_id)
     
-        # NOTE: seems like MLFlow API never returns `end_time` as documented in https://mlflow.org/docs/latest/rest-api.html#runinfo
-        # Consider raising an issue with MLFlow itself.
-        @test_broken !ismissing(finishedrun.info.end_time)
-
-        runs = searchrun(mlf, experiment_id, "params.\"paramkey\" == \"paramval\"")
-
-        deleterun(mlf, exprunid)
-
-        deleteexperiment(mlf, experiment_id)
-        experiment = getexperiment(mlf, experiment_id)
-        @test experiment.experiment_id == experiment_id
-        @test experiment.lifecycle_stage == "deleted"
-    end
+    # NOTE: seems like MLFlow API never returns `end_time` as documented in https://mlflow.org/docs/latest/rest-api.html#runinfo
+    # Consider raising an issue with MLFlow itself.
+    @test_broken !ismissing(finishedrun.info.end_time)
+
+    exprun2 = createrun(mlf, experiment_id)
+    exprun2id = exprun2.info.run_id
+    logparam(mlf, exprun2, "param2", "key2")
+    logmetric(mlf, exprun2, "metric2", [1.0, 2.0])
+    updaterun(mlf, exprun2, "FINISHED")
+
+    @test exprun2id != exprunid  # the second run must have its own id, distinct from the first run
+    runs = searchruns(mlf, experiment_id)
+    @test length(runs) == 2
+   # , "params.\"paramkey\" == \"paramval\"")
+    # deleterun(mlf, exprunid)
+
+    # deleteexperiment(mlf, experiment_id)
+    # experiment = getexperiment(mlf, experiment_id)
+    # @test experiment.experiment_id == experiment_id
+    # @test experiment.lifecycle_stage == "deleted"
 end