Skip to content

Commit c1550e1

Browse files
authored
Merge pull request #16 from JuliaAI/listartifacts
listartifacts
2 parents 271a2dc + 12e8b22 commit c1550e1

File tree

7 files changed

+309
-162
lines changed

7 files changed

+309
-162
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@ URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
1313
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
1414

1515
[compat]
16-
ShowCases = "0.1"
1716
FilePathsBase = "0.9"
1817
HTTP = "0.9"
1918
JSON = "0.21"
19+
ShowCases = "0.1"
2020
URIs = "1"
2121
julia = "1"
2222

docs/src/reference.md

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ MLFlowRunInfo
1616
MLFlowRunData
1717
MLFlowRunDataMetric
1818
MLFlowRunStatus
19+
MLFlowArtifactFileInfo
20+
MLFlowArtifactDirInfo
1921
```
2022

2123
# Experiments
@@ -36,14 +38,10 @@ getrun
3638
updaterun
3739
deleterun
3840
searchruns
39-
```
40-
41-
# Logging
42-
43-
```@docs
4441
logparam
4542
logmetric
4643
logartifact
44+
listartifacts
4745
```
4846

4947
# Utilities

src/MLFlowClient.jl

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@ export
3232
MLFlowRunDataMetric,
3333
MLFlowRun,
3434
get_info,
35-
get_data
35+
get_data,
36+
MLFlowArtifactFileInfo,
37+
MLFlowArtifactDirInfo,
38+
get_path,
39+
get_size
3640

3741
include("utils.jl")
3842
export
@@ -52,12 +56,10 @@ export
5256
getrun,
5357
updaterun,
5458
deleterun,
55-
searchruns
56-
57-
include("logging.jl")
58-
export
59+
searchruns,
5960
logparam,
6061
logmetric,
61-
logartifact
62+
logartifact,
63+
listartifacts
6264

6365
end

src/logging.jl

Lines changed: 0 additions & 136 deletions
This file was deleted.

src/runs.jl

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,3 +184,196 @@ searchruns(mlf::MLFlow, exp::MLFlowExperiment; kwargs...) =
184184
searchruns(mlf, exp.experiment_id; kwargs...)
185185
searchruns(mlf::MLFlow, exps::AbstractVector{MLFlowExperiment}; kwargs...) =
186186
searchruns(mlf, [getfield.(exps, :experiment_id)]; kwargs...)
187+
188+
"""
    logparam(mlf::MLFlow, run, key, value)
    logparam(mlf::MLFlow, run, kv)

Attach a parameter (a key/value pair) to a run.

# Arguments
- `mlf`: [`MLFlow`](@ref) configuration.
- `run`: one of [`MLFlowRun`](@ref), [`MLFlowRunInfo`](@ref), or `String`.
- `key`: parameter key (name). Converted with `string` before being sent, because MLFlow only supports string parameters.
- `value`: parameter value. Converted with `string` before being sent, because MLFlow only supports string parameters.

Instead of separate `key` and `value` arguments, a `kv::Dict` may be given; each of its entries is logged as one parameter.
"""
function logparam(mlf::MLFlow, run_id::String, key, value)
    # Keys and values are stringified here so callers can pass symbols or numbers.
    payload = (run_id=run_id, key=string(key), value=string(value))
    return mlfpost(mlf, "runs/log-parameter"; payload...)
end
# Convenience methods: unwrap run objects step by step down to the run ID,
# then delegate to the `String` method.
function logparam(mlf::MLFlow, run_info::MLFlowRunInfo, key, value)
    return logparam(mlf, run_info.run_id, key, value)
end

function logparam(mlf::MLFlow, run::MLFlowRun, key, value)
    return logparam(mlf, run.info, key, value)
end

# Log every entry of a key/value collection as an individual parameter.
function logparam(mlf::MLFlow, run::Union{String,MLFlowRun,MLFlowRunInfo}, kv)
    for entry in kv
        name, val = entry
        logparam(mlf, run, name, val)
    end
    return nothing
end
216+
"""
    logmetric(mlf::MLFlow, run, key, value::T; timestamp, step) where T<:Real
    logmetric(mlf::MLFlow, run, key, values::AbstractArray{T}; timestamp, step) where T<:Real

Logs a metric value (or values) against a particular run.

# Arguments
- `mlf`: [`MLFlow`](@ref) configuration.
- `run`: one of [`MLFlowRun`](@ref), [`MLFlowRunInfo`](@ref), or `String`
- `key`: metric name.
- `value`: metric value, must be numeric.

# Keywords
- `timestamp`: if provided, must be a UNIX timestamp in milliseconds. By default, set to the current time (UTC-based, independent of the host's time zone).
- `step`: step at which the metric value has been taken.
"""
function logmetric(mlf::MLFlow, run_id::String, key, value::T; timestamp=missing, step=missing) where T<:Real
    endpoint = "runs/log-metric"
    if ismissing(timestamp)
        # `time()` is seconds since the UNIX epoch. The previous
        # `datetime2unix(now())` fed local wall-clock time into a function that
        # interprets its argument as UTC, which shifted the timestamp by the
        # host's UTC offset. `Int64` is explicit because millisecond timestamps
        # do not fit into a 32-bit `Int`.
        timestamp = trunc(Int64, time() * 1000)
    end
    return mlfpost(mlf, endpoint; run_id=run_id, key=key, value=value, timestamp=timestamp, step=step)
end
# Convenience methods: unwrap run objects step by step down to the run ID,
# then delegate to the `String` method.
function logmetric(mlf::MLFlow, run_info::MLFlowRunInfo, key, value::T; timestamp=missing, step=missing) where T<:Real
    return logmetric(mlf, run_info.run_id, key, value; timestamp=timestamp, step=step)
end

function logmetric(mlf::MLFlow, run::MLFlowRun, key, value::T; timestamp=missing, step=missing) where T<:Real
    return logmetric(mlf, run.info, key, value; timestamp=timestamp, step=step)
end

# Log each element of `values` as a separate data point of the same metric,
# passing the given `timestamp` and `step` through unchanged.
function logmetric(mlf::MLFlow, run::Union{String,MLFlowRun,MLFlowRunInfo}, key, values::AbstractArray{T}; timestamp=missing, step=missing) where T<:Real
    foreach(values) do metricvalue
        logmetric(mlf, run, key, metricvalue; timestamp=timestamp, step=step)
    end
    return nothing
end
250+
251+
"""
    logartifact(mlf::MLFlow, run, basefilename, data)

Stores an artifact (file) in the run's artifact location.

!!! note
    Assumes that artifact_uri is mapped to a local directory.
    At the moment, this only works if both MLFlow and the client are running on the same host or they map a directory that leads to the same location over NFS, for example.

# Arguments
- `mlf::MLFlow`: [`MLFlow`](@ref) configuration. Used to look up the run and its artifact location.
- `run`: one of [`MLFlowRun`](@ref), [`MLFlowRunInfo`](@ref) or `String`.
- `basefilename`: name of the file to be written.
- `data`: artifact content, an object that can be written directly to a file handle.

# Throws
- an `ErrorException` if an exception occurs during writing artifact.

# Returns
path of the artifact that was created.
"""
function logartifact(mlf::MLFlow, run_id::AbstractString, basefilename::AbstractString, data)
    mlflowrun = getrun(mlf, run_id)
    artifact_uri = mlflowrun.info.artifact_uri
    mkpath(artifact_uri)
    filepath = joinpath(artifact_uri, basefilename)
    try
        # The do-block form closes the handle even when `write` throws;
        # previously a failed write left the file open.
        open(filepath, "w") do f
            write(f, data)
        end
    catch e
        error("Unable to create artifact $(filepath): $e")
    end
    return filepath
end
# Convenience methods: unwrap run objects step by step down to the run ID,
# then delegate to the `AbstractString` method that writes the artifact.
function logartifact(mlf::MLFlow, run::MLFlowRun, basefilename::AbstractString, data)
    return logartifact(mlf, run.info, basefilename, data)
end

function logartifact(mlf::MLFlow, run_info::MLFlowRunInfo, basefilename::AbstractString, data)
    return logartifact(mlf, run_info.run_id, basefilename, data)
end
291+
"""
    logartifact(mlf::MLFlow, run, filepath)

Stores an artifact (file) in the run's artifact location.
The name of the artifact is calculated using `basename(filepath)`.

Dispatches on `logartifact(mlf::MLFlow, run, basefilename, data)` where `data` is the contents of `filepath`.

# Throws
- an `ErrorException` if `filepath` does not exist.
- an exception if such occurs while trying to read the contents of `filepath`.

"""
function logartifact(mlf::MLFlow, run_id::AbstractString, filepath::Union{AbstractPath,AbstractString})
    isfile(filepath) || error("File $filepath does not exist.")
    # `open(read, ...)` closes the handle exactly once, also on failure, and lets
    # any exception propagate with its original backtrace. This replaces the
    # former `catch e; throw(e)` / `@isdefined f` / double-`close` construction.
    data = open(read, filepath, "r")
    return logartifact(mlf, run_id, basename(filepath), data)
end
# Convenience methods: unwrap run objects step by step down to the run ID,
# then delegate to the `AbstractString` method that reads `filepath`.
function logartifact(mlf::MLFlow, run::MLFlowRun, filepath::Union{AbstractPath,AbstractString})
    return logartifact(mlf, run.info, filepath)
end

function logartifact(mlf::MLFlow, run_info::MLFlowRunInfo, filepath::Union{AbstractPath,AbstractString})
    return logartifact(mlf, run_info.run_id, filepath)
end
324+
"""
    listartifacts(mlf::MLFlow, run)

Lists the artifacts associated with an experiment run.
According to [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#list-artifacts), this API endpoint should return paged results, similar to [`searchruns`](@ref).
However, after some experimentation, this doesn't seem to be the case. Therefore, the paging functionality is not implemented here.

# Arguments
- `mlf::MLFlow`: [`MLFlow`](@ref) configuration.
- `run`: one of [`MLFlowRun`](@ref), [`MLFlowRunInfo`](@ref) or `String`.

# Keywords
- `path::String`: path of a directory within the artifact location. If set, returns the contents of the directory. By default, this is the root directory of the artifacts.
- `maxdepth::Integer`: depth of listing. Default is 1. This will only return the files/directories in the current `path`. To return all artifacts files and directories, use `maxdepth=-1`. With `maxdepth=0` an empty vector is returned.

# Returns
A vector of `Union{MLFlowArtifactFileInfo,MLFlowArtifactDirInfo}`.

# Throws
- an `ErrorException` if the response lists `files` but carries no `root_uri`.
"""
function listartifacts(mlf::MLFlow, run_id::String; path::String="", maxdepth::Integer=1)
    endpoint = "artifacts/list"
    httpresult = mlfget(mlf, endpoint; run_id=run_id, path=path)
    result = Vector{Union{MLFlowArtifactFileInfo,MLFlowArtifactDirInfo}}()
    # A response without `files` means the directory is empty.
    "files" in keys(httpresult) || return result
    "root_uri" in keys(httpresult) || error("Malformed response from MLFlow REST API.")
    maxdepth == 0 && return result
    root_uri = httpresult["root_uri"]

    for resultentry in httpresult["files"]
        isdirval = resultentry["is_dir"]
        if isdirval == false
            filepath = joinpath(root_uri, resultentry["path"])
            # The original code only handled a string-encoded size; a plain JSON
            # number is accepted as well so such a response does not fail.
            rawsize = resultentry["file_size"]
            filesize = rawsize isa AbstractString ? parse(Int, rawsize) : Int(rawsize)
            push!(result, MLFlowArtifactFileInfo(filepath, filesize))
        elseif isdirval == true
            dirpath = joinpath(root_uri, resultentry["path"])
            push!(result, MLFlowArtifactDirInfo(dirpath))
            # Descend only while depth remains. At `maxdepth == 1` the recursive
            # call would run with depth 0 and return nothing, so skipping it
            # saves one HTTP request per directory. Negative depths never reach
            # 1, so `maxdepth=-1` still lists everything.
            if maxdepth != 1
                append!(result, listartifacts(mlf, run_id; path=resultentry["path"], maxdepth=maxdepth - 1))
            end
        else
            @warn "Malformed response from MLFlow REST API is_dir=$isdirval - skipping"
            continue
        end
    end
    return result
end
# Convenience methods: extract the run ID from a run object and delegate to
# the `String` method, forwarding all keyword arguments untouched.
function listartifacts(mlf::MLFlow, run::MLFlowRun; kwargs...)
    return listartifacts(mlf, run.info.run_id; kwargs...)
end

function listartifacts(mlf::MLFlow, run_info::MLFlowRunInfo; kwargs...)
    return listartifacts(mlf, run_info.run_id; kwargs...)
end

0 commit comments

Comments
 (0)