Skip to content

Commit 00bb922

Browse files
committed
save downloaded arff file as artifact
1 parent 14c7da5 commit 00bb922

File tree

4 files changed

+22
-7
lines changed

4 files changed

+22
-7
lines changed

Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ ARFFFiles = "da404889-ca92-49ff-9e8b-0aa6b4d38dc8"
88
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
99
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
1010
Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
11+
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
1112
ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"
1213

1314
[compat]

src/OpenML.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ using JSON
55
import ARFFFiles
66
import ScientificTypes: Continuous, Count, Textual, Multiclass, coerce, autotype
77
using Markdown
8+
using Pkg.Artifacts
89

910
export OpenML
1011

src/data.jl

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,6 @@ const API_URL = "https://www.openml.org/api/v1/json"
55
# https://github.com/openml/OpenML/tree/master/openml_OS/views/pages/api_new/v1/xsd
66
# https://www.openml.org/api_docs#!/data/get_data_id
77

8-
# TODO:
9-
# - Use e.g. DataDeps to cache data locally
10-
# - Put the ARFF parser to a separate package or use ARFFFiles when
11-
# https://github.com/cjdoris/ARFFFiles.jl/issues/4 is fixed.
128

139
"""
1410
Returns information about a dataset. The information includes the name,
@@ -58,9 +54,17 @@ df = DataFrame(table);
5854
```
5955
"""
6056
function load(id::Int; parser = :arff)
61-
response = load_Dataset_Description(id)
62-
arff_file = HTTP.request("GET", response["data_set_description"]["url"])
63-
data = ARFFFiles.load(IOBuffer(arff_file.body))
57+
dir = first(Artifacts.artifacts_dirs())
58+
toml = joinpath(dir, "OpenMLArtifacts.toml")
59+
hash = artifact_hash(string(id), toml)
60+
if hash === nothing || !artifact_exists(hash)
61+
hash = Artifacts.create_artifact() do artifact_dir
62+
url = load_Dataset_Description(id)["data_set_description"]["url"]
63+
download(url, joinpath(artifact_dir, "$id.arff"))
64+
end
65+
bind_artifact!(toml, string(id), hash)
66+
end
67+
data = ARFFFiles.load(joinpath(artifact_path(hash), "$id.arff"))
6468
if parser == :auto
6569
return coerce(data, autotype(data))
6670
else

test/data.jl

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ using Test
44
using HTTP
55
using OpenML
66
import Tables.istable
7+
using Pkg.Artifacts
78

89
response_test = OpenML.load_Dataset_Description(61)
910
ntp_test = OpenML.load(61)
@@ -40,5 +41,13 @@ end
4041
@test length(filters_test["data"]["dataset"][1]) == offset
4142
end
4243

44+
# Check that the dataset with id 61 has a cached artifact: look up the hash bound
# to the key "61" in OpenMLArtifacts.toml (located in the first artifacts
# directory) and assert `artifact_exists` for that hash.
# NOTE(review): this appears to rely on the `OpenML.load(61)` call made earlier in
# this test file to have created and bound the artifact; confirm that ordering.
@testset "artifacts" begin
45+
dir = first(Artifacts.artifacts_dirs())
46+
toml = joinpath(dir, "OpenMLArtifacts.toml")
47+
# NOTE(review): `load` in src/data.jl checks `hash === nothing` after the same
# lookup, so `artifact_hash` can presumably return `nothing` when no binding is
# present. In that case the `@test` below would likely error instead of recording
# a plain failure; verify.
hash = artifact_hash("61", toml)
48+
@test artifact_exists(hash)
49+
end
50+
51+
4352
end
4453
true

0 commit comments

Comments
 (0)