Skip to content

Commit 8fb4a34

Browse files
authored
Merge pull request #8 from jbrea/caching
save downloaded arff file as artifact
2 parents 14c7da5 + 8b3d15b commit 8fb4a34

File tree

4 files changed

+32
-7
lines changed

4 files changed

+32
-7
lines changed

Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ ARFFFiles = "da404889-ca92-49ff-9e8b-0aa6b4d38dc8"
88
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
99
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
1010
Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
11+
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
1112
ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"
1213

1314
[compat]

src/OpenML.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ using JSON
55
import ARFFFiles
66
import ScientificTypes: Continuous, Count, Textual, Multiclass, coerce, autotype
77
using Markdown
8+
if VERSION > v"1.3.0"
9+
using Pkg.Artifacts
10+
end
811

912
export OpenML
1013

src/data.jl

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,6 @@ const API_URL = "https://www.openml.org/api/v1/json"
55
# https://github.com/openml/OpenML/tree/master/openml_OS/views/pages/api_new/v1/xsd
66
# https://www.openml.org/api_docs#!/data/get_data_id
77

8-
# TODO:
9-
# - Use e.g. DataDeps to cache data locally
10-
# - Put the ARFF parser to a separate package or use ARFFFiles when
11-
# https://github.com/cjdoris/ARFFFiles.jl/issues/4 is fixed.
128

139
"""
1410
Returns information about a dataset. The information includes the name,
@@ -58,9 +54,24 @@ df = DataFrame(table);
5854
```
5955
"""
6056
function load(id::Int; parser = :arff)
61-
response = load_Dataset_Description(id)
62-
arff_file = HTTP.request("GET", response["data_set_description"]["url"])
63-
data = ARFFFiles.load(IOBuffer(arff_file.body))
57+
if VERSION > v"1.3.0"
58+
dir = first(Artifacts.artifacts_dirs())
59+
toml = joinpath(dir, "OpenMLArtifacts.toml")
60+
hash = artifact_hash(string(id), toml)
61+
if hash === nothing || !artifact_exists(hash)
62+
hash = Artifacts.create_artifact() do artifact_dir
63+
url = load_Dataset_Description(id)["data_set_description"]["url"]
64+
download(url, joinpath(artifact_dir, "$id.arff"))
65+
end
66+
bind_artifact!(toml, string(id), hash)
67+
end
68+
filename = joinpath(artifact_path(hash), "$id.arff")
69+
else
70+
url = load_Dataset_Description(id)["data_set_description"]["url"]
71+
filename = tempname()
72+
download(url, filename)
73+
end
74+
data = ARFFFiles.load(filename)
6475
if parser == :auto
6576
return coerce(data, autotype(data))
6677
else

test/data.jl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,5 +40,15 @@ end
4040
@test length(filters_test["data"]["dataset"][1]) == offset
4141
end
4242

43+
if VERSION > v"1.3.0"
44+
using Pkg.Artifacts
45+
@testset "artifacts" begin
46+
dir = first(Artifacts.artifacts_dirs())
47+
toml = joinpath(dir, "OpenMLArtifacts.toml")
48+
hash = artifact_hash("61", toml)
49+
@test artifact_exists(hash)
50+
end
51+
end
52+
4353
end
4454
true

0 commit comments

Comments
 (0)