Skip to content

Commit 818a825

Browse files
authored
Merge pull request #9 from JuliaAI/dev
For a 0.1.1 release
2 parents b5dd7dd + 1a11b2a commit 818a825

File tree

4 files changed

+35
-8
lines changed

4 files changed

+35
-8
lines changed

Project.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
name = "OpenML"
22
uuid = "8b6db2d4-7670-4922-a472-f9537c81ab66"
33
authors = ["Diego Arenas <[email protected]>", "Anthony D. Blaom <[email protected]>"]
4-
version = "0.1.0"
4+
version = "0.1.1"
55

66
[deps]
77
ARFFFiles = "da404889-ca92-49ff-9e8b-0aa6b4d38dc8"
88
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
99
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
1010
Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
11+
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
1112
ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"
1213

1314
[compat]

src/OpenML.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ using JSON
55
import ARFFFiles
66
import ScientificTypes: Continuous, Count, Textual, Multiclass, coerce, autotype
77
using Markdown
8+
if VERSION > v"1.3.0"
9+
using Pkg.Artifacts
10+
end
811

912
export OpenML
1013

src/data.jl

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,6 @@ const API_URL = "https://www.openml.org/api/v1/json"
55
# https://github.com/openml/OpenML/tree/master/openml_OS/views/pages/api_new/v1/xsd
66
# https://www.openml.org/api_docs#!/data/get_data_id
77

8-
# TODO:
9-
# - Use e.g. DataDeps to cache data locally
10-
# - Put the ARFF parser to a separate package or use ARFFFiles when
11-
# https://github.com/cjdoris/ARFFFiles.jl/issues/4 is fixed.
128

139
"""
1410
Returns information about a dataset. The information includes the name,
@@ -47,6 +43,8 @@ With `parser = :arff` (default) the ARFFFiles.jl parser is used.
4743
With `parser = :auto` the output of the ARFFFiles parser is coerced to
4844
automatically detected scientific types.
4945
46+
Datasets are saved as Julia artifacts so that they persist locally once loaded.
47+
5048
Returns a table.
5149
5250
# Examples
@@ -58,9 +56,24 @@ df = DataFrame(table);
5856
```
5957
"""
6058
function load(id::Int; parser = :arff)
61-
response = load_Dataset_Description(id)
62-
arff_file = HTTP.request("GET", response["data_set_description"]["url"])
63-
data = ARFFFiles.load(IOBuffer(arff_file.body))
59+
if VERSION > v"1.3.0"
60+
dir = first(Artifacts.artifacts_dirs())
61+
toml = joinpath(dir, "OpenMLArtifacts.toml")
62+
hash = artifact_hash(string(id), toml)
63+
if hash === nothing || !artifact_exists(hash)
64+
hash = Artifacts.create_artifact() do artifact_dir
65+
url = load_Dataset_Description(id)["data_set_description"]["url"]
66+
download(url, joinpath(artifact_dir, "$id.arff"))
67+
end
68+
bind_artifact!(toml, string(id), hash)
69+
end
70+
filename = joinpath(artifact_path(hash), "$id.arff")
71+
else
72+
url = load_Dataset_Description(id)["data_set_description"]["url"]
73+
filename = tempname()
74+
download(url, filename)
75+
end
76+
data = ARFFFiles.load(filename)
6477
if parser == :auto
6578
return coerce(data, autotype(data))
6679
else

test/data.jl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,5 +40,15 @@ end
4040
@test length(filters_test["data"]["dataset"][1]) == offset
4141
end
4242

43+
if VERSION > v"1.3.0"
44+
using Pkg.Artifacts
45+
@testset "artifacts" begin
46+
dir = first(Artifacts.artifacts_dirs())
47+
toml = joinpath(dir, "OpenMLArtifacts.toml")
48+
hash = artifact_hash("61", toml)
49+
@test artifact_exists(hash)
50+
end
51+
end
52+
4353
end
4454
true

0 commit comments

Comments
 (0)