@@ -5,10 +5,6 @@ const API_URL = "https://www.openml.org/api/v1/json"
5
5
# https://github.com/openml/OpenML/tree/master/openml_OS/views/pages/api_new/v1/xsd
6
6
# https://www.openml.org/api_docs#!/data/get_data_id
7
7
8
- # TODO :
9
- # - Use e.g. DataDeps to cache data locally
10
- # - Put the ARFF parser to a separate package or use ARFFFiles when
11
- # https://github.com/cjdoris/ARFFFiles.jl/issues/4 is fixed.
12
8
13
9
"""
14
10
Returns information about a dataset. The information includes the name,
@@ -47,6 +43,8 @@ With `parser = :arff` (default) the ARFFFiles.jl parser is used.
47
43
With `parser = :auto` the output of the ARFFFiles parser is coerced to
48
44
automatically detected scientific types.
49
45
46
+ Datasets are saved as Julia artifacts so that they persist locally once loaded.
47
+
50
48
Returns a table.
51
49
52
50
# Examples
@@ -58,9 +56,24 @@ df = DataFrame(table);
58
56
```
59
57
"""
60
58
function load (id:: Int ; parser = :arff )
61
- response = load_Dataset_Description (id)
62
- arff_file = HTTP. request (" GET" , response[" data_set_description" ][" url" ])
63
- data = ARFFFiles. load (IOBuffer (arff_file. body))
59
+ if VERSION > v " 1.3.0"
60
+ dir = first (Artifacts. artifacts_dirs ())
61
+ toml = joinpath (dir, " OpenMLArtifacts.toml" )
62
+ hash = artifact_hash (string (id), toml)
63
+ if hash === nothing || ! artifact_exists (hash)
64
+ hash = Artifacts. create_artifact () do artifact_dir
65
+ url = load_Dataset_Description (id)[" data_set_description" ][" url" ]
66
+ download (url, joinpath (artifact_dir, " $id .arff" ))
67
+ end
68
+ bind_artifact! (toml, string (id), hash)
69
+ end
70
+ filename = joinpath (artifact_path (hash), " $id .arff" )
71
+ else
72
+ url = load_Dataset_Description (id)[" data_set_description" ][" url" ]
73
+ filename = tempname ()
74
+ download (url, filename)
75
+ end
76
+ data = ARFFFiles. load (filename)
64
77
if parser == :auto
65
78
return coerce (data, autotype (data))
66
79
else
0 commit comments