|
1 | 1 | export Mutagenesis
|
2 | 2 | module Mutagenesis
|
3 | 3 |
|
4 |
| - using DataDeps, JSON |
5 |
| - using ..MLDatasets: datafile |
6 |
| - |
7 |
| - const DEPNAME = "Mutagenesis" |
8 |
| - const DATA = "data.json" |
9 |
| - const METADATA = "meta.json" |
10 |
| - |
11 |
| - function __init__() |
12 |
| - ORIGINAL_LINK = "https://relational.fit.cvut.cz/dataset/Mutagenesis" |
13 |
| - DATA_LINK = "https://raw.githubusercontent.com/CTUAvastLab/datasets/main/mutagenesis" |
14 |
| - |
15 |
| - register(DataDep( |
16 |
| - DEPNAME, |
17 |
| - """ |
18 |
| - Dataset: The $DEPNAME dataset. |
19 |
| - Website: $ORIGINAL_LINK |
20 |
| - License: CC0 |
21 |
| - """, |
22 |
| - "$DATA_LINK/" .* [DATA, METADATA], |
23 |
| - )) |
24 |
| - end |
25 |
| - |
26 |
| - traindata(; dir = nothing) = traindata(dir) |
27 |
| - testdata(; dir = nothing) = testdata(dir) |
28 |
| - valdata(; dir = nothing) = valdata(dir) |
29 |
| - |
30 |
| - function traindata(dir) |
31 |
| - samples, targets, train_idxs, val_idxs, test_idxs = load_data(dir) |
32 |
| - samples[train_idxs], targets[train_idxs] |
33 |
| - end |
34 |
| - |
35 |
| - function testdata(dir) |
36 |
| - samples, targets, train_idxs, val_idxs, test_idxs = load_data(dir) |
37 |
| - samples[test_idxs], targets[test_idxs] |
38 |
| - end |
39 |
| - |
40 |
| - function valdata(dir) |
41 |
| - samples, targets, train_idxs, val_idxs, test_idxs = load_data(dir) |
42 |
| - samples[val_idxs], targets[val_idxs] |
43 |
| - end |
44 |
| - |
45 |
| - function load_data(dir) |
46 |
| - data_path = datafile(DEPNAME, DATA, dir) |
47 |
| - metadata_path = datafile(DEPNAME, METADATA, dir) |
48 |
| - samples = read_data(data_path) |
49 |
| - metadata = read_metadata(metadata_path) |
50 |
| - labelkey = metadata["label"] |
51 |
| - targets = map(i -> i[labelkey], samples) |
52 |
| - val_num = metadata["val_samples"] |
53 |
| - test_num = metadata["test_samples"] |
54 |
| - train_idxs = 1:length(samples)-val_num-test_num |
55 |
| - val_idxs = length(samples)-val_num-test_num+1:length(samples)-test_num |
56 |
| - test_idxs = length(samples)-test_num+1:length(samples) |
57 |
| - samples, targets, train_idxs, val_idxs, test_idxs |
58 |
| - end |
59 |
| - |
60 |
| - read_data(path) = Vector{Dict}(open(JSON.parse, path)) |
61 |
| - read_metadata(path) = open(JSON.parse, path) |
| 4 | +using DataDeps, JSON3 |
| 5 | +using ..MLDatasets: datafile |
| 6 | + |
| 7 | +const DEPNAME = "Mutagenesis" |
| 8 | +const DATA = "data.json" |
| 9 | +const METADATA = "meta.json" |
| 10 | + |
| 11 | +function __init__() |
| 12 | + ORIGINAL_LINK = "https://relational.fit.cvut.cz/dataset/Mutagenesis" |
| 13 | + DATA_LINK = "https://raw.githubusercontent.com/CTUAvastLab/datasets/main/mutagenesis" |
| 14 | + |
| 15 | + register(DataDep( |
| 16 | + DEPNAME, |
| 17 | + """ |
| 18 | + Dataset: The $DEPNAME dataset. |
| 19 | + Website: $ORIGINAL_LINK |
| 20 | + License: CC0 |
| 21 | + """, |
| 22 | + "$DATA_LINK/" .* [DATA, METADATA], |
| 23 | + )) |
| 24 | +end |
| 25 | + |
| 26 | +traindata(; dir = nothing) = traindata(dir) |
| 27 | +testdata(; dir = nothing) = testdata(dir) |
| 28 | +valdata(; dir = nothing) = valdata(dir) |
| 29 | + |
| 30 | +function traindata(dir) |
| 31 | + samples, targets, train_idxs, val_idxs, test_idxs = load_data(dir) |
| 32 | + samples[train_idxs], targets[train_idxs] |
| 33 | +end |
| 34 | + |
| 35 | +function testdata(dir) |
| 36 | + samples, targets, train_idxs, val_idxs, test_idxs = load_data(dir) |
| 37 | + samples[test_idxs], targets[test_idxs] |
| 38 | +end |
| 39 | + |
| 40 | +function valdata(dir) |
| 41 | + samples, targets, train_idxs, val_idxs, test_idxs = load_data(dir) |
| 42 | + samples[val_idxs], targets[val_idxs] |
| 43 | +end |
| 44 | + |
| 45 | +function load_data(dir) |
| 46 | + data_path = datafile(DEPNAME, DATA, dir) |
| 47 | + metadata_path = datafile(DEPNAME, METADATA, dir) |
| 48 | + samples = read_data(data_path) |
| 49 | + metadata = read_data(metadata_path) |
| 50 | + labelkey = metadata["label"] |
| 51 | + targets = map(i -> i[labelkey], samples) |
| 52 | + val_num = metadata["val_samples"] |
| 53 | + test_num = metadata["test_samples"] |
| 54 | + train_idxs = 1:length(samples)-val_num-test_num |
| 55 | + val_idxs = length(samples)-val_num-test_num+1:length(samples)-test_num |
| 56 | + test_idxs = length(samples)-test_num+1:length(samples) |
| 57 | + samples, targets, train_idxs, val_idxs, test_idxs |
| 58 | +end |
| 59 | + |
| 60 | +read_data(path) = open(JSON3.read, path) |
62 | 61 |
|
63 | 62 | end # module
|
0 commit comments