Commit 57d84ef

support for MLDatasets v0.6 (#164)
* mldatasets new release
* cleanup
* neuralode example
* docs
* fix node test
* cleanup
* julia 1.6 compat
* more tests
* cleanup
1 parent eb43bc9 commit 57d84ef

16 files changed, +217 -89 lines

Project.toml

Lines changed: 1 addition & 0 deletions
@@ -13,6 +13,7 @@ Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
 Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
 KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
 MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
 MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"

docs/src/datasets.md

Lines changed: 7 additions & 2 deletions
@@ -1,4 +1,9 @@
 # Datasets

-GraphNeuralNetworks.jl doesn't come with its own datasets, but leverages those available in the Julia (and non-Julia) ecosystem. In particular, the [examples in the GraphNeuralNetworks.jl repository](https://github.com/CarloLucibello/GraphNeuralNetworks.jl/tree/master/examples) make use of the [MLDatasets.jl](https://github.com/JuliaML/MLDatasets.jl) package. There you will find common graph datasets such as Cora, PubMed, and Citeseer.
-Also MLDatasets gives access to the [TUDataset](https://chrsmrrs.github.io/datasets/docs/datasets/) repository and its numerous datasets.
+GraphNeuralNetworks.jl doesn't come with its own datasets, but leverages those available in the Julia (and non-Julia) ecosystem. In particular, the [examples in the GraphNeuralNetworks.jl repository](https://github.com/CarloLucibello/GraphNeuralNetworks.jl/tree/master/examples) make use of the [MLDatasets.jl](https://github.com/JuliaML/MLDatasets.jl) package. There you will find common graph datasets such as Cora, PubMed, Citeseer, TUDataset and [many others](https://juliaml.github.io/MLDatasets.jl/dev/datasets/graphs/).
+
+GraphNeuralNetworks.jl provides the [`mldataset2gnngraph`](@ref) method for interfacing with MLDatasets.jl.
+
+```@docs
+mldataset2gnngraph
+```
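As context for the interface documented above, here is a minimal sketch of the conversion flow, pieced together from the usage in this commit's updated examples (the exact `ndata` fields, such as `features`, `targets` and the split masks, depend on the dataset):

```julia
using GraphNeuralNetworks
using MLDatasets: Cora

dataset = Cora()                 # MLDatasets v0.6 dataset object
g = mldataset2gnngraph(dataset)  # convert to a GNNGraph

# Node-level data lands in g.ndata, as the examples in this commit rely on:
X = g.ndata.features
y = g.ndata.targets
(; train_mask, val_mask, test_mask) = g.ndata
```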

examples/Project.toml

Lines changed: 7 additions & 4 deletions
@@ -3,13 +3,16 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 DiffEqFlux = "aae7a2af-3d4f-5e19-a356-7da93b79d9d0"
 DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
-GeometricFlux = "7e08b658-56d3-11e9-2997-919d5b31e4ea"
 GraphNeuralNetworks = "cffab07f-9bc2-4db1-8861-388f63bf7694"
-GraphSignals = "3ebe565e-a4b5-49c6-aed2-300248c3a9c1"
 Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
 MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
 NNlibCUDA = "a00861dc-f156-4864-bf3c-e6376f28a68d"

-[extras]
-CPUSummary = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9"
+[compat]
+DiffEqFlux = "1.45"
+Flux = "0.13"
+Graphs = "1"
+GraphNeuralNetworks = "0.4"
+MLDatasets = "0.6"
+julia = "1.7"

examples/graph_classification_tudataset.jl

Lines changed: 21 additions & 28 deletions
@@ -7,17 +7,18 @@ using Flux.Data: DataLoader
 using GraphNeuralNetworks
 using MLDatasets: TUDataset
 using Statistics, Random
+using MLUtils
 using CUDA
 CUDA.allowscalar(false)

 function eval_loss_accuracy(model, data_loader, device)
     loss = 0.
     acc = 0.
     ntot = 0
-    for g in data_loader
-        g = g |> device
-        n = g.num_graphs
-        y = g.gdata.y
+    for (graphs, y) in data_loader
+        g = Flux.batch(graphs) |> device
+        y = y |> device
+        n = length(y)
         ŷ = model(g, g.ndata.x) |> vec
         loss += logitbinarycrossentropy(ŷ, y) * n
         acc += mean((ŷ .> 0) .== y) * n
@@ -28,26 +29,19 @@ end

 function getdataset()
     tudata = TUDataset("MUTAG")
-
-    x = Array{Float32}(onehotbatch(tudata.node_labels, 0:6))
-    y = (1 .+ Array{Float32}(tudata.graph_labels)) ./ 2
+    display(tudata)
+    graphs = mldataset2gnngraph(tudata)
+    oh(x) = Float32.(onehotbatch(x, 0:6))
+    graphs = [GNNGraph(g, ndata=oh(g.ndata.targets)) for g in graphs]
+    y = (1 .+ Float32.(tudata.graph_data.targets)) ./ 2
     @assert all(∈([0,1]), y) # binary classification
-
-    ## The dataset also has edge features but we won't be using them
-    # e = Array{Float32}(onehotbatch(data.edge_labels, sort(unique(data.edge_labels))))
-
-    gall = GNNGraph(tudata.source, tudata.target,
-                    num_nodes=tudata.num_nodes,
-                    graph_indicator=tudata.graph_indicator,
-                    ndata=(; x), gdata=(; y))
-
-    return [getgraph(gall, i) for i=1:gall.num_graphs]
+    return graphs, y
 end

 # arguments for the `train` function
 Base.@kwdef mutable struct Args
     η = 1f-3             # learning rate
-    batchsize = 64       # batch size (number of graphs in each batch)
+    batchsize = 32       # batch size (number of graphs in each batch)
     epochs = 200         # number of epochs
     seed = 17            # set seed > 0 for reproducibility
     usecuda = true       # if true use cuda (if available)
@@ -71,19 +65,18 @@ function train(; kws...)
     # LOAD DATA
     NUM_TRAIN = 150

-    data = getdataset()
-    shuffle!(data)
+    dataset = getdataset()
+    train_data, test_data = splitobs(dataset, at=NUM_TRAIN/numobs(dataset), shuffle=true)

-    train_loader = DataLoader(data[1:NUM_TRAIN], batchsize=args.batchsize, shuffle=true)
-    test_loader = DataLoader(data[NUM_TRAIN+1:end], batchsize=args.batchsize, shuffle=false)
+    train_loader = DataLoader(train_data, batchsize=args.batchsize, shuffle=true)
+    test_loader = DataLoader(test_data, batchsize=args.batchsize, shuffle=false)

     # DEFINE MODEL

-    nin = size(data[1].ndata.x, 1)
+    nin = size(dataset[1][1].ndata.x, 1)
     nhidden = args.nhidden

     model = GNNChain(GraphConv(nin => nhidden, relu),
-                     Dropout(0.5),
                      GraphConv(nhidden => nhidden, relu),
                      GlobalPool(mean),
                      Dense(nhidden, 1)) |> device
@@ -103,15 +96,15 @@ function train(; kws...)

     report(0)
     for epoch in 1:args.epochs
-        for g in train_loader
-            g = g |> device
+        for (graphs, y) in train_loader
+            g = Flux.batch(graphs) |> device
+            y = y |> device
             gs = Flux.gradient(ps) do
                 ŷ = model(g, g.ndata.x) |> vec
-                logitbinarycrossentropy(ŷ, g.gdata.y)
+                logitbinarycrossentropy(ŷ, y)
             end
             Flux.Optimise.update!(opt, ps, gs)
         end
-
        epoch % args.infotime == 0 && report(epoch)
     end
 end
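The shape of the new pipeline in this file: `getdataset` returns a `(graphs, y)` tuple, the split is delegated to `splitobs` from MLUtils, and graphs are batched explicitly with `Flux.batch` inside the loops. A condensed sketch of that flow, assuming the definitions above (`at=0.8` and `batchsize=4` are arbitrary illustration values):

```julia
using Flux, MLUtils
using Flux.Data: DataLoader

graphs, y = getdataset()        # (vector of GNNGraphs, vector of labels)
train_data, test_data = splitobs((graphs, y), at=0.8, shuffle=true)

loader = DataLoader(train_data, batchsize=4, shuffle=true)
for (gs, ys) in loader
    g = Flux.batch(gs)          # merge the mini-batch into a single GNNGraph
    # the forward pass then uses (g, g.ndata.x), as in the training loop above
end
```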

examples/link_prediction_pubmed.jl

Lines changed: 5 additions & 6 deletions
@@ -46,11 +46,10 @@ function train(; kws...)
     end

     ### LOAD DATA
-    data = PubMed.dataset()
-    g = GNNGraph(data.adjacency_list)
-
+    g = mldataset2gnngraph(PubMed())
+
     # Print some info
-    @info g
+    display(g)
     @show is_bidirected(g)
     @show has_self_loops(g)
     @show has_multi_edges(g)
@@ -59,7 +58,7 @@

     # Move to device
     g = g |> device
-    X = data.node_features |> device
+    X = g.ndata.features

     #### TRAIN/TEST splits
     # With bidirected graph, we make sure that an edge and its reverse
@@ -117,4 +116,4 @@
     end
 end

-# train()
+train()
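For quick reference, the data-loading migration in this file as a before/after sketch (the old API is MLDatasets <= 0.5):

```julia
using GraphNeuralNetworks
using MLDatasets: PubMed

# Old (MLDatasets <= 0.5): build the graph by hand from the adjacency list
# data = PubMed.dataset()
# g = GNNGraph(data.adjacency_list)
# X = data.node_features

# New (MLDatasets 0.6): one conversion call; features travel with the graph
g = mldataset2gnngraph(PubMed())
X = g.ndata.features
```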

examples/neural_ode_cora.jl

Lines changed: 22 additions & 17 deletions
@@ -2,6 +2,7 @@
 using GraphNeuralNetworks, DiffEqFlux, DifferentialEquations
 using Flux: onehotbatch, onecold
 using Flux.Losses: logitcrossentropy
+using Flux
 using Statistics: mean
 using MLDatasets: Cora
 using CUDA
@@ -11,27 +12,24 @@ using CUDA
 device = CUDA.functional() ? gpu : cpu

 # LOAD DATA
-data = Cora.dataset()
-g = GNNGraph(data.adjacency_list) |> device
-X = data.node_features |> device
-y = onehotbatch(data.node_labels, 1:data.num_classes) |> device
-train_ids = data.train_indices |> device
-val_ids = data.val_indices |> device
-test_ids = data.test_indices |> device
-ytrain = y[:, train_ids]
+dataset = Cora()
+classes = dataset.metadata["classes"]
+g = mldataset2gnngraph(dataset) |> device
+X = g.ndata.features
+y = onehotbatch(g.ndata.targets |> cpu, classes) |> device # remove when https://github.com/FluxML/Flux.jl/pull/1959 tagged
+(; train_mask, val_mask, test_mask) = g.ndata
+ytrain = y[:,train_mask]


 # Model and Data Configuration
 nin = size(X, 1)
 nhidden = 16
-nout = data.num_classes
+nout = length(classes)
 epochs = 40

 # Define the Neural GDE
 diffeqsol_to_array(x) = reshape(device(x), size(x)[1:2])

-# GCNConv(nhidden => nhidden, graph=g),
-
 node_chain = GNNChain(GCNConv(nhidden => nhidden, relu),
                       GCNConv(nhidden => nhidden, relu)) |> device

@@ -40,14 +38,10 @@ node = NeuralODE(WithGraph(node_chain, g),
                  reltol = 1e-3, abstol = 1e-3, save_start = false) |> device

 model = GNNChain(GCNConv(nin => nhidden, relu),
-                 Dropout(0.5),
                  node,
                  diffeqsol_to_array,
                  Dense(nhidden, nout)) |> device

-# Loss
-loss(x, y) = logitcrossentropy(model(g, x), y)
-accuracy(x, y) = mean(onecold(model(g, x)) .== onecold(y))

 # # Training
 # ## Model Parameters
@@ -56,9 +50,20 @@ ps = Flux.params(model);
 # ## Optimizer
 opt = ADAM(0.01)

+
+function eval_loss_accuracy(X, y, mask)
+    ŷ = model(g, X)
+    l = logitcrossentropy(ŷ[:,mask], y[:,mask])
+    acc = mean(onecold(ŷ[:,mask]) .== onecold(y[:,mask]))
+    return (loss = round(l, digits=4), acc = round(acc*100, digits=2))
+end
+
 # ## Training Loop
 for epoch in 1:epochs
-    gs = gradient(() -> loss(X, y), ps)
+    gs = gradient(ps) do
+        ŷ = model(g, X)
+        logitcrossentropy(ŷ[:,train_mask], ytrain)
+    end
     Flux.Optimise.update!(opt, ps, gs)
-    @show(accuracy(X, y))
+    @show eval_loss_accuracy(X, y, train_mask)
 end
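The split handling changes in the same way here: under MLDatasets v0.6, Cora's splits arrive as boolean masks stored in `g.ndata` rather than as index vectors, and labels are one-hot encoded against `dataset.metadata["classes"]`. A sketch of the pattern, keeping the CPU round-trip noted in the diff (a workaround pending FluxML/Flux.jl#1959):

```julia
using Flux: onehotbatch

# `g`, `classes` and `device` as defined above
(; train_mask, val_mask, test_mask) = g.ndata    # boolean masks over nodes
y = onehotbatch(g.ndata.targets |> cpu, classes) |> device
ytrain = y[:, train_mask]                        # mask-based column selection
```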

examples/node_classification_cora.jl

Lines changed: 16 additions & 17 deletions
@@ -9,10 +9,10 @@ using Statistics, Random
 using CUDA
 CUDA.allowscalar(false)

-function eval_loss_accuracy(X, y, ids, model, g)
+function eval_loss_accuracy(X, y, mask, model, g)
     ŷ = model(g, X)
-    l = logitcrossentropy(ŷ[:,ids], y[:,ids])
-    acc = mean(onecold(ŷ[:,ids]) .== onecold(y[:,ids]))
+    l = logitcrossentropy(ŷ[:,mask], y[:,mask])
+    acc = mean(onecold(ŷ[:,mask]) .== onecold(y[:,mask]))
     return (loss = round(l, digits=4), acc = round(acc*100, digits=2))
 end
@@ -41,32 +41,30 @@ function train(; kws...)
     end

     # LOAD DATA
-    data = Cora.dataset()
-    g = GNNGraph(data.adjacency_list) |> device
-    X = data.node_features |> device
-    y = onehotbatch(data.node_labels, 1:data.num_classes) |> device
-    train_ids = data.train_indices |> device
-    val_ids = data.val_indices |> device
-    test_ids = data.test_indices |> device
-    ytrain = y[:,train_ids]
+    dataset = Cora()
+    classes = dataset.metadata["classes"]
+    g = mldataset2gnngraph(dataset) |> device
+    X = g.ndata.features
+    y = onehotbatch(g.ndata.targets |> cpu, classes) |> device # remove when https://github.com/FluxML/Flux.jl/pull/1959 tagged
+    (; train_mask, val_mask, test_mask) = g.ndata
+    ytrain = y[:,train_mask]

-    nin, nhidden, nout = size(X,1), args.nhidden, data.num_classes
+    nin, nhidden, nout = size(X,1), args.nhidden, length(classes)

     ## DEFINE MODEL
     model = GNNChain(GCNConv(nin => nhidden, relu),
-                     Dropout(0.5),
                      GCNConv(nhidden => nhidden, relu),
                      Dense(nhidden, nout)) |> device

     ps = Flux.params(model)
     opt = ADAM(args.η)

-    @info g
+    display(g)

     ## LOGGING FUNCTION
     function report(epoch)
-        train = eval_loss_accuracy(X, y, train_ids, model, g)
-        test = eval_loss_accuracy(X, y, test_ids, model, g)
+        train = eval_loss_accuracy(X, y, train_mask, model, g)
+        test = eval_loss_accuracy(X, y, test_mask, model, g)
         println("Epoch: $epoch Train: $(train) Test: $(test)")
     end

@@ -75,7 +73,7 @@ function train(; kws...)
     for epoch in 1:args.epochs
         gs = Flux.gradient(ps) do
             ŷ = model(g, X)
-            logitcrossentropy(ŷ[:,train_ids], ytrain)
+            logitcrossentropy(ŷ[:,train_mask], ytrain)
         end

         Flux.Optimise.update!(opt, ps, gs)
@@ -85,3 +83,4 @@ function train(; kws...)
 end

 train()
+
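A hedged usage note: the script now calls `train()` at load time. Since `train` is declared as `function train(; kws...)` and reads hyperparameters from an `Args` struct (the `args.nhidden`, `args.η`, `args.epochs`, `args.infotime` accesses above), overrides can presumably be passed as keywords; the call below is illustrative only:

```julia
# Hypothetical invocation with overridden hyperparameters; the field names
# are inferred from the `args.*` accesses visible in this script.
train(nhidden = 64, epochs = 100)
```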

perf/neural_ode_mnist.jl

Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
+# Load the packages
+using GraphNeuralNetworks, DiffEqFlux, DifferentialEquations
+using Flux: onehotbatch, onecold
+using Flux.Losses: logitcrossentropy
+using Flux
+using Statistics: mean
+using MLDatasets
+using CUDA
+# CUDA.allowscalar(false) # Some scalar indexing is still done by DiffEqFlux
+
+# device = cpu # `gpu` not working yet
+device = CUDA.functional() ? gpu : cpu
+
+# LOAD DATA
+X, y = MNIST(:train)[:]
+y = onehotbatch(y, 0:9)
+
+
+# Define the Neural GDE
+diffeqsol_to_array(x) = reshape(device(x), size(x)[1:2])
+
+nin, nhidden, nout = 28*28, 100, 10
+epochs = 10
+
+node_chain = Chain(Dense(nhidden => nhidden, tanh),
+                   Dense(nhidden => nhidden)) |> device
+
+node = NeuralODE(node_chain,
+                 (0.f0, 1.f0), Tsit5(), save_everystep=false,
+                 reltol=1e-3, abstol=1e-3, save_start=false) |> device
+
+model = Chain(Flux.flatten,
+              Dense(nin => nhidden, relu),
+              node,
+              diffeqsol_to_array,
+              Dense(nhidden, nout)) |> device
+
+# # Training
+# ## Model Parameters
+ps = Flux.params(model);
+
+# ## Optimizer
+opt = ADAM(0.01)
+
+function eval_loss_accuracy(X, y)
+    ŷ = model(X)
+    l = logitcrossentropy(ŷ, y)
+    acc = mean(onecold(ŷ) .== onecold(y))
+    return (loss = round(l, digits=4), acc = round(acc*100, digits=2))
+end
+
+# ## Training Loop
+for epoch in 1:epochs
+    gs = gradient(ps) do
+        ŷ = model(X)
+        logitcrossentropy(ŷ, y)
+    end
+    Flux.Optimise.update!(opt, ps, gs)
+    @show eval_loss_accuracy(X, y)
+end
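A possible follow-up once training finishes: evaluate on the held-out split, mirroring the `MNIST(:train)[:]` indexing used above (a sketch; as with the training data, no device move is shown):

```julia
Xtest, ytest = MNIST(:test)[:]
ytest = onehotbatch(ytest, 0:9)
@show eval_loss_accuracy(Xtest, ytest)
```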
