
Commit ff91294

Merge pull request #18 from CarloLucibello/cl/tudataset
add graph classification example
2 parents cb515a4 + e4dafbe commit ff91294

File tree: 11 files changed, +250 −24 lines changed

Project.toml

Lines changed: 0 additions & 1 deletion
@@ -11,7 +11,6 @@ Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77"
LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
NNlibCUDA = "a00861dc-f156-4864-bf3c-e6376f28a68d"

README.md

Lines changed: 2 additions & 1 deletion
@@ -13,7 +13,8 @@ Some of its noticeable features are the following:
* CUDA support.
* Integrated with the JuliaGraphs ecosystem.
* Supports generic graph neural network architectures.
-* Easy to define custom graph convolutional layers.
+* Operation on batched graphs.
+* Easily define your custom graph convolutional layers.

## Installation

examples/Project.toml

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
[deps]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
GraphNeuralNetworks = "cffab07f-9bc2-4db1-8861-388f63bf7694"
LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
NNlibCUDA = "a00861dc-f156-4864-bf3c-e6376f28a68d"
Lines changed: 135 additions & 0 deletions
@@ -0,0 +1,135 @@
# An example of graph classification

using Flux
using Flux: @functor, dropout, onecold, onehotbatch, getindex
using Flux.Losses: logitbinarycrossentropy
using Flux.Data: DataLoader
using GraphNeuralNetworks
using MLDatasets: TUDataset
using Statistics, Random
using CUDA
CUDA.allowscalar(false)

function eval_loss_accuracy(model, data_loader, device)
    loss = 0.
    acc = 0.
    ntot = 0
    for (g, X, y) in data_loader
        g, X, y = g |> device, X |> device, y |> device
        n = length(y)
        ŷ = model(g, X) |> vec
        loss += logitbinarycrossentropy(ŷ, y) * n
        acc += mean((2 .* ŷ .- 1) .* (2 .* y .- 1) .> 0) * n
        ntot += n
    end
    return (loss = round(loss/ntot, digits=4), acc = round(acc*100/ntot, digits=2))
end

struct GNNData
    g
    X
    y
end

Base.getindex(data::GNNData, i::Int) = getindex(data, [i])

function Base.getindex(data::GNNData, i::AbstractVector)
    sg, nodemap = subgraph(data.g, i)
    return (sg, data.X[:,nodemap], data.y[i])
end

# Flux's Dataloader compatibility. Related PR https://github.com/FluxML/Flux.jl/pull/1683
Flux.Data._nobs(data::GNNData) = data.g.num_graphs
Flux.Data._getobs(data::GNNData, i) = data[i]

function process_dataset(data)
    g = GNNGraph(data.source, data.target, num_nodes=data.num_nodes, graph_indicator=data.graph_indicator)
    X = Array{Float32}(onehotbatch(data.node_labels, 0:6))
    # The dataset also has edge features but we won't be using them
    # E = Array{Float32}(onehotbatch(data.edge_labels, sort(unique(data.edge_labels))))
    y = (1 .+ Array{Float32}(data.graph_labels)) ./ 2
    @assert all(∈([0,1]), y) # binary classification
    return GNNData(g, X, y)
end

# arguments for the `train` function
Base.@kwdef mutable struct Args
    η = 1f-3             # learning rate
    batchsize = 64       # batch size (number of graphs in each batch)
    epochs = 200         # number of epochs
    seed = 17            # set seed > 0 for reproducibility
    usecuda = true       # if true use cuda (if available)
    nhidden = 128        # dimension of hidden features
    infotime = 10        # report every `infotime` epochs
end

function train(; kws...)
    args = Args(; kws...)
    args.seed > 0 && Random.seed!(args.seed)

    if args.usecuda && CUDA.functional()
        device = gpu
        args.seed > 0 && CUDA.seed!(args.seed)
        @info "Training on GPU"
    else
        device = cpu
        @info "Training on CPU"
    end

    # LOAD DATA

    NUM_TRAIN = 150
    full_data = TUDataset("MUTAG")

    @info "MUTAG DATASET
        num_nodes: $(full_data.num_nodes)
        num_edges: $(full_data.num_edges)
        num_graphs: $(full_data.num_graphs)"

    perm = randperm(full_data.num_graphs)
    dtrain = process_dataset(full_data[perm[1:NUM_TRAIN]])
    dtest = process_dataset(full_data[perm[NUM_TRAIN+1:end]])
    train_loader = DataLoader(dtrain, batchsize=args.batchsize, shuffle=true)
    test_loader = DataLoader(dtest, batchsize=args.batchsize, shuffle=false)

    # DEFINE MODEL

    nin = size(dtrain.X, 1)
    nhidden = args.nhidden

    model = GNNChain(GraphConv(nin => nhidden, relu),
                     Dropout(0.5),
                     GraphConv(nhidden => nhidden, relu),
                     GlobalPool(mean),
                     Dense(nhidden, 1)) |> device

    ps = Flux.params(model)
    opt = ADAM(args.η)


    # LOGGING FUNCTION

    function report(epoch)
        train = eval_loss_accuracy(model, train_loader, device)
        test = eval_loss_accuracy(model, test_loader, device)
        println("Epoch: $epoch   Train: $(train)   Test: $(test)")
    end

    # TRAIN

    report(0)
    for epoch in 1:args.epochs
        for (g, X, y) in train_loader
            g, X, y = g |> device, X |> device, y |> device
            gs = Flux.gradient(ps) do
                ŷ = model(g, X) |> vec
                logitbinarycrossentropy(ŷ, y)
            end
            Flux.Optimise.update!(opt, ps, gs)
        end

        epoch % args.infotime == 0 && report(epoch)
    end
end

train()
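The `train` function accepts keyword arguments that map onto the fields of `Args`, so a run can be customized without editing the file. A hypothetical invocation (not part of the committed example):

```julia
# Override the defaults defined in `Args`; any field name can be passed as a keyword.
train(usecuda = false, epochs = 50, batchsize = 32, infotime = 5)
```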

examples/cora.jl renamed to examples/node_classification_cora.jl

Lines changed: 2 additions & 2 deletions
@@ -21,7 +21,7 @@ Base.@kwdef mutable struct Args
    η = 1f-3             # learning rate
    epochs = 100         # number of epochs
    seed = 17            # set seed > 0 for reproducibility
-   use_cuda = true      # if true use cuda (if available)
+   usecuda = true       # if true use cuda (if available)
    nhidden = 128        # dimension of hidden features
    infotime = 10        # report every `infotime` epochs
end
@@ -33,7 +33,7 @@ function train(; kws...)
        CUDA.seed!(args.seed)
    end

-   if args.use_cuda && CUDA.functional()
+   if args.usecuda && CUDA.functional()
        device = gpu
        @info "Training on GPU"
    else

src/GraphNeuralNetworks.jl

Lines changed: 2 additions & 1 deletion
@@ -24,7 +24,8 @@ export
    edge_index,
    node_feature, edge_feature, global_feature,
    adjacency_list, normalized_laplacian, scaled_laplacian,
-   add_self_loops,
+   add_self_loops, remove_self_loops,
+   subgraph,

    # from LightGraphs
    adjacency_matrix,

src/gnngraph.jl

Lines changed: 71 additions & 10 deletions
@@ -11,7 +11,7 @@ const ADJMAT_T = AbstractMatrix
const SPARSE_T = AbstractSparseMatrix # subset of ADJMAT_T

"""
-    GNNGraph(data; [graph_type, nf, ef, gf, num_nodes, num_graphs, graph_indicator, dir])
+    GNNGraph(data; [graph_type, nf, ef, gf, num_nodes, graph_indicator, dir])
    GNNGraph(g::GNNGraph; [nf, ef, gf])

A type representing a graph structure and storing also arrays
@@ -23,6 +23,11 @@ is governed by `graph_type`.
When constructed from another graph `g`, the internal graph representation
is preserved and shared.

+A `GNNGraph` can also represent multiple graphs batched together
+(see [`Flux.batch`](@ref) or [`SparseArrays.blockdiag`](@ref)).
+The field `g.graph_indicator` contains the graph membership
+of each node.
+
A `GNNGraph` is a LightGraphs' `AbstractGraph`, therefore any functionality
from the LightGraphs' graph library can be used on it.

@@ -45,7 +50,6 @@ from the LightGraphs' graph library can be used on it.
- `dir`. The assumed edge direction when given adjacency matrix or adjacency list input data `g`.
  Possible values are `:out` and `:in`. Default `:out`.
- `num_nodes`. The number of nodes. If not specified, inferred from `g`. Default `nothing`.
-- `num_graphs`. The number of graphs. Larger than 1 in case of batched graphs. Default `1`.
- `graph_indicator`. For batched graphs, a vector containing the graph assignment of each node. Default `nothing`.
- `nf`: Node features. Either nothing, or an array whose last dimension has size num_nodes. Default `nothing`.
- `ef`: Edge features. Either nothing, or an array whose last dimension has size num_edges. Default `nothing`.
@@ -118,17 +122,17 @@ function GNNGraph(data;

    @assert graph_type ∈ [:coo, :dense, :sparse] "Invalid graph_type $graph_type requested"
    @assert dir ∈ [:in, :out]
+
    if graph_type == :coo
        g, num_nodes, num_edges = to_coo(data; num_nodes, dir)
    elseif graph_type == :dense
        g, num_nodes, num_edges = to_dense(data; dir)
    elseif graph_type == :sparse
        g, num_nodes, num_edges = to_sparse(data; dir)
    end
-    if num_graphs > 1
-        @assert len(graph_indicator) = num_nodes "When batching multiple graphs `graph_indicator` should be filled with the nodes' memberships."
-    end
-
+
+    num_graphs = !isnothing(graph_indicator) ? maximum(graph_indicator) : 1
+
    ## Possible future implementation of feature maps.
    ## Currently this doesn't play well with zygote due to
    ## https://github.com/FluxML/Zygote.jl/issues/717
@@ -149,8 +153,8 @@ GNNGraph((s, t)::NTuple{2}; kws...) = GNNGraph((s, t, nothing); kws...)

function GNNGraph(g::AbstractGraph; kws...)
    s = LightGraphs.src.(LightGraphs.edges(g))
-    t = LightGraphs.dst.(LightGraphs.edges(g))
-    GNNGraph((s, t); kws...)
+    t = LightGraphs.dst.(LightGraphs.edges(g))
+    GNNGraph((s, t); num_nodes = nv(g), kws...)
end

function GNNGraph(g::GNNGraph;
@@ -431,19 +435,76 @@ function _catgraphs(g1::GNNGraph{<:COO_T}, g2::GNNGraph{<:COO_T})
    )
end

-# Cat public interfaces
+### Cat public interfaces #############
+
+"""
+    blockdiag(xs::GNNGraph...)
+
+Batch together multiple `GNNGraph`s into a single one
+containing the total number of nodes and edges of the original graphs.
+
+Equivalent to [`Flux.batch`](@ref).
+"""
function SparseArrays.blockdiag(g1::GNNGraph, gothers::GNNGraph...)
-    @assert length(gothers) >= 1
    g = g1
    for go in gothers
        g = _catgraphs(g, go)
    end
    return g
end

+"""
+    batch(xs::Vector{<:GNNGraph})
+
+Batch together multiple `GNNGraph`s into a single one
+containing the total number of nodes and edges of the original graphs.
+
+Equivalent to [`SparseArrays.blockdiag`](@ref).
+"""
Flux.batch(xs::Vector{<:GNNGraph}) = blockdiag(xs...)
#########################

+"""
+    subgraph(g::GNNGraph, i)
+
+Return the subgraph of `g` induced by those nodes `v`
+for which `g.graph_indicator[v] ∈ i`. In other words, it
+extracts the component graphs from a batched graph.
+
+It also returns a vector `nodes` mapping the new nodes to the old ones.
+The node `i` in the subgraph corresponds to the node `nodes[i]` in `g`.
+"""
+subgraph(g::GNNGraph, i::Int) = subgraph(g::GNNGraph{<:COO_T}, [i])
+
+function subgraph(g::GNNGraph{<:COO_T}, i::AbstractVector)
+    node_mask = g.graph_indicator .∈ Ref(i)
+
+    nodes = (1:g.num_nodes)[node_mask]
+    nodemap = Dict(v => vnew for (vnew, v) in enumerate(nodes))
+
+    graphmap = Dict(i => inew for (inew, i) in enumerate(i))
+    graph_indicator = [graphmap[i] for i in g.graph_indicator[node_mask]]
+
+    s, t, w = g.graph
+    edge_mask = s .∈ Ref(nodes)
+    s = [nodemap[i] for i in s[edge_mask]]
+    t = [nodemap[i] for i in t[edge_mask]]
+    w = isnothing(w) ? nothing : w[edge_mask]
+    nf = isnothing(g.nf) ? nothing : g.nf[:,node_mask]
+    ef = isnothing(g.ef) ? nothing : g.ef[:,edge_mask]
+    gf = isnothing(g.gf) ? nothing : g.gf[:,i]
+
+    num_nodes = length(graph_indicator)
+    num_edges = length(s)
+    num_graphs = length(i)
+
+    gnew = GNNGraph((s,t,w),
+                    num_nodes, num_edges, num_graphs,
+                    graph_indicator,
+                    nf, ef, gf)
+    return gnew, nodes
+end
+
@non_differentiable normalized_laplacian(x...)
@non_differentiable normalized_adjacency(x...)
@non_differentiable scaled_laplacian(x...)
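The new `blockdiag` / `Flux.batch` / `subgraph` functions are what the graph classification example builds on. Below is a minimal sketch of how they compose, assuming the tuple constructor and keyword arguments documented above; the toy edge lists and feature sizes are invented for illustration:

```julia
using GraphNeuralNetworks, Flux

# Two toy graphs with 3 and 4 nodes and 2-dimensional node features.
g1 = GNNGraph(([1, 2, 3], [2, 3, 1]), nf = rand(Float32, 2, 3))
g2 = GNNGraph(([1, 2, 3, 4], [2, 3, 4, 1]), nf = rand(Float32, 2, 4))

# Batch them into one block-diagonal graph (equivalent to SparseArrays.blockdiag(g1, g2));
# per-node graph membership is recorded in gall.graph_indicator.
gall = Flux.batch([g1, g2])
gall.num_graphs               # 2

# Pull the second component back out; `nodes` maps the new node ids
# back to node ids in the batched graph.
sg, nodes = subgraph(gall, 2)
```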

src/layers/conv.jl

Lines changed: 6 additions & 5 deletions
@@ -147,8 +147,9 @@ Graph convolution layer from Reference: [Weisfeiler and Leman Go Neural: Higher-

Performs:
```math
-\mathbf{x}_i' = W^1 \mathbf{x}_i + \box_{j \in \mathcal{N}(i)} W^2 \mathbf{x}_j)
+\mathbf{x}_i' = W^1 \mathbf{x}_i + \square_{j \in \mathcal{N}(i)} W^2 \mathbf{x}_j)
```
+
where the aggregation type is selected by `aggr`.

# Arguments
@@ -206,7 +207,7 @@ end
            concat=true,
            init=glorot_uniform
            bias=true,
-           negative_slope=0.2)
+           negative_slope=0.2f0)

Graph attentional layer from the paper [Graph Attention Networks](https://arxiv.org/abs/1710.10903).

@@ -216,7 +217,7 @@ Implements the operation
```
where the attention coefficient ``\alpha_{ij}`` is given by
```math
-\alpha_{ij} = \frac{1}{z_i} exp(LeakyReLU(\mathbf{a}^T [W \mathbf{x}_i || W \mathbf{x}_j]))
+\alpha_{ij} = \frac{1}{z_i} \exp(LeakyReLU(\mathbf{a}^T [W \mathbf{x}_i || W \mathbf{x}_j]))
```
with ``z_i`` a normalization factor.

@@ -301,7 +302,7 @@ Gated graph convolution layer from [Gated Graph Sequence Neural Networks](https:
Implements the recursion
```math
\mathbf{h}^{(0)}_i = \mathbf{x}_i || \mathbf{0} \\
-\mathbf{h}^{(l)}_i = GRU(\mathbf{h}^{(l-1)}_i, \box_{j \in N(i)} W \mathbf{h}^{(l-1)}_j)
+\mathbf{h}^{(l)}_i = GRU(\mathbf{h}^{(l-1)}_i, \square_{j \in N(i)} W \mathbf{h}^{(l-1)}_j)
```

where ``\mathbf{h}^{(l)}_i`` denotes the ``l``-th hidden variables passing through GRU. The dimension of input ``\mathbf{x}_i`` needs to be less or equal to `out`.
@@ -369,7 +370,7 @@ Edge convolutional layer from paper [Dynamic Graph CNN for Learning on Point Clo

Performs the operation
```math
-\mathbf{x}_i' = \box_{j \in N(i)} f(\mathbf{x}_i || \mathbf{x}_j - \mathbf{x}_i)
+\mathbf{x}_i' = \square_{j \in N(i)} f(\mathbf{x}_i || \mathbf{x}_j - \mathbf{x}_i)
```

where `f` typically denotes a learnable function, e.g. a linear layer or a multi-layer perceptron.
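As a quick illustration of the `GraphConv` formula above, here is a sketch that builds a small graph from LightGraphs and applies a single layer. The `layer(g, X)` call style is assumed to match how `GNNChain` applies its layers in the graph classification example; the graph and feature sizes are invented:

```julia
using GraphNeuralNetworks, Flux, LightGraphs

# Random graph with 10 nodes and 3-dimensional node features.
g = GNNGraph(erdos_renyi(10, 0.3), nf = rand(Float32, 3, 10))

# x_i' = W¹ x_i + □_{j ∈ N(i)} W² x_j, with the aggregation □ chosen by `aggr`.
layer = GraphConv(3 => 8, relu)
Y = layer(g, node_feature(g))   # assumed call style; 8 × 10 matrix of updated features
```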

src/layers/pool.jl

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ X = rand(32, 10)
pool(g, X) # => 32x1 matrix
```
"""
-struct GlobalPool{F}
+struct GlobalPool{F} <: GNNLayer
    aggr::F
end
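Subtyping `GNNLayer` is what lets `GlobalPool` be placed inside a `GNNChain`, as the graph classification example above does. A minimal sketch with illustrative layer sizes:

```julia
using Flux, GraphNeuralNetworks
using Statistics: mean

# GlobalPool(mean) reduces node features to one vector per graph,
# so the chain maps a (possibly batched) graph to one prediction per graph.
model = GNNChain(GraphConv(7 => 64, relu),
                 GraphConv(64 => 64, relu),
                 GlobalPool(mean),
                 Dense(64, 1))
```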