Commit c514d27

Merge pull request #45 from CarloLucibello/cl/dev
add test for Cora example

2 parents: e22afdc + 3585cca

8 files changed: +127 −29 lines

.github/workflows/CI.yml

Lines changed: 2 additions & 0 deletions

@@ -22,6 +22,8 @@ jobs:
           - ubuntu-latest
         arch:
           - x64
+    env: # Don't use system Python (needed by PyCall)
+      PYTHON: ""
     steps:
       - uses: actions/checkout@v2
      - uses: julia-actions/setup-julia@v1
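
Setting PYTHON to the empty string tells PyCall to install and use its own
private Conda-based Python rather than whatever interpreter happens to be on
the CI machine, which keeps the tests (which pull in PyCall, presumably
through MLDatasets) reproducible. A minimal sketch of applying the same
setting locally, following standard PyCall behavior:

    # Point PyCall at a private Conda-based Python instead of the system one.
    ENV["PYTHON"] = ""
    using Pkg
    Pkg.build("PyCall")   # rebuild so the new setting takes effect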

Project.toml

Lines changed: 2 additions & 1 deletion

@@ -36,8 +36,9 @@ julia = "1.6"
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
+MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

 [targets]
-test = ["Test", "Adapt", "Zygote", "FiniteDifferences", "ChainRulesTestUtils"]
+test = ["Test", "Adapt", "Zygote", "FiniteDifferences", "ChainRulesTestUtils", "MLDatasets"]

examples/graph_classification_tudataset.jl

Lines changed: 4 additions & 5 deletions

@@ -1,13 +1,12 @@
 # An example of graph classification

 using Flux
-using Flux: @functor, dropout, onecold, onehotbatch, getindex
+using Flux: onecold, onehotbatch
 using Flux.Losses: logitbinarycrossentropy
 using Flux.Data: DataLoader
 using GraphNeuralNetworks
 using MLDatasets: TUDataset
 using Statistics, Random
-using LearnBase: getobs
 using CUDA
 CUDA.allowscalar(false)

@@ -76,8 +75,8 @@ function train(; kws...)
     @info gfull

     perm = randperm(gfull.num_graphs)
-    gtrain = getobs(gfull, perm[1:NUM_TRAIN])
-    gtest = getobs(gfull, perm[NUM_TRAIN+1:end])
+    gtrain, _ = getgraph(gfull, perm[1:NUM_TRAIN])
+    gtest, _ = getgraph(gfull, perm[NUM_TRAIN+1:end])
     train_loader = DataLoader(gtrain, batchsize=args.batchsize, shuffle=true)
     test_loader = DataLoader(gtest, batchsize=args.batchsize, shuffle=false)

@@ -121,4 +120,4 @@ function train(; kws...)
     end
 end

-# train()
+train()
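
The example now splits the batched dataset with getgraph instead of
LearnBase.getobs. In this version getgraph(g, idxs) extracts the subgraph
made of the selected graphs and also returns a node map, hence the discarded
second value. A minimal sketch of the pattern (rand_graph and Flux.batch
assumed available as in the package docs):

    using Flux, GraphNeuralNetworks, Random

    gs = [rand_graph(10, 30) for _ in 1:4]   # four small random graphs
    gall = Flux.batch(gs)                    # one GNNGraph, num_graphs == 4

    perm = randperm(gall.num_graphs)
    gtrain, _ = getgraph(gall, perm[1:3])    # subgraph + node map
    gtest, _  = getgraph(gall, perm[4:end])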

examples/node_classification_cora.jl

Lines changed: 4 additions & 5 deletions

@@ -1,7 +1,7 @@
 # An example of semi-supervised node classification

 using Flux
-using Flux: @functor, dropout, onecold, onehotbatch
+using Flux: onecold, onehotbatch
 using Flux.Losses: logitcrossentropy
 using GraphNeuralNetworks
 using MLDatasets: Cora

@@ -28,13 +28,12 @@ end

 function train(; kws...)
     args = Args(; kws...)
-    if args.seed > 0
-        Random.seed!(args.seed)
-        CUDA.seed!(args.seed)
-    end
+
+    args.seed > 0 && Random.seed!(args.seed)

     if args.usecuda && CUDA.functional()
         device = gpu
+        args.seed > 0 && CUDA.seed!(args.seed)
         @info "Training on GPU"
     else
         device = cpu
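
Moving CUDA.seed! inside the CUDA.functional() branch means the GPU RNG is
only touched when a working device exists; the old unconditional call could
error on CPU-only machines. A minimal sketch of the pattern, using a
hypothetical set_seeds helper:

    using Random, CUDA

    function set_seeds(seed; usecuda=true)
        seed > 0 && Random.seed!(seed)   # CPU RNG: always safe to seed
        if usecuda && CUDA.functional()
            CUDA.seed!(seed)             # GPU RNG: only when a device exists
        end
    end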

src/layers/conv.jl

Lines changed: 5 additions & 7 deletions

@@ -288,9 +288,8 @@ end


 function Base.show(io::IO, l::GATConv)
-    in_channel = size(l.weight, ndims(l.weight))
-    out_channel = size(l.weight, ndims(l.weight)-1)
-    print(io, "GATConv(", in_channel, "=>", out_channel)
+    out_channel, in_channel = size(l.weight)
+    print(io, "GATConv(", in_channel, "=>", out_channel ÷ l.heads)
     print(io, ", LeakyReLU(λ=", l.negative_slope)
     print(io, "))")
 end

@@ -341,7 +340,7 @@ update_node(l::GatedGraphConv, m, x) = m
 # remove after https://github.com/JuliaDiff/ChainRules.jl/pull/521
 @non_differentiable fill!(x...)

-function (l::GatedGraphConv)(g::GNNGraph, H::AbstractMatrix{S}) where {T<:AbstractVector,S<:Real}
+function (l::GatedGraphConv)(g::GNNGraph, H::AbstractMatrix{S}) where {S<:Real}
     check_num_nodes(g, H)
     m, n = size(H)
     @assert (m <= l.out_ch) "number of input features must be less than or equal to the number of output features"

@@ -567,9 +566,8 @@ function (l::SAGEConv)(g::GNNGraph, x::AbstractMatrix)
 end

 function Base.show(io::IO, l::SAGEConv)
-    in_channel = size(l.weight1, ndims(l.weight1))
-    out_channel = size(l.weight1, ndims(l.weight1)-1)
-    print(io, "SAGEConv(", in_channel, " => ", out_channel)
+    out_channel, in_channel = size(l.weight)
+    print(io, "SAGEConv(", in_channel ÷ 2, " => ", out_channel)
     l.σ == identity || print(io, ", ", l.σ)
     print(io, ", aggr=", l.aggr)
     print(io, ")")
test/examples/node_classification_cora.jl (new file)

Lines changed: 98 additions & 0 deletions

@@ -0,0 +1,98 @@
+using Flux
+using Flux: onecold, onehotbatch
+using Flux.Losses: logitcrossentropy
+using GraphNeuralNetworks
+using MLDatasets: Cora
+using Statistics, Random
+using CUDA
+CUDA.allowscalar(false)
+
+function eval_loss_accuracy(X, y, ids, model, g)
+    ŷ = model(g, X)
+    l = logitcrossentropy(ŷ[:,ids], y[:,ids])
+    acc = mean(onecold(ŷ[:,ids] |> cpu) .== onecold(y[:,ids] |> cpu))
+    return (loss = round(l, digits=4), acc = round(acc*100, digits=2))
+end
+
+# arguments for the `train` function
+Base.@kwdef mutable struct Args
+    η = 1f-3        # learning rate
+    epochs = 20     # number of epochs
+    seed = 17       # set seed > 0 for reproducibility
+    usecuda = false # if true use cuda (if available)
+    nhidden = 128   # dimension of hidden features
+end
+
+function train(Layer; verbose=false, kws...)
+    args = Args(; kws...)
+    args.seed > 0 && Random.seed!(args.seed)
+
+    if args.usecuda && CUDA.functional()
+        device = Flux.gpu
+        args.seed > 0 && CUDA.seed!(args.seed)
+    else
+        device = Flux.cpu
+    end
+
+    # LOAD DATA
+    data = Cora.dataset()
+    g = GNNGraph(data.adjacency_list) |> device
+    X = data.node_features |> device
+    y = onehotbatch(data.node_labels, 1:data.num_classes) |> device
+    train_ids = data.train_indices |> device
+    val_ids = data.val_indices |> device
+    test_ids = data.test_indices |> device
+    ytrain = y[:,train_ids]
+
+    nin, nhidden, nout = size(X,1), args.nhidden, data.num_classes
+
+    ## DEFINE MODEL
+    model = GNNChain(Layer(nin, nhidden),
+                     Dropout(0.5),
+                     Layer(nhidden, nhidden),
+                     Dense(nhidden, nout)) |> device
+
+    ps = Flux.params(model)
+    opt = ADAM(args.η)
+
+    ## TRAINING
+    function report(epoch)
+        train = eval_loss_accuracy(X, y, train_ids, model, g)
+        test = eval_loss_accuracy(X, y, test_ids, model, g)
+        println("Epoch: $epoch  Train: $(train)  Test: $(test)")
+    end
+
+    verbose && report(0)
+    for epoch in 1:args.epochs
+        gs = Flux.gradient(ps) do
+            ŷ = model(g, X)
+            logitcrossentropy(ŷ[:,train_ids], ytrain)
+        end
+        verbose && report(epoch)
+        Flux.Optimise.update!(opt, ps, gs)
+    end
+
+    train_res = eval_loss_accuracy(X, y, train_ids, model, g)
+    test_res = eval_loss_accuracy(X, y, test_ids, model, g)
+    return train_res, test_res
+end
+
+for Layer in [
+        (nin, nout) -> GCNConv(nin => nout, relu),
+        (nin, nout) -> GraphConv(nin => nout, relu, aggr=mean),
+        (nin, nout) -> SAGEConv(nin => nout, relu),
+        (nin, nout) -> GATConv(nin => nout, relu),
+        (nin, nout) -> GATConv(nin => nout÷2, relu, heads=2),
+        (nin, nout) -> GINConv(Dense(nin, nout, relu)),
+        (nin, nout) -> ChebConv(nin => nout, 3),
+        # (nin, nout) -> NNConv(nin => nout), # needs edge features
+        # (nin, nout) -> GatedGraphConv(nout, 2), # needs nin = nout
+        # (nin, nout) -> EdgeConv(Dense(2nin, nout, relu)), # fits the training set but does not generalize well
+        ]
+    train_res, test_res = train(Layer, verbose=true)
+    # @show Layer(2,2) train_res, test_res
+    @test train_res.acc > 95
+    @test test_res.acc > 70
+end
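
In the test above, GNNChain threads the graph through every layer, so
model(g, X) behaves like Flux's Chain with the graph as a leading argument,
and plain Flux layers such as Dropout and Dense are applied to the node
features only. A minimal standalone sketch (rand_graph assumed available):

    using Flux, GraphNeuralNetworks

    g = rand_graph(8, 24)            # 8 nodes, 24 edges
    X = randn(Float32, 3, 8)         # 3 features per node

    model = GNNChain(GCNConv(3 => 16, relu),
                     Dropout(0.5),
                     Dense(16, 4))
    y = model(g, X)                  # 4×8 matrix of logits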

test/layers/conv.jl

Lines changed: 8 additions & 11 deletions

@@ -40,20 +40,17 @@
 end

 @testset "ChebConv" begin
-    k = 6
+    k = 3
     l = ChebConv(in_channel => out_channel, k)
     @test size(l.weight) == (out_channel, in_channel, k)
     @test size(l.bias) == (out_channel,)
     @test l.k == k
     for g in test_graphs
-        if g === g_single_vertex && GRAPH_T == :dense
-            @test_broken test_layer(l, g, rtol=1e-5, broken_grad_fields=[:weight], test_gpu=false)
-        else
-            test_layer(l, g, rtol=1e-5, broken_grad_fields=[:weight], test_gpu=false)
-            if TEST_GPU
-                @test_broken test_layer(l, g, rtol=1e-5, broken_grad_fields=[:weight], test_gpu=true)
-            end
-        end
+        g = add_self_loops(g)
+        test_layer(l, g, rtol=1e-5, test_gpu=false, outsize=(out_channel, g.num_nodes))
+        if TEST_GPU
+            @test_broken test_layer(l, g, rtol=1e-5, test_gpu=true, outsize=(out_channel, g.num_nodes))
+        end
     end

 @testset "bias=false" begin

@@ -81,10 +78,10 @@

 @testset "GATConv" begin

-    for heads in (1, 3), concat in (true, false)
+    for heads in (1, 2), concat in (true, false)
         l = GATConv(in_channel => out_channel; heads, concat)
         for g in test_graphs
-            test_layer(l, g, rtol=1e-4,
+            test_layer(l, g, rtol=1e-4,
                 outsize=(concat ? heads*out_channel : out_channel, g.num_nodes))
         end
     end
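
Calling add_self_loops before testing ChebConv guarantees every node has
nonzero degree, so the scaled Laplacian the layer builds never divides by
zero on graphs with isolated vertices (the case the old g_single_vertex
special-casing worked around). A sketch of the failure mode it avoids:

    using GraphNeuralNetworks

    g = GNNGraph([1], [2], num_nodes=3)   # node 3 is isolated (degree 0)
    g = add_self_loops(g)                 # now every degree is at least 1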

test/runtests.jl

Lines changed: 4 additions & 0 deletions

@@ -8,8 +8,11 @@ using LearnBase
 using LightGraphs
 using Zygote
 using Test
+using MLDatasets
 CUDA.allowscalar(false)

+ENV["DATADEPS_ALWAYS_ACCEPT"] = true # for MLDatasets
+
 include("test_utils.jl")

@@ -18,6 +21,7 @@ tests = [
     "layers/basic",
     "layers/conv",
     "layers/pool",
+    "examples/node_classification_cora",
 ]

 !CUDA.functional() && @warn("CUDA unavailable, not testing GPU support")
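
MLDatasets fetches Cora through DataDeps.jl, which normally prompts before
downloading; DATADEPS_ALWAYS_ACCEPT auto-accepts so a headless CI run does
not hang waiting for input. The first test run downloads and caches the
dataset:

    ENV["DATADEPS_ALWAYS_ACCEPT"] = "true"  # no TTY on CI, accept downloads
    using MLDatasets: Cora
    data = Cora.dataset()                   # downloaded once, cached after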
