stuff

CarloLucibello · CarloLucibello · commit 22ab06c9de5e · 2021-11-03T11:45:07.000+01:00
diff --git a/docs/make.jl b/docs/make.jl
@@ -13,6 +13,7 @@ makedocs(;
              "Graphs" => "gnngraph.md",
              "Message Passing" => "messagepassing.md",
              "Model Building" => "models.md",
+             "Datasets" => "datasets.md",
              "API Reference" =>
                [
                 "GNNGraph" => "api/gnngraph.md",
diff --git a/docs/src/datasets.md b/docs/src/datasets.md
@@ -0,0 +1,4 @@
+# Datasets
+
+GNN.jl doesn't come with its own datasets, but leverages those available in the Julia (and non-Julia) ecosystem. In particular, the [examples in the GNN.jl repository](https://github.com/CarloLucibello/GraphNeuralNetworks.jl/tree/master/examples) make use of the [MLDatasets.jl](https://github.com/JuliaML/MLDatasets.jl) package. There you will find common graph datasets such as Cora, PubMed, and Citeseer.
+MLDatasets also gives access to the [TUDataset](https://chrsmrrs.github.io/datasets/docs/datasets/) repository and its numerous datasets.
diff --git a/src/GNNGraphs/GNNGraphs.jl b/src/GNNGraphs/GNNGraphs.jl
@@ -22,26 +22,29 @@ export GNNGraph,
        graph_features
     
 include("query.jl")
-export edge_index, 
-       adjacency_list, 
-       normalized_laplacian, 
-       scaled_laplacian,
+export adjacency_list,
+       edge_index, 
        graph_indicator, 
+       has_multi_edges, 
        is_bidirected,
+       normalized_laplacian, 
+       scaled_laplacian,
        # from Graphs
        adjacency_matrix, 
        degree, 
-       outneighbors, 
-       inneighbors
+       has_self_loops,
+       inneighbors,
+       outneighbors 
 
 include("transform.jl")
 export add_nodes, 
        add_edges, 
-       add_self_loops, 
-       remove_self_loops, 
-       remove_multi_edges,
+       add_self_loops,
        getgraph,
        negative_sample,
+       rand_edge_split,
+       remove_self_loops, 
+       remove_multi_edges,
        # from Flux
        batch, 
        unbatch,
diff --git a/src/GNNGraphs/query.jl b/src/GNNGraphs/query.jl
@@ -243,10 +243,35 @@ function is_bidirected(g::GNNGraph)
     all((s1 .== s2) .& (t1 .== t2))
 end
 
-@non_differentiable normalized_laplacian(x...)
-@non_differentiable normalized_adjacency(x...)
-@non_differentiable scaled_laplacian(x...)
-@non_differentiable adjacency_matrix(x...)
+"""
+    has_self_loops(g::GNNGraph)
+
+Return `true` if at least one edge of `g` starts and ends at the same node,
+and `false` otherwise.
+"""
+function Graphs.has_self_loops(g::GNNGraph)
+    src, dst = edge_index(g)
+    # A self loop is an edge whose source and target coincide.
+    return any(src .== dst)
+end
+
+"""
+    has_multi_edges(g::GNNGraph)
+
+Return `true` if `g` has any multiple edges, i.e. if at least two edges
+share the same source and target nodes.
+"""
+function has_multi_edges(g::GNNGraph)
+    s, t = edge_index(g)
+    # `edge_encoding` returns the tuple `(idx, maxid)`; only the per-edge codes
+    # are needed. Binding the whole tuple to one name would make both `union`
+    # and `length` operate on the 2-tuple, so the test below could never be true.
+    idxs, _ = edge_encoding(s, t, g.num_nodes)
+    # Duplicate codes collapse in the union, so a shorter union means that
+    # some edge appears more than once.
+    return length(union(idxs)) < length(idxs)
+end
+
+
 @non_differentiable adjacency_list(x...)
+@non_differentiable adjacency_matrix(x...)
 @non_differentiable degree(x...)
 @non_differentiable graph_indicator(x...)
+@non_differentiable has_multi_edges(x...)
+@non_differentiable Graphs.has_self_loops(x...) 
+@non_differentiable is_bidirected(x...)
+@non_differentiable normalized_adjacency(x...)
+@non_differentiable normalized_laplacian(x...)
+@non_differentiable scaled_laplacian(x...)
diff --git a/src/GNNGraphs/transform.jl b/src/GNNGraphs/transform.jl
@@ -362,11 +362,55 @@ function negative_sample(g::GNNGraph;
     return GNNGraph(s_neg, t_neg, num_nodes=n) |> device
 end
 
+"""
+    rand_edge_split(g::GNNGraph, frac) -> g1, g2
+
+Randomly partition the edges in `g` to form two graphs, `g1`
+and `g2`. Both will have the same number of nodes as `g`.
+`g1` will contain a fraction `frac` of the original edges, 
+while `g2` will contain the rest.
+Useful for train/test splits in link prediction tasks.
+"""
+function rand_edge_split(g::GNNGraph, frac)
+    s, t = edge_index(g)
+    # A single random permutation of the edge ids defines both parts of the
+    # split, so every edge ends up in exactly one of the two graphs.
+    eids = randperm(g.num_edges)
+    # Number of edges assigned to `g1`, rounded to the nearest integer.
+    size1 = round(Int, g.num_edges * frac)
+
+    ids1, ids2 = eids[1:size1], eids[size1+1:end]
+    g1 = GNNGraph(s[ids1], t[ids1], num_nodes=g.num_nodes)
+    g2 = GNNGraph(s[ids2], t[ids2], num_nodes=g.num_nodes)
+
+    return g1, g2
+end
+
+
 # each edge is represented by a number in
 # 1:N^2
-function edge_encoding(s, t, n)
-    idx = (s .- 1) .* n .+ t
-    maxid = n^2 
+function edge_encoding(s, t, n; directed=true)
+    # Map every edge `(s[k], t[k])` of a graph with `n` nodes to an integer code.
+    # Returns `(idx, maxid)`: the per-edge codes, in the same order as the
+    # input edges, and the largest code the chosen encoding can produce.
+    if directed
+        # directed edges and self-loops allowed
+        idx = (s .- 1) .* n .+ t
+        maxid = n^2
+    else 
+        # undirected edges and self-loops allowed: there are n * (n + 1) / 2
+        # unordered pairs `lo <= hi`, so that is the largest code.
+        maxid = n * (n + 1) ÷ 2
+        # Orient every edge as (lo, hi) without permuting the edges, so that
+        # `idx[k]` still refers to the k-th input edge.
+        lo = min.(s, t)
+        hi = max.(s, t)
+        # Rows 1:lo-1 of the upper triangle hold (lo - 1) * (2n + 2 - lo) / 2
+        # pairs in total; (hi - lo + 1) is the position inside row `lo`.
+        idx = @. (lo - 1) * (2 * (n + 1) - lo) ÷ 2 + (hi - lo + 1)
+    end
     return idx, maxid
 end
 

Original file line number	Diff line number	Diff line change
`@@ -13,6 +13,7 @@ makedocs(;`
`13`	`13`	`"Graphs" => "gnngraph.md",`
`14`	`14`	`"Message Passing" => "messagepassing.md",`
`15`	`15`	`"Model Building" => "models.md",`
	`16`	`+ "Datasets" => "datasets.md",`
`16`	`17`	`"API Reference" =>`
`17`	`18`	`[`
`18`	`19`	`"GNNGraph" => "api/gnngraph.md",`