Skip to content

Commit 22ab06c

Browse files
stuff
1 parent 7f14dc9 commit 22ab06c

File tree

5 files changed

+93
-16
lines changed

5 files changed

+93
-16
lines changed

docs/make.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ makedocs(;
1313
"Graphs" => "gnngraph.md",
1414
"Message Passing" => "messagepassing.md",
1515
"Model Building" => "models.md",
16+
"Datasets" => "datasets.md",
1617
"API Reference" =>
1718
[
1819
"GNNGraph" => "api/gnngraph.md",

docs/src/datasets.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Datasets
2+
3+
GNN.jl doesn't come with its own datasets, but leverages those available in the Julia (and non-Julia) ecosystem. In particular, the [examples in the GNN.jl repository](https://github.com/CarloLucibello/GraphNeuralNetworks.jl/tree/master/examples) make use of the [MLDatasets.jl](https://github.com/JuliaML/MLDatasets.jl) package. There you will find common graph datasets such as Cora, PubMed, and Citeseer.
4+
MLDatasets also gives access to the [TUDataset](https://chrsmrrs.github.io/datasets/docs/datasets/) repository and its numerous datasets.

src/GNNGraphs/GNNGraphs.jl

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,26 +22,29 @@ export GNNGraph,
2222
graph_features
2323

2424
include("query.jl")
25-
export edge_index,
26-
adjacency_list,
27-
normalized_laplacian,
28-
scaled_laplacian,
25+
export adjacency_list,
26+
edge_index,
2927
graph_indicator,
28+
has_multi_edges,
3029
is_bidirected,
30+
normalized_laplacian,
31+
scaled_laplacian,
3132
# from Graphs
3233
adjacency_matrix,
3334
degree,
34-
outneighbors,
35-
inneighbors
35+
has_self_loops,
36+
inneighbors,
37+
outneighbors
3638

3739
include("transform.jl")
3840
export add_nodes,
3941
add_edges,
40-
add_self_loops,
41-
remove_self_loops,
42-
remove_multi_edges,
42+
add_self_loops,
4343
getgraph,
4444
negative_sample,
45+
rand_edge_split,
46+
remove_self_loops,
47+
remove_multi_edges,
4548
# from Flux
4649
batch,
4750
unbatch,

src/GNNGraphs/query.jl

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -243,10 +243,35 @@ function is_bidirected(g::GNNGraph)
243243
all((s1 .== s2) .& (t1 .== t2))
244244
end
245245

246-
@non_differentiable normalized_laplacian(x...)
247-
@non_differentiable normalized_adjacency(x...)
248-
@non_differentiable scaled_laplacian(x...)
249-
@non_differentiable adjacency_matrix(x...)
"""
    has_self_loops(g::GNNGraph)

Return `true` if at least one edge of `g` has the same node as both
of its endpoints, and `false` otherwise.
"""
function Graphs.has_self_loops(g::GNNGraph)
    src, dst = edge_index(g)
    # An edge is a self loop when its two endpoints coincide.
    return any(src .== dst)
end
255+
"""
    has_multi_edges(g::GNNGraph)

Return `true` if `g` has any multiple edges, i.e. if at least two of its
edges are assigned the same code by `edge_encoding`.
"""
function has_multi_edges(g::GNNGraph)
    s, t = edge_index(g)
    # `edge_encoding` returns the tuple `(idx, maxid)`. Only the per-edge codes
    # are needed here; using the whole tuple would compare a 2-element tuple
    # with itself and always yield `false`.
    idxs, _ = edge_encoding(s, t, g.num_nodes)
    # Fewer distinct codes than edges means some edge is repeated.
    return length(union(idxs)) < length(idxs)
end
266+
267+
250268
@non_differentiable adjacency_list(x...)
269+
@non_differentiable adjacency_matrix(x...)
251270
@non_differentiable degree(x...)
252271
@non_differentiable graph_indicator(x...)
272+
@non_differentiable has_multi_edges(x...)
273+
@non_differentiable Graphs.has_self_loops(x...)
274+
@non_differentiable is_bidirected(x...)
275+
@non_differentiable normalized_adjacency(x...)
276+
@non_differentiable normalized_laplacian(x...)
277+
@non_differentiable scaled_laplacian(x...)

src/GNNGraphs/transform.jl

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -362,11 +362,55 @@ function negative_sample(g::GNNGraph;
362362
return GNNGraph(s_neg, t_neg, num_nodes=n) |> device
363363
end
364364

"""
    rand_edge_split(g::GNNGraph, frac) -> g1, g2

Randomly partition the edges in `g` to form two graphs, `g1`
and `g2`. Both will have the same number of nodes as `g`.
`g1` will contain a fraction `frac` of the original edges
(rounded to an integer number of edges), while `g2` will contain the rest.

`frac` must lie in the interval `[0, 1]`; an `ArgumentError` is thrown otherwise.

Useful for train/test splits in link prediction tasks.
"""
function rand_edge_split(g::GNNGraph, frac)
    0 <= frac <= 1 ||
        throw(ArgumentError("frac must be in [0, 1], got $frac"))

    s, t = edge_index(g)
    # A single random permutation of the edge ids defines both parts:
    # the first `size1` ids go to `g1`, the remaining ones to `g2`.
    eids = randperm(g.num_edges)
    size1 = round(Int, g.num_edges * frac)

    ids1 = eids[1:size1]
    ids2 = eids[size1+1:end]

    g1 = GNNGraph(s[ids1], t[ids1], num_nodes=g.num_nodes)
    g2 = GNNGraph(s[ids2], t[ids2], num_nodes=g.num_nodes)

    return g1, g2
end
394+
395+
365396
# each edge is represented by a number in
366397
# 1:N^2
367-
function edge_encoding(s, t, n)
368-
idx = (s .- 1) .* n .+ t
369-
maxid = n^2
398+
"""
    edge_encoding(s, t, n; directed=true) -> idx, maxid

Encode every edge `(s[i], t[i])` as a single integer `idx[i]`, keeping the
order of the input edges. `n` is the number of nodes; node ids are assumed
to be in `1:n`. `maxid` is the largest code the encoding can produce.

- `directed=true`: ordered pairs, self-loops allowed. Codes are in `1:n^2`,
  with `idx = (s - 1) * n + t`.
- `directed=false`: unordered pairs, self-loops allowed. `(i, j)` and `(j, i)`
  get the same code, and codes are in `1:n*(n+1)÷2`.
"""
function edge_encoding(s, t, n; directed=true)
    if directed
        # directed edges and self-loops allowed
        idx = (s .- 1) .* n .+ t
        maxid = n^2
    else
        # undirected edges and self-loops allowed
        maxid = n * (n + 1) ÷ 2
        # Canonical orientation lo <= hi, computed elementwise so that the
        # codes stay aligned with the input edge order.
        lo = min.(s, t)
        hi = max.(s, t)
        # Row-major enumeration of the upper triangle (diagonal included):
        # rows 1..lo-1 contribute sum_{i<lo} (n - i + 1) = (lo-1)*(2(n+1)-lo)/2
        # entries, then (hi - lo + 1) is the position inside row `lo`.
        # The product is always even, so the integer division is exact.
        idx = @. (lo - 1) * (2 * (n + 1) - lo) ÷ 2 + (hi - lo + 1)
    end
    return idx, maxid
end
372416

0 commit comments

Comments
 (0)