Skip to content

Commit e17add5

Browse files
classicsongyzh119
authored and committed
[NN] Add MXNet impl for TAGCN module. (#799)
* upd * fig edgebatch edges * add test * trigger * Update README.md for pytorch PinSage example. Add noting that the PinSage model example under example/pytorch/recommendation only work with Python 3.6+ as its dataset loader depends on stanfordnlp package which work only with Python 3.6+. * Provid a frame agnostic API to test nn modules on both CPU and CUDA side. 1. make dgl.nn.xxx frame agnostic 2. make test.backend include dgl.nn modules 3. modify test_edge_softmax of test/mxnet/test_nn.py and test/pytorch/test_nn.py work on both CPU and GPU * Fix style * Delete unused code * Make agnostic test only related to tests/backend 1. clear all agnostic related code in dgl.nn 2. make test_graph_conv agnostic to cpu/gpu * Fix code style * fix * doc * Make all test code under tests.mxnet/pytorch.test_nn.py work on both CPU and GPU. * Fix syntex * Remove rand * Add TAGCN nn.module and example * Now tagcn can run on CPU. * Add unitest for TGConv * Fix style * For pubmed dataset, using --lr=0.005 can achieve better acc * Fix style * Fix some descriptions * trigger * Fix doc * Add nn.TGConv and example * Fix bug * Update data in mxnet.tagcn test acc. * Fix some comments and code * delete useless code * Fix namming * Fix bug * Fix bug * Add test code for mxnet TAGCov * Update some docs * Fix some code * Update docs dgl.nn.mxnet * Update weight init * Fix
1 parent 14bffe9 commit e17add5

File tree

9 files changed

+337
-9
lines changed

9 files changed

+337
-9
lines changed

docs/source/api/python/nn.mxnet.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ dgl.nn.mxnet.conv
1616
:members: forward
1717
:show-inheritance:
1818

19+
.. autoclass:: dgl.nn.mxnet.conv.TAGConv
20+
:members: forward
21+
:show-inheritance:
22+
1923
dgl.nn.mxnet.glob
2024
-----------------
2125

examples/mxnet/tagcn/README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
Topology Adaptive Graph Convolutional networks (TAGCN)
2+
============
3+
4+
- Paper link: [https://arxiv.org/abs/1710.10370](https://arxiv.org/abs/1710.10370)
5+
6+
Dependencies
7+
------------
8+
- MXNet nightly build
9+
- requests
10+
11+
```bash
pip install mxnet --pre
pip install requests
```
15+
16+
Results
17+
-------
18+
Run with following (available dataset: "cora", "citeseer", "pubmed")
19+
```bash
20+
DGLBACKEND=mxnet python3 train.py --dataset cora --gpu 0 --self-loop
21+
```
22+
23+
* cora: ~0.820 (paper: 0.833)
24+
* citeseer: ~0.702 (paper: 0.714)
25+
* pubmed: ~0.798 (paper: 0.811)

examples/mxnet/tagcn/tagcn.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
"""TAGCN using DGL nn package
2+
3+
References:
4+
- Topology Adaptive Graph Convolutional Networks
5+
- Paper: https://arxiv.org/abs/1710.10370
6+
"""
7+
import mxnet as mx
8+
from mxnet import gluon
9+
import dgl
10+
from dgl.nn.mxnet import TAGConv
11+
12+
class TAGCN(gluon.Block):
    """TAGCN node-classification model: a stack of TAGConv layers.

    Parameters
    ----------
    g : DGLGraph
        Graph that every layer operates on.
    in_feats : int
        Size of the input node features.
    n_hidden : int
        Size of each hidden layer.
    n_classes : int
        Number of output classes (size of the last layer).
    n_layers : int
        Number of hidden layers between the input and output layers.
    activation : callable
        Activation applied by every layer except the output one.
    dropout : float
        Dropout rate applied to the input of every layer but the first.
    """

    def __init__(self, g, in_feats, n_hidden, n_classes, n_layers,
                 activation, dropout):
        super(TAGCN, self).__init__()
        self.g = g
        self.layers = gluon.nn.Sequential()
        # Input layer.
        self.layers.add(TAGConv(in_feats, n_hidden, activation=activation))
        # Hidden layers.
        for _ in range(n_layers - 1):
            self.layers.add(TAGConv(n_hidden, n_hidden, activation=activation))
        # Output layer: no activation, logits feed the loss directly.
        self.layers.add(TAGConv(n_hidden, n_classes))
        self.dropout = gluon.nn.Dropout(rate=dropout)

    def forward(self, features):
        """Run all layers over ``self.g``, with dropout between layers."""
        h = features
        for idx, layer in enumerate(self.layers):
            if idx != 0:
                h = self.dropout(h)
            h = layer(self.g, h)
        return h

examples/mxnet/tagcn/train.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
import argparse, time
2+
import numpy as np
3+
import mxnet as mx
4+
from mxnet import gluon
5+
6+
from dgl import DGLGraph
7+
from dgl.data import register_data_args, load_data
8+
9+
from tagcn import TAGCN
10+
11+
def evaluate(model, features, labels, mask):
    """Return the classification accuracy of ``model`` on the masked nodes."""
    predictions = model(features).argmax(axis=1)
    n_correct = ((predictions == labels) * mask).sum()
    return (n_correct / mask.sum().asscalar()).asscalar()
15+
16+
def main(args):
    """Train a TAGCN model on the dataset named in ``args`` and report test accuracy.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments (dataset, gpu, lr, n_hidden, n_layers,
        n_epochs, dropout, weight_decay, self_loop).
    """
    # load and preprocess dataset
    data = load_data(args)
    features = mx.nd.array(data.features)
    labels = mx.nd.array(data.labels)
    train_mask = mx.nd.array(data.train_mask)
    val_mask = mx.nd.array(data.val_mask)
    test_mask = mx.nd.array(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().asscalar(),
           val_mask.sum().asscalar(),
           test_mask.sum().asscalar()))

    # Select device; the unused `cuda` flag from the original was dropped.
    if args.gpu < 0:
        ctx = mx.cpu(0)
    else:
        ctx = mx.gpu(args.gpu)

    features = features.as_in_context(ctx)
    labels = labels.as_in_context(ctx)
    train_mask = train_mask.as_in_context(ctx)
    val_mask = val_mask.as_in_context(ctx)
    test_mask = test_mask.as_in_context(ctx)

    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop (replace any existing self loops with exactly one per node)
    if args.self_loop:
        g.remove_edges_from(g.selfloop_edges())
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)

    # create TAGCN model
    model = TAGCN(g,
                  in_feats,
                  args.n_hidden,
                  n_classes,
                  args.n_layers,
                  mx.nd.relu,
                  args.dropout)

    model.initialize(ctx=ctx)
    n_train_samples = train_mask.sum().asscalar()
    loss_fcn = gluon.loss.SoftmaxCELoss()

    # use optimizer
    print(model.collect_params())
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr, 'wd': args.weight_decay})

    # Training loop; the first 3 epochs are excluded from timing (warm-up).
    dur = []
    for epoch in range(args.n_epochs):
        if epoch >= 3:
            t0 = time.time()
        # forward
        with mx.autograd.record():
            pred = model(features)
            # Mask the loss so only training nodes contribute.
            loss = loss_fcn(pred, labels, mx.nd.expand_dims(train_mask, 1))
            loss = loss.sum() / n_train_samples

        loss.backward()
        trainer.step(batch_size=1)

        if epoch >= 3:
            # asscalar() blocks until async computation finishes, so the
            # recorded duration covers the whole iteration.
            loss.asscalar()
            dur.append(time.time() - t0)
            acc = evaluate(model, features, labels, val_mask)
            print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
                  "ETputs(KTEPS) {:.2f}". format(
                epoch, np.mean(dur), loss.asscalar(), acc,
                n_edges / np.mean(dur) / 1000))

    print()
    # BUG FIX: the original evaluated on val_mask here but printed the result
    # as the test accuracy; use the test split for the final report.
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
103+
def _build_parser():
    """Build the command-line parser for the TAGCN training script."""
    parser = argparse.ArgumentParser(description='TAGCN')
    register_data_args(parser)
    parser.add_argument("--dropout", type=float, default=0.5,
                        help="dropout probability")
    parser.add_argument("--gpu", type=int, default=-1,
                        help="gpu")
    parser.add_argument("--lr", type=float, default=1e-2,
                        help="learning rate")
    parser.add_argument("--n-epochs", type=int, default=200,
                        help="number of training epochs")
    parser.add_argument("--n-hidden", type=int, default=16,
                        help="number of hidden tagcn units")
    parser.add_argument("--n-layers", type=int, default=1,
                        help="number of hidden tagcn layers")
    parser.add_argument("--weight-decay", type=float, default=5e-4,
                        help="Weight for L2 loss")
    parser.add_argument("--self-loop", action='store_true',
                        help="graph self-loop (default=False)")
    parser.set_defaults(self_loop=False)
    return parser


if __name__ == '__main__':
    args = _build_parser().parse_args()
    print(args)

    main(args)

examples/pytorch/tagcn/tagcn.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
"""GCN using DGL nn package
1+
"""TAGCN using DGL nn package
22
33
References:
4-
- Semi-Supervised Classification with Graph Convolutional Networks
5-
- Paper: https://arxiv.org/abs/1609.02907
6-
- Code: https://github.com/tkipf/gcn
4+
- Topology Adaptive Graph Convolutional Networks
5+
- Paper: https://arxiv.org/abs/1710.10370
76
"""
87
import torch
98
import torch.nn as nn

python/dgl/nn/mxnet/conv.py

Lines changed: 98 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from . import utils
1010
from ... import function as fn
1111

12-
__all__ = ['GraphConv', 'RelGraphConv']
12+
__all__ = ['GraphConv', 'TAGConv', 'RelGraphConv']
1313

1414
class GraphConv(gluon.Block):
1515
r"""Apply graph convolution over an input signal.
@@ -74,7 +74,7 @@ def __init__(self,
7474

7575
with self.name_scope():
7676
self.weight = self.params.get('weight', shape=(in_feats, out_feats),
77-
init=mx.init.Xavier())
77+
init=mx.init.Xavier(magnitude=math.sqrt(2.0)))
7878
if bias:
7979
self.bias = self.params.get('bias', shape=(out_feats,),
8080
init=mx.init.Zero())
@@ -108,7 +108,7 @@ def forward(self, graph, feat):
108108
graph = graph.local_var()
109109
if self._norm:
110110
degs = graph.in_degrees().astype('float32')
111-
norm = mx.nd.power(degs, -0.5)
111+
norm = mx.nd.power(mx.nd.clip(degs, a_min=1, a_max=float("inf")), -0.5)
112112
shp = norm.shape + (1,) * (feat.ndim - 1)
113113
norm = norm.reshape(shp).as_in_context(feat.context)
114114
feat = feat * norm
@@ -147,6 +147,101 @@ def __repr__(self):
147147
summary += '\n)'
148148
return summary
149149

150+
class TAGConv(gluon.Block):
    r"""Apply Topology Adaptive Graph Convolutional Network

    .. math::
        \mathbf{X}^{\prime} = \sum_{k=0}^K (\mathbf{D}^{-1/2} \mathbf{A}
        \mathbf{D}^{-1/2})^{k} \mathbf{X} \mathbf{\Theta}_{k},

    where :math:`\mathbf{A}` denotes the adjacency matrix and
    :math:`D_{ii} = \sum_{j=0} A_{ij}` its diagonal degree matrix.

    Parameters
    ----------
    in_feats : int
        Number of input features.
    out_feats : int
        Number of output features.
    k: int, optional
        Number of hops :math:`k`. (default: 2)
    bias: bool, optional
        If True, adds a learnable bias to the output. Default: ``True``.
    activation: callable activation function/layer or None, optional
        If not None, applies an activation function to the updated node features.
        Default: ``None``.

    Attributes
    ----------
    lin : mxnet.gluon.parameter.Parameter
        The learnable weight tensor.
    h_bias : mxnet.gluon.parameter.Parameter or None
        The learnable bias tensor, or ``None`` when ``bias=False``.
    """
    def __init__(self,
                 in_feats,
                 out_feats,
                 k=2,
                 bias=True,
                 activation=None):
        super(TAGConv, self).__init__()
        self.out_feats = out_feats
        self.k = k
        self.bias = bias
        self.activation = activation
        self.in_feats = in_feats

        # One weight matrix per hop, stored as a single concatenated
        # (in_feats * (k + 1), out_feats) parameter.
        self.lin = self.params.get(
            'weight', shape=(self.in_feats * (self.k + 1), self.out_feats),
            init=mx.init.Xavier(magnitude=math.sqrt(2.0)))
        if self.bias:
            self.h_bias = self.params.get('bias', shape=(out_feats,),
                                          init=mx.init.Zero())
        else:
            # BUG FIX: forward previously tested `self.bias is not None`,
            # which is True even for the bool False, and then crashed on the
            # missing h_bias parameter. Define h_bias = None explicitly and
            # test it directly in forward.
            self.h_bias = None

    def forward(self, graph, feat):
        r"""Compute graph convolution

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : mxnet.NDArray
            The input feature of shape :math:`(N, D_{in})` where :math:`D_{in}`
            is size of input feature, :math:`N` is the number of nodes.

        Returns
        -------
        mxnet.NDArray
            The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
            is size of output feature.
        """
        graph = graph.local_var()

        # Symmetric normalization D^{-1/2}; degrees are clipped at 1 so
        # isolated nodes do not produce inf.
        degs = graph.in_degrees().astype('float32')
        norm = mx.nd.power(mx.nd.clip(degs, a_min=1, a_max=float("inf")), -0.5)
        shp = norm.shape + (1,) * (feat.ndim - 1)
        norm = norm.reshape(shp).as_in_context(feat.context)

        # Accumulate [X, A_hat X, ..., A_hat^k X] along the feature axis,
        # where A_hat = D^{-1/2} A D^{-1/2} is applied via message passing.
        rst = feat
        for _ in range(self.k):
            rst = rst * norm
            graph.ndata['h'] = rst

            graph.update_all(fn.copy_src(src='h', out='m'),
                             fn.sum(msg='m', out='h'))
            rst = graph.ndata['h']
            rst = rst * norm
            feat = mx.nd.concat(feat, rst, dim=-1)

        # Single dense projection over the concatenated hop features.
        rst = mx.nd.dot(feat, self.lin.data(feat.context))
        if self.h_bias is not None:
            rst = rst + self.h_bias.data(rst.context)

        if self.activation is not None:
            rst = self.activation(rst)

        return rst
244+
150245
class RelGraphConv(gluon.Block):
151246
r"""Relational graph convolution layer.
152247

python/dgl/nn/pytorch/conv.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,6 @@ def extra_repr(self):
171171
summary += ', activation={_activation}'
172172
return summary.format(**self.__dict__)
173173

174-
175174
class GATConv(nn.Module):
176175
r"""Apply `Graph Attention Network <https://arxiv.org/pdf/1710.10903.pdf>`__
177176
over an input signal.
@@ -305,7 +304,7 @@ class TAGConv(nn.Module):
305304
out_feats : int
306305
Output feature size.
307306
k: int, optional
308-
Number of hops :math: `k`. (default: 3)
307+
Number of hops :math: `k`. (default: 2)
309308
bias: bool, optional
310309
If True, adds a learnable bias to the output. Default: ``True``.
311310
activation: callable activation function/layer or None, optional

0 commit comments

Comments
 (0)