Merge pull request #62 from joshchang1112:master

ppham27 · ppham27 · commit 3bea5a430778 · 2020-08-11T13:30:52.000Z
PiperOrigin-RevId: 325933747
diff --git a/research/gnn-survey/download_dataset.sh b/research/gnn-survey/download_dataset.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Downloads and unpacks the Cora dataset. This script relies on bash syntax,
+# hence the explicit interpreter line above.
+
+# URL for downloading the Cora dataset.
+URL="https://linqs-data.soe.ucsc.edu/public/lbc/cora.tgz"
+
+# Target folder in which the archive is stored and extracted.
+DATA_DIR="data"
+
+# Helper function to download a file unless it has been downloaded before.
+#
+# Arguments:
+#   $1: URL of the file to download.
+#   $2: Directory the file is saved into.
+function download () {
+  local fileurl="${1}"
+  local filedir="${2}"
+  # Strip everything up to the last '/' to obtain the remote file name.
+  local filename="${fileurl##*/}"
+  # wget stores the file inside ${filedir} (-P), so an earlier download has to
+  # be looked for there; testing the bare file name would only inspect the
+  # current working directory.
+  if [ ! -f "${filedir}/${filename}" ]; then
+    echo ">>> Downloading '${filename}' from '${fileurl}' to '${filedir}'"
+    # NOTE(review): --no-check-certificate disables TLS certificate
+    # validation, so the origin of the archive is not authenticated. Drop the
+    # flag if the server certificate validates.
+    wget --quiet --no-check-certificate -P "${filedir}" "${fileurl}"
+  else
+    echo "*** File '${filename}' exists; no need to download it."
+  fi
+}
+
+# Download and unzip the dataset. Data will be at '${DATA_DIR}/cora/' folder.
+download "${URL}" "${DATA_DIR}"
+
+# The archive name is derived from the URL so the two cannot drift apart.
+ARCHIVE="${DATA_DIR}/${URL##*/}"
+# Stop with a clear message instead of letting tar fail on a missing file.
+if [ ! -f "${ARCHIVE}" ]; then
+  echo "!!! Archive '${ARCHIVE}' not found; did the download fail?" >&2
+  exit 1
+fi
+tar -C "${DATA_DIR}" -xvzf "${ARCHIVE}"
diff --git a/research/gnn-survey/layers.py b/research/gnn-survey/layers.py
@@ -0,0 +1,54 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""GNN layers."""
+import tensorflow as tf
+
+
+class GraphConvLayer(tf.keras.layers.Layer):
+  """Single graph convolution layer.
+
+  For an input pair `(x, adj)` the layer computes `adj @ x @ weight`, plus a
+  bias vector when `bias` is enabled. No activation is applied.
+  """
+
+  def __init__(self, output_dim, bias, **kwargs):
+    """Initializes the GraphConvLayer.
+
+    Args:
+      output_dim: (int) Output dimension of gcn layer
+      bias: (bool) Whether bias needs to be added to the layer
+      **kwargs: Keyword arguments for tf.keras.layers.Layer.
+    """
+    super(GraphConvLayer, self).__init__(**kwargs)
+    self.output_dim = output_dim
+    self.bias = bias
+
+  def build(self, input_shape):
+    """Creates the layer variables.
+
+    Args:
+      input_shape: Shapes of the `(x, adj)` inputs. Only the last dimension of
+        the first entry (the input feature size) is read.
+    """
+    super(GraphConvLayer, self).build(input_shape)
+    input_dim = input_shape[0][-1]
+    self.weight = self.add_weight(
+        name='weight',
+        shape=(input_dim, self.output_dim),
+        initializer='random_normal',
+        trainable=True)
+    if self.bias:
+      self.b = self.add_weight(
+          name='bias',
+          shape=(self.output_dim,),
+          initializer='random_normal',
+          trainable=True)
+    # `adj @ x @ weight` may be grouped either way. Assuming a square
+    # adjacency matrix, applying `weight` first is cheaper whenever the layer
+    # shrinks the feature size, because the product with `adj` then runs over
+    # output_dim instead of input_dim columns. Otherwise the original
+    # grouping is kept.
+    self._project_first = input_dim > self.output_dim
+
+  def call(self, inputs):
+    """Applies the graph convolution.
+
+    Args:
+      inputs: Sequence whose first item is the feature tensor `x` and whose
+        second item is the adjacency tensor `adj`.
+
+    Returns:
+      `adj @ x @ weight`, plus the bias when it is enabled.
+    """
+    x, adj = inputs[0], inputs[1]
+    if self._project_first:
+      outputs = tf.matmul(adj, tf.matmul(x, self.weight))
+    else:
+      outputs = tf.matmul(tf.matmul(adj, x), self.weight)
+    if self.bias:
+      outputs = self.b + outputs
+    return outputs
diff --git a/research/gnn-survey/models.py b/research/gnn-survey/models.py
@@ -0,0 +1,91 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Modeling for GNNs."""
+from layers import GraphConvLayer
+import tensorflow as tf
+
+
+class GCNBlock(tf.keras.layers.Layer):
+  """Graph convolutional block: graph convolution, then ReLU, then dropout."""
+
+  def __init__(self, hidden_dim, dropout_rate, bias, **kwargs):
+    """Initializes a GCN block.
+
+    Args:
+      hidden_dim: (int) Dimension of hidden layer.
+      dropout_rate: (float) Dropout probability
+      bias: (bool) Whether bias needs to be added to gcn layers
+      **kwargs: Keyword arguments for tf.keras.layers.Layer.
+    """
+    super(GCNBlock, self).__init__(**kwargs)
+    self.hidden_dim = hidden_dim
+    self.dropout_rate = dropout_rate
+    self.bias = bias
+
+    self._activation = tf.keras.layers.ReLU()
+    self._dropout = tf.keras.layers.Dropout(self.dropout_rate)
+
+  def build(self, input_shape):
+    """Creates the graph convolution sub-layer.
+
+    Args:
+      input_shape: Shapes of the block inputs; only forwarded to the base
+        class here.
+    """
+    super(GCNBlock, self).build(input_shape)
+    self._graph_conv_layer = GraphConvLayer(self.hidden_dim, bias=self.bias)
+
+  def call(self, inputs, training=None):
+    """Runs graph convolution, ReLU and dropout in that order.
+
+    Args:
+      inputs: Input pair handed unchanged to the graph convolution layer.
+      training: (bool or None) Forwarded to the dropout layer so callers can
+        switch dropout on or off explicitly. None leaves the decision to
+        Keras.
+
+    Returns:
+      The output of the dropout layer.
+    """
+    x = self._graph_conv_layer(inputs)
+    x = self._activation(x)
+    return self._dropout(x, training=training)
+
+
+class GCN(tf.keras.Model):
+  """Graph convolution network for semi-supervised node classification."""
+
+  def __init__(self, num_layers, hidden_dim, num_classes, dropout_rate, bias,
+               **kwargs):
+    """Initializes a GCN model.
+
+    Args:
+      num_layers: (int) Number of gnn layers, the output layer included
+      hidden_dim: (list) List of hidden layers dimension; the first
+        `num_layers - 1` entries are used
+      num_classes: (int) Total number of classes
+      dropout_rate: (float) Dropout probability
+      bias: (bool) Whether bias needs to be added to gcn layers
+      **kwargs: Keyword arguments for tf.keras.Model.
+    """
+    super(GCN, self).__init__(**kwargs)
+    self.num_layers = num_layers
+    self.hidden_dim = hidden_dim
+    self.num_classes = num_classes
+    self.dropout_rate = dropout_rate
+    self.bias = bias
+
+    # One block per non-output layer. Building exactly `num_layers - 1`
+    # blocks avoids creating a block that `call` never uses when the network
+    # consists of the output layer only.
+    self.gc = [
+        GCNBlock(self.hidden_dim[i], dropout_rate=dropout_rate, bias=bias)
+        for i in range(self.num_layers - 1)
+    ]
+
+    # output layer
+    self.classifier = GraphConvLayer(self.num_classes, bias=self.bias)
+
+  def call(self, inputs, training=None):
+    """Runs the network on a `(features, adj)` pair.
+
+    Args:
+      inputs: Sequence whose first item is the feature tensor and whose
+        second item is the adjacency tensor. The same adjacency tensor is fed
+        to every layer.
+      training: (bool or None) Forwarded to every block; it controls whether
+        dropout is active. None leaves the decision to Keras.
+
+    Returns:
+      The output of the final graph convolution layer. No activation is
+      applied to it.
+    """
+    features, adj = inputs[0], inputs[1]
+    for block in self.gc:
+      features = block((features, adj), training=training)
+    return self.classifier((features, adj))
diff --git a/research/gnn-survey/train.py b/research/gnn-survey/train.py
@@ -0,0 +1,110 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Trains a GNN."""
+import time
+
+from absl import app
+from absl import flags
+import tensorflow as tf
+
+from utils import load_dataset, build_model, cal_acc  # pylint: disable=g-multiple-import
+
+# Command-line flags of the training script.
+flags.DEFINE_enum('dataset', 'cora', ['cora'],
+                  'The input dataset. Available dataset now: cora')
+flags.DEFINE_enum('model', 'gcn', ['gcn'],
+                  'GNN model. Available model now: gcn')
+flags.DEFINE_float('dropout_rate', 0.5, 'Dropout probability')
+# The default is given as an int so that it matches the type of the flag.
+flags.DEFINE_integer('gpu', -1, 'Gpu id, -1 means cpu only')
+flags.DEFINE_float('lr', 1e-2, 'Initial learning rate')
+flags.DEFINE_integer('epochs', 200, 'Number of training epochs')
+flags.DEFINE_integer('num_layers', 2, 'Number of gnn layers')
+# NOTE: values given on the command line arrive as a list of strings, whereas
+# this default holds an int; consumers should coerce the entries to int.
+flags.DEFINE_list('hidden_dim', [32], 'Dimension of gnn hidden layers')
+flags.DEFINE_enum('optimizer', 'adam', ['adam', 'sgd'],
+                  'Optimizer for training')
+flags.DEFINE_float('weight_decay', 5e-4, 'Weight for L2 regularization')
+flags.DEFINE_string('save_dir', 'models/cora/gcn',
+                    'Directory stores trained model')
+
+FLAGS = flags.FLAGS
+
+
+def train(model, adj, features, labels, idx_train, idx_val, idx_test):
+  """Trains a gnn model and prints the test accuracy of the best model.
+
+  Runs FLAGS.epochs optimisation steps, each on the complete
+  `(features, adj)` input. After every step the model is evaluated on the
+  validation entries and saved to FLAGS.save_dir whenever the validation
+  accuracy improves. Finally the saved model is loaded again and its accuracy
+  on the test entries is printed.
+
+  Args:
+    model: Keras model that is called as `model((features, adj))`.
+    adj: Adjacency input, handed to the model unchanged.
+    features: Feature input, handed to the model unchanged.
+    labels: Labels; indexed with the `idx_*` arguments.
+    idx_train: Index selecting the training entries of labels and outputs.
+    idx_val: Index selecting the validation entries.
+    idx_test: Index selecting the test entries.
+
+  Raises:
+    ValueError: If FLAGS.optimizer names an unsupported optimizer.
+  """
+  loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+  # Start at -inf so that the first epoch always saves a model. With a start
+  # value of 0.0 nothing is saved when the validation accuracy never exceeds
+  # zero, and the load below then fails or picks up a model of an older run.
+  best_val_acc = float('-inf')
+
+  if FLAGS.optimizer == 'adam':
+    optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.lr)
+  elif FLAGS.optimizer == 'sgd':
+    optimizer = tf.keras.optimizers.SGD(learning_rate=FLAGS.lr)
+  else:
+    raise ValueError('Unsupported optimizer: %s' % FLAGS.optimizer)
+
+  inputs = (features, adj)
+  for epoch in range(FLAGS.epochs):
+    epoch_start_time = time.time()
+
+    with tf.GradientTape() as tape:
+      # training=True is required for the dropout layers to be active; a
+      # plain model(inputs) call runs them in inference mode.
+      output = model(inputs, training=True)
+      train_loss = loss_fn(labels[idx_train], output[idx_train])
+      # L2 regularization
+      for weight in model.trainable_weights:
+        train_loss += FLAGS.weight_decay * tf.nn.l2_loss(weight)
+
+    gradients = tape.gradient(train_loss, model.trainable_variables)
+    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
+
+    # Training accuracy comes from the forward pass made before the update,
+    # i.e. with dropout applied.
+    train_acc = cal_acc(labels[idx_train], output[idx_train])
+
+    # Evaluate
+    output = model(inputs, training=False)
+    val_loss = loss_fn(labels[idx_val], output[idx_val])
+    val_acc = cal_acc(labels[idx_val], output[idx_val])
+
+    # Keep only the model with the best validation accuracy seen so far.
+    if val_acc > best_val_acc:
+      best_val_acc = val_acc
+      model.save(FLAGS.save_dir)
+
+    print('[%03d/%03d] %.2f sec(s) Train Acc: %.3f Loss: %.6f | '
+          'Val Acc: %.3f loss: %.6f' %
+          (epoch + 1, FLAGS.epochs, time.time() - epoch_start_time,
+           train_acc, train_loss, val_acc, val_loss))
+
+  print('Start Predicting...')
+  # Evaluate the saved best model rather than the weights of the last epoch.
+  model = tf.keras.models.load_model(FLAGS.save_dir)
+  output = model(inputs, training=False)
+  test_acc = cal_acc(labels[idx_test], output[idx_test])
+  print('***Test Accuracy: %.3f***' % test_acc)
+
+
+def main(_):
+  """Loads the dataset, builds the model and runs training.
+
+  Args:
+    _: Unused; positional argument supplied by `app.run`.
+  """
+  if FLAGS.gpu == -1:
+    device = '/cpu:0'
+  else:
+    device = '/gpu:{}'.format(FLAGS.gpu)
+
+  # DEFINE_list yields strings for values given on the command line, while
+  # the flag default holds ints. Coerce to int so both forms work as sizes.
+  hidden_dim = [int(dim) for dim in FLAGS.hidden_dim]
+
+  with tf.device(device):
+    tf.random.set_seed(1234)
+    # Load the dataset and process features and adj matrix
+    print('Loading {} dataset...'.format(FLAGS.dataset))
+    adj, features, labels, idx_train, idx_val, idx_test = load_dataset(
+        FLAGS.dataset)
+    # Class ids are taken to start at 0, so the largest id plus one is the
+    # number of classes.
+    num_classes = max(labels) + 1
+    print('Build model...')
+    model = build_model(FLAGS.model, FLAGS.num_layers, hidden_dim,
+                        num_classes, FLAGS.dropout_rate)
+    print('Start Training...')
+    train(model, adj, features, labels, idx_train, idx_val, idx_test)
+
+
+if __name__ == '__main__':
+  app.run(main)
diff --git a/research/gnn-survey/utils.py b/research/gnn-survey/utils.py