-from typing import Tuple
+from typing import Tuple, Callable

 import torch
 import torch.nn as nn
+import torch.optim as optim
+import torch.nn.functional as F

 from dhg.structure.graphs import BiGraph
+from dhg.nn.convs.common import Discriminator


 class BGNN_Adv(nn.Module):
@@ -15,7 +18,7 @@ class BGNN_Adv(nn.Module):
         ``layer_depth`` (``int``): The depth of layers.
     """

-    def __init__(self, u_dim: int, v_dim: int, layer_depth: int = 3,) -> None:
+    def __init__(self, u_dim: int, v_dim: int, layer_depth: int = 3) -> None:

         super().__init__()
         self.layer_depth = layer_depth
@@ -45,11 +48,103 @@ def forward(self, X_u: torch.Tensor, X_v: torch.Tensor, g: BiGraph) -> Tuple[tor
         last_X_v = g.u2v(_tmp, aggr="sum")
         return last_X_u

-    def train_with_cascaded(self):
-        pass
+    def train_one_layer(
+        self,
+        X_true: torch.Tensor,
+        X_other: torch.Tensor,
+        mp_func: Callable,
+        layer: nn.Module,
+        lr: float,
+        weight_decay: float,
+        max_epoch: int,
+        drop_rate: float = 0.5,
+        device: str = "cpu",
+    ):
+        netG = layer.to(device)
+        netD = Discriminator(X_true.shape[1], 16, 1, drop_rate=drop_rate).to(device)

-    def train_with_end2end(self):
-        pass
+        optimG = optim.Adam(netG.parameters(), lr=lr, weight_decay=weight_decay)
+        optimD = optim.Adam(netD.parameters(), lr=lr, weight_decay=weight_decay)
+
+        X_true, X_other = X_true.to(device), X_other.to(device)
+        lbl_real = torch.ones(X_true.shape[0]).to(device)
+        lbl_fake = torch.zeros(X_true.shape[0]).to(device)
+
+        for _ in range(max_epoch):
+            X_real = X_true
+            X_fake = mp_func(netG(X_other))
+
+            # step 1: train Discriminator
+            optimD.zero_grad()
+
+            pred_real = netD(X_real)
+            pred_fake = netD(X_fake.detach())
+
+            lossD = F.binary_cross_entropy(pred_real, lbl_real) + F.binary_cross_entropy(pred_fake, lbl_fake)
+            lossD.backward()
+            optimD.step()
+
+            # step 2: train Generator
+            optimG.zero_grad()
+
+            pred_fake = netD(X_fake)
+
+            lossG = F.binary_cross_entropy(pred_fake, lbl_real)
+            lossG.backward()
+            optimG.step()
+
+    def train_with_cascaded(
+        self,
+        X_u: torch.Tensor,
+        X_v: torch.Tensor,
+        g: BiGraph,
+        lr: float,
+        weight_decay: float,
+        max_epoch: int,
+        drop_rate: float = 0.5,
+        device: str = "cpu",
+    ):
107+ r"""Train the model with cascaded strategy.
108+
109+ Args:
110+ ``X_u`` (``torch.Tensor``): The feature matrix of vertices in set :math:`U`.
111+ ``X_v`` (``torch.Tensor``): The feature matrix of vertices in set :math:`V`.
112+ ``g`` (``BiGraph``): The bipartite graph.
113+ ``lr`` (``float``): The learning rate.
114+ ``weight_decay`` (``float``): The weight decay.
115+ ``max_epoch`` (``int``): The maximum number of epochs.
116+ ``drop_rate`` (``float``): The dropout rate. Default: ``0.5``.
117+ ``device`` (``str``): The device to use. Default: ``"cpu"``.
118+ """
+        last_X_u, last_X_v = X_u, X_v
+        for _idx in range(self.layer_depth):
+            if _idx % 2 == 0:
+                self.train_one_layer(
+                    last_X_u,
+                    last_X_v,
+                    lambda x: g.v2u(x, aggr="sum"),
+                    self.layers[_idx],
+                    lr,
+                    weight_decay,
+                    max_epoch,
+                    drop_rate,
+                    device,
+                )
+                last_X_u = g.v2u(self.layers[_idx](last_X_v), aggr="sum")
+            else:
+                self.train_one_layer(
+                    last_X_v,
+                    last_X_u,
+                    lambda x: g.u2v(x, aggr="sum"),
+                    self.layers[_idx],
+                    lr,
+                    weight_decay,
+                    max_epoch,
+                    drop_rate,
+                    device,
+                )
+                last_X_v = g.u2v(self.layers[_idx](last_X_u), aggr="sum")
+        return last_X_u


 class BGNN_MLP(nn.Module):
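For reference, here is a hypothetical usage sketch of the adversarial variant above (BGNN_Adv). The toy bipartite graph, feature dimensions, and hyperparameter values are illustrative assumptions and not part of this commit; the sketch assumes a BiGraph can be built from (num_u, num_v, edge list).

    # assumed toy data: 4 vertices in U with 8-dim features, 3 vertices in V with 6-dim features
    import torch
    from dhg.structure.graphs import BiGraph

    g = BiGraph(4, 3, [(0, 0), (1, 1), (2, 2), (3, 0)])
    X_u, X_v = torch.randn(4, 8), torch.randn(3, 6)

    model = BGNN_Adv(8, 6, layer_depth=3)
    # cascaded strategy: each layer is pre-trained adversarially against a fresh
    # Discriminator before its output is propagated to the next layer
    emb_u = model.train_with_cascaded(X_u, X_v, g, lr=1e-3, weight_decay=5e-4, max_epoch=100)
    # the trained layers can then be applied with a plain forward pass
    emb_u = model(X_u, X_v, g)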
@@ -58,20 +153,24 @@ class BGNN_MLP(nn.Module):
     Args:
         ``u_dim`` (``int``): The dimension of the vertex feature in set :math:`U`.
         ``v_dim`` (``int``): The dimension of the vertex feature in set :math:`V`.
+        ``hid_dim`` (``int``): The dimension of the hidden layer.
         ``layer_depth`` (``int``): The depth of layers.
     """

-    def __init__(self, u_dim: int, v_dim: int, layer_depth: int = 3,) -> None:
+    def __init__(self, u_dim: int, v_dim: int, hid_dim: int, layer_depth: int = 3,) -> None:

         super().__init__()
         self.layer_depth = layer_depth
         self.layers = nn.ModuleList()
+        self.decoders = nn.ModuleList()

         for _idx in range(layer_depth):
             if _idx % 2 == 0:
-                self.layers.append(nn.Linear(v_dim, u_dim))
+                self.layers.append(nn.Linear(v_dim, hid_dim))
+                self.decoders.append(nn.Linear(hid_dim, u_dim))
             else:
-                self.layers.append(nn.Linear(u_dim, v_dim))
+                self.layers.append(nn.Linear(u_dim, hid_dim))
+                self.decoders.append(nn.Linear(hid_dim, v_dim))

     def forward(self, X_u: torch.Tensor, X_v: torch.Tensor, g: BiGraph) -> Tuple[torch.Tensor, torch.Tensor]:
         r"""The forward function.
@@ -91,8 +190,95 @@ def forward(self, X_u: torch.Tensor, X_v: torch.Tensor, g: BiGraph) -> Tuple[tor
         last_X_v = g.u2v(_tmp, aggr="sum")
         return last_X_u

-    def train_with_cascaded(self):
-        pass
+    def train_one_layer(
+        self,
+        X_true: torch.Tensor,
+        X_other: torch.Tensor,
+        mp_func: Callable,
+        layer: nn.Module,
+        decoder: nn.Module,
+        lr: float,
+        weight_decay: float,
+        max_epoch: int,
+        device: str = "cpu",
+    ):
+        netG = layer.to(device)
+        netD = decoder.to(device)
+
+        optimizer = optim.Adam([*netG.parameters(), *netD.parameters()], lr=lr, weight_decay=weight_decay)
+
+        X_true, X_other = X_true.to(device), X_other.to(device)
+
+        for _ in range(max_epoch):
+            X_real = X_true
+            X_fake = netD(mp_func(netG(X_other)))
+
+            optimizer.zero_grad()
+            loss = F.mse_loss(X_fake, X_real)
+            loss.backward()
+            optimizer.step()
+
+    def train_with_cascaded(
+        self,
+        X_u: torch.Tensor,
+        X_v: torch.Tensor,
+        g: BiGraph,
+        lr: float,
+        weight_decay: float,
+        max_epoch: int,
+        device: str = "cpu",
+    ):
+        r"""Train the model with the cascaded strategy.
+
+        Args:
+            ``X_u`` (``torch.Tensor``): The feature matrix of vertices in set :math:`U`.
+            ``X_v`` (``torch.Tensor``): The feature matrix of vertices in set :math:`V`.
+            ``g`` (``BiGraph``): The bipartite graph.
+            ``lr`` (``float``): The learning rate.
+            ``weight_decay`` (``float``): The weight decay.
+            ``max_epoch`` (``int``): The maximum number of epochs.
+            ``device`` (``str``): The device to use. Default: ``"cpu"``.
+        """
+        last_X_u, last_X_v = X_u, X_v
+        for _idx in range(self.layer_depth):
+            if _idx % 2 == 0:
+                self.train_one_layer(
+                    last_X_u,
+                    last_X_v,
+                    lambda x: g.v2u(x, aggr="sum"),
+                    self.layers[_idx],
+                    self.decoders[_idx],
+                    lr,
+                    weight_decay,
+                    max_epoch,
+                    device,
+                )
+                last_X_u = g.v2u(self.layers[_idx](last_X_v), aggr="sum")
+            else:
+                self.train_one_layer(
+                    last_X_v,
+                    last_X_u,
+                    lambda x: g.u2v(x, aggr="sum"),
+                    self.layers[_idx],
+                    self.decoders[_idx],
+                    lr,
+                    weight_decay,
+                    max_epoch,
+                    device,
+                )
+                last_X_v = g.u2v(self.layers[_idx](last_X_u), aggr="sum")
+        return last_X_u
+
+
+class Decoder(nn.Module):
+    def __init__(self, in_channels: int, hid_channels: int, out_channels: int, drop_rate: float = 0.5):
+        super(Decoder, self).__init__()
+        self.layers = nn.Sequential(
+            nn.Linear(in_channels, hid_channels),
+            nn.ReLU(),
+            nn.Dropout(p=drop_rate, inplace=True),
+            nn.Linear(hid_channels, out_channels),
+            nn.Tanh(),
+        )

-    def train_with_end2end(self):
-        pass
+    def forward(self, X):
+        X = self.layers(X)
+        return X
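As a companion to the adversarial example, a hypothetical sketch of driving the MLP variant (BGNN_MLP) added above. The graph, shapes, and hyperparameters are again assumptions; equal feature and hidden sizes are used so that stacked layers line up dimensionally, since the propagation steps shown in the diff pass hid_dim-sized outputs on to the next layer.

    # assumed toy data: feature and hidden dimensions are all 16 for simplicity
    import torch
    from dhg.structure.graphs import BiGraph

    g = BiGraph(4, 3, [(0, 0), (1, 1), (2, 2), (3, 0)])
    X_u, X_v = torch.randn(4, 16), torch.randn(3, 16)

    model = BGNN_MLP(16, 16, hid_dim=16, layer_depth=3)
    # cascaded strategy: each Linear "encoder" layer is fitted jointly with its
    # per-layer decoder via the MSE reconstruction loss, one layer after another
    emb_u = model.train_with_cascaded(X_u, X_v, g, lr=1e-3, weight_decay=5e-4, max_epoch=100)
    emb_u = model(X_u, X_v, g)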