@@ -42,10 +42,10 @@ def forward(self, X_u: torch.Tensor, X_v: torch.Tensor, g: BiGraph) -> Tuple[torch.Tensor, torch.Tensor]:
         for _idx in range(self.layer_depth):
             if _idx % 2 == 0:
                 _tmp = self.layers[_idx](last_X_v)
-                last_X_u = g.v2u(_tmp, aggr="sum")
+                last_X_u = torch.tanh(g.v2u(_tmp, aggr="sum"))
             else:
                 _tmp = self.layers[_idx](last_X_u)
-                last_X_v = g.u2v(_tmp, aggr="sum")
+                last_X_v = torch.tanh(g.u2v(_tmp, aggr="sum"))
         return last_X_u

     def train_one_layer(
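As a reading aid (not part of the commit): with `g.v2u`/`g.u2v` performing sum aggregation over the bipartite structure and `self.layers[k]` a linear map, the bounded propagation introduced here amounts to, in notation of my own choosing,

    X_u^{(k+1)} = \tanh\big(A_{uv}\,\mathrm{Linear}_k(X_v^{(k)})\big), \qquad
    X_v^{(k+1)} = \tanh\big(A_{uv}^{\top}\,\mathrm{Linear}_k(X_u^{(k)})\big),

where A_{uv} denotes the U-to-V incidence matrix. The tanh keeps every cascaded feature in (-1, 1), matching the Tanh output of the Decoder added further down in this commit.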
@@ -63,36 +63,36 @@ def train_one_layer(
         netG = layer.to(device)
         netD = Discriminator(X_true.shape[1], 16, 1, drop_rate=drop_rate).to(device)

-        optimG = optim.Adam(netG.parameters(), lr=lr, weight_decay=weight_decay)
-        optimD = optim.Adam(netD.parameters(), lr=lr, weight_decay=weight_decay)
+        optimizer_G = optim.Adam(netG.parameters(), lr=lr, weight_decay=weight_decay)
+        optimizer_D = optim.Adam(netD.parameters(), lr=lr, weight_decay=weight_decay)

-        X_true, X_other = X_true.to(device), X_other.to(device)
-        lbl_real = torch.ones(X_true.shape[0]).to(device)
-        lbl_fake = torch.zeros(X_true.shape[0]).to(device)
+        X_true, X_other = X_true.detach().to(device), X_other.detach().to(device)
+        lbl_real = torch.ones(X_true.shape[0], 1, requires_grad=False).to(device)
+        lbl_fake = torch.zeros(X_true.shape[0], 1, requires_grad=False).to(device)

         netG.train(), netD.train()
         for _ in range(max_epoch):
             X_real = X_true
-            X_fake = mp_func(netG(X_other))
+            X_fake = torch.tanh(mp_func(netG(X_other)))

             # step 1: train Discriminator
-            optimD.zero_grad()
+            optimizer_D.zero_grad()

             pred_real = netD(X_real)
             pred_fake = netD(X_fake.detach())

-            lossD = F.binary_cross_entropy(pred_real, lbl_real) + F.binary_cross_entropy(pred_fake, lbl_fake)
-            lossD.backward()
-            optimD.step()
+            loss_D = F.binary_cross_entropy(pred_real, lbl_real) + F.binary_cross_entropy(pred_fake, lbl_fake)
+            loss_D.backward()
+            optimizer_D.step()

             # step 2: train Generator
-            optimG.zero_grad()
+            optimizer_G.zero_grad()

             pred_fake = netD(X_fake)

-            lossG = F.binary_cross_entropy(pred_fake, lbl_real)
-            lossG.backward()
-            optimG.step()
+            loss_G = F.binary_cross_entropy(pred_fake, lbl_real)
+            loss_G.backward()
+            optimizer_G.step()

     def train_with_cascaded(
         self,
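For readers less familiar with the adversarial pattern in the hunk above, here is a minimal, self-contained sketch of the same discriminator/generator alternation with stand-in modules. The module shapes and hyperparameters are illustrative only and are not taken from the repository; the point is the two-step update and the (N, 1) labels matching the discriminator's output.

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

netG = nn.Linear(8, 8)                                 # stand-in generator
netD = nn.Sequential(nn.Linear(8, 1), nn.Sigmoid())    # stand-in discriminator
optimizer_G = optim.Adam(netG.parameters(), lr=1e-3)
optimizer_D = optim.Adam(netD.parameters(), lr=1e-3)

X_real = torch.randn(32, 8)
lbl_real = torch.ones(32, 1)                           # shape (N, 1) matches netD's output
lbl_fake = torch.zeros(32, 1)

for _ in range(10):
    X_fake = torch.tanh(netG(torch.randn(32, 8)))      # bounded, like the propagated features

    # step 1: update D on real vs. detached fake features
    optimizer_D.zero_grad()
    loss_D = F.binary_cross_entropy(netD(X_real), lbl_real) + \
             F.binary_cross_entropy(netD(X_fake.detach()), lbl_fake)
    loss_D.backward()
    optimizer_D.step()

    # step 2: update G so that D labels its output as real
    optimizer_G.zero_grad()
    loss_G = F.binary_cross_entropy(netD(X_fake), lbl_real)
    loss_G.backward()
    optimizer_G.step()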
@@ -117,7 +117,8 @@ def train_with_cascaded(
             ``drop_rate`` (``float``): The dropout rate. Default: ``0.5``.
             ``device`` (``str``): The device to use. Default: ``"cpu"``.
         """
-        last_X_u, last_X_v = X_u, X_v
+        self = self.to(device)
+        last_X_u, last_X_v = X_u.to(device), X_v.to(device)
         for _idx in range(self.layer_depth):
             if _idx % 2 == 0:
                 self.train_one_layer(
@@ -131,7 +132,8 @@ def train_with_cascaded(
                     drop_rate,
                     device,
                 )
-                last_X_u = g.v2u(self.layers[_idx](last_X_v), aggr="sum")
+                with torch.no_grad():
+                    last_X_u = torch.tanh(g.v2u(self.layers[_idx](last_X_v), aggr="sum"))
             else:
                 self.train_one_layer(
                     last_X_v,
@@ -144,7 +146,8 @@ def train_with_cascaded(
                     drop_rate,
                     device,
                 )
-                last_X_v = g.u2v(self.layers[_idx](last_X_u), aggr="sum")
+                with torch.no_grad():
+                    last_X_v = torch.tanh(g.u2v(self.layers[_idx](last_X_u), aggr="sum"))
         return last_X_u

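The `with torch.no_grad():` blocks added above reflect the cascaded design: each layer is trained in isolation, so the features handed to the next stage should carry no autograd history from the previous one. A tiny illustration of the effect (toy tensors and a toy layer, not from the source):

import torch
import torch.nn as nn

layer = nn.Linear(4, 4)
x = torch.rand(3, 4)

with torch.no_grad():
    nxt = torch.tanh(layer(x))    # computed only as input for the next stage

print(nxt.requires_grad)          # False: the next stage trains from a detached input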
@@ -155,10 +158,14 @@ class BGNN_MLP(nn.Module):
         ``u_dim`` (``int``): The dimension of the vertex feature in set :math:`U`.
         ``v_dim`` (``int``): The dimension of the vertex feature in set :math:`V`.
         ``hid_dim`` (``int``): The dimension of the hidden layer.
-        ``layer_depth`` (``int``): The depth of layers.
+        ``decoder_hid_dim`` (``int``): The dimension of the hidden layer in the decoder.
+        ``drop_rate`` (``float``): The dropout rate. Default: ``0.5``.
+        ``layer_depth`` (``int``): The depth of layers. Default: ``3``.
     """

-    def __init__(self, u_dim: int, v_dim: int, hid_dim: int, layer_depth: int = 3,) -> None:
+    def __init__(
+        self, u_dim: int, v_dim: int, hid_dim: int, decoder_hid_dim: int, drop_rate: float = 0.5, layer_depth: int = 3,
+    ) -> None:

         super().__init__()
         self.layer_depth = layer_depth
@@ -168,10 +175,10 @@ def __init__(self, u_dim: int, v_dim: int, hid_dim: int, layer_depth: int = 3,) -> None:
         for _idx in range(layer_depth):
             if _idx % 2 == 0:
                 self.layers.append(nn.Linear(v_dim, hid_dim))
-                self.decoders.append(nn.Linear(hid_dim, u_dim))
+                self.decoders.append(Decoder(hid_dim, decoder_hid_dim, u_dim, drop_rate=drop_rate))
             else:
                 self.layers.append(nn.Linear(u_dim, hid_dim))
-                self.decoders.append(nn.Linear(hid_dim, v_dim))
+                self.decoders.append(Decoder(hid_dim, decoder_hid_dim, v_dim, drop_rate=drop_rate))

     def forward(self, X_u: torch.Tensor, X_v: torch.Tensor, g: BiGraph) -> Tuple[torch.Tensor, torch.Tensor]:
         r"""The forward function.
@@ -185,10 +192,10 @@ def forward(self, X_u: torch.Tensor, X_v: torch.Tensor, g: BiGraph) -> Tuple[torch.Tensor, torch.Tensor]:
         for _idx in range(self.layer_depth):
             if _idx % 2 == 0:
                 _tmp = self.layers[_idx](last_X_v)
-                last_X_u = g.v2u(_tmp, aggr="sum")
+                last_X_u = self.decoders[_idx](torch.tanh(g.v2u(_tmp, aggr="sum")))
             else:
                 _tmp = self.layers[_idx](last_X_u)
-                last_X_v = g.u2v(_tmp, aggr="sum")
+                last_X_v = self.decoders[_idx](torch.tanh(g.u2v(_tmp, aggr="sum")))
         return last_X_u

     def train_one_layer(
@@ -208,12 +215,12 @@ def train_one_layer(

         optimizer = optim.Adam([*netG.parameters(), *netD.parameters()], lr=lr, weight_decay=weight_decay)

-        X_true, X_other = X_true.to(device), X_other.to(device)
+        X_true, X_other = X_true.detach().to(device), X_other.detach().to(device)

         netG.train(), netD.train()
         for _ in range(max_epoch):
             X_real = X_true
-            X_fake = netD(mp_func(netG(X_other)))
+            X_fake = netD(torch.tanh(mp_func(netG(X_other))))

             optimizer.zero_grad()
             loss = F.mse_loss(X_fake, X_real)
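Unlike BGNN_Adv, this variant fits the layer and its decoder jointly with a plain reconstruction loss: a single Adam instance over both parameter lists replaces the two optimizers and BCE objectives used above. A minimal sketch of the pattern with stand-in modules (names and sizes are mine, not the repository's):

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

netG, netD = nn.Linear(8, 8), nn.Linear(8, 8)          # stand-in layer and decoder
optimizer = optim.Adam([*netG.parameters(), *netD.parameters()], lr=1e-3, weight_decay=5e-4)

X_true, X_other = torch.rand(32, 8), torch.rand(32, 8)
for _ in range(10):
    X_fake = netD(torch.tanh(netG(X_other)))           # generate, bound, decode
    optimizer.zero_grad()
    loss = F.mse_loss(X_fake, X_true)                  # match the real features directly
    loss.backward()
    optimizer.step()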
@@ -241,42 +248,49 @@ def train_with_cascaded(
             ``max_epoch`` (``int``): The maximum number of epochs.
             ``device`` (``str``): The device to use. Default: ``"cpu"``.
         """
-        last_X_u, last_X_v = X_u, X_v
+        self = self.to(device)
+        last_X_u, last_X_v = X_u.to(device), X_v.to(device)
         for _idx in range(self.layer_depth):
             if _idx % 2 == 0:
                 self.train_one_layer(
                     last_X_u,
                     last_X_v,
                     lambda x: g.v2u(x, aggr="sum"),
                     self.layers[_idx],
+                    self.decoders[_idx],
                     lr,
                     weight_decay,
                     max_epoch,
                     device,
                 )
-                last_X_u = g.v2u(self.layers[_idx](last_X_v), aggr="sum")
+                with torch.no_grad():
+                    self.decoders[_idx].eval()
+                    last_X_u = self.decoders[_idx](torch.tanh(g.v2u(self.layers[_idx](last_X_v), aggr="sum")))
             else:
                 self.train_one_layer(
                     last_X_v,
                     last_X_u,
                     lambda x: g.u2v(x, aggr="sum"),
                     self.layers[_idx],
+                    self.decoders[_idx],
                     lr,
                     weight_decay,
                     max_epoch,
                     device,
                 )
-                last_X_v = g.u2v(self.layers[_idx](last_X_u), aggr="sum")
+                with torch.no_grad():
+                    self.decoders[_idx].eval()
+                    last_X_v = self.decoders[_idx](torch.tanh(g.u2v(self.layers[_idx](last_X_u), aggr="sum")))
         return last_X_u


 class Decoder(nn.Module):
     def __init__(self, in_channels: int, hid_channels: int, out_channels: int, drop_rate: float = 0.5):
-        super(Decoder, self).__init__()
+        super().__init__()
         self.layers = nn.Sequential(
             nn.Linear(in_channels, hid_channels),
             nn.ReLU(),
-            nn.Dropout(p=drop_rate, inplace=True),
+            nn.Dropout(p=drop_rate),
             nn.Linear(hid_channels, out_channels),
             nn.Tanh(),
         )
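Finally, a rough end-to-end usage sketch for the updated BGNN_MLP. Everything below is illustrative and not part of the commit: the BiGraph construction is an assumption about dhg's public API, the feature sizes are arbitrary, and the argument names follow the signatures visible in this diff; check the released package before relying on it.

import torch
from dhg import BiGraph
from dhg.models import BGNN_MLP

# toy bipartite graph with 40 U-vertices, 60 V-vertices, and a few edges (assumed constructor)
g = BiGraph(40, 60, [(0, 1), (2, 3), (5, 8)])
X_u, X_v = torch.rand(40, 16), torch.rand(60, 24)       # vertex features for U and V

model = BGNN_MLP(u_dim=16, v_dim=24, hid_dim=32, decoder_hid_dim=32, drop_rate=0.5, layer_depth=3)
model.train_with_cascaded(X_u, X_v, g, lr=0.01, weight_decay=5e-4, max_epoch=100, device="cpu")
emb_u = model(X_u, X_v, g)                               # embeddings for the U side after layer-wise training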