returned to the original model (smp.Unet generators) and removed torch.compile"

Ubuntu · Ubuntu · commit bf0a41b128bc · 2025-07-15T20:11:06.000Z
diff --git a/src/aging_gan/model.py b/src/aging_gan/model.py
@@ -1,89 +1,6 @@
 import torch.nn as nn
 import torch.nn.utils as nn_utils
 import segmentation_models_pytorch as smp
-import torch.nn.functional as F
-
-# ------------------------------------------------------------
-# 9‑residual‑block ResNet generator  (CycleGAN, 256×256)
-# ------------------------------------------------------------
-class ResnetBlock(nn.Module):
-    def __init__(self, channels, padding_type="reflect"):
-        super().__init__()
-        pad = nn.ReflectionPad2d if padding_type == "reflect" else nn.ZeroPad2d
-
-        self.block = nn.Sequential(
-            pad(1),
-            nn.Conv2d(channels, channels, 3, bias=False),
-            nn.InstanceNorm2d(channels, affine=True),
-            nn.ReLU(),
-            nn.Dropout(0.5),
-            pad(1),
-            nn.Conv2d(channels, channels, 3, bias=False),
-            nn.InstanceNorm2d(channels, affine=True),
-        )
-
-    def forward(self, x):
-        return x + self.block(x) # residual add
-
-
-class ResnetGenerator(nn.Module):
-    def __init__(self, in_c=3, out_c=3, n_blocks=9, ngf=64):
-        super().__init__()
-        assert n_blocks >= 1
-
-        layers = [
-            nn.ReflectionPad2d(3),
-            nn.Conv2d(in_c, ngf, 7, bias=False),
-            nn.InstanceNorm2d(ngf, affine=True),
-            nn.ReLU(),
-        ]
-
-        # downsample twice: 256→128→64 spatial, 64→128→256 channels
-        mult = 1
-        for _ in range(2):
-            layers += [
-                nn.Conv2d(ngf * mult, ngf * mult * 2, 3, 2, 1, bias=False),
-                nn.InstanceNorm2d(ngf * mult * 2, affine=True),
-                nn.ReLU(),
-            ]
-            mult *= 2 # 1->2->4
-
-        # residual blocks
-        layers += [ResnetBlock(ngf * mult) for _ in range(n_blocks)]
-
-        # upsample back to 256×256
-        for _ in range(2):
-            layers += [
-                nn.ConvTranspose2d(
-                    ngf * mult, ngf * mult // 2,
-                    3, 2, 1, output_padding=1, bias=False
-                ),
-                nn.InstanceNorm2d(ngf * mult // 2, affine=True),
-                nn.ReLU(),
-            ]
-            mult //= 2 # 4->2->1
-
-        layers += [
-            nn.ReflectionPad2d(3),
-            nn.Conv2d(ngf, out_c, 7), # bias=True is fine here
-            nn.Tanh(),
-        ]
-        self.model = nn.Sequential(*layers)
-
-        # weight init (Conv / ConvT)
-        for m in self.modules():
-            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
-                nn.init.normal_(m.weight, 0.0, 0.02)
-        # InstanceNorm affine params
-        for m in self.modules():
-            if isinstance(m, nn.InstanceNorm2d):
-                nn.init.constant_(m.weight, 1.0)
-                nn.init.constant_(m.bias,   0.0)
-
-    def forward(self, x):
-        return self.model(x)
-
-
 
 # Discriminator: PatchGAN 70x70
 class PatchDiscriminator(nn.Module):
@@ -138,22 +55,19 @@ def unfreeze_encoders(G, F):
 # Initialize and return the generators and discriminators used for training
 def initialize_models():
     # initialize the generators
-    # G = smp.Unet(
-    #     encoder_name="resnet34",
-    #     encoder_weights="imagenet",  # preload low-level filters
-    #     in_channels=3,  # RGB input
-    #     classes=3,  # RGB output
-    # )
-
-    # F = smp.Unet(
-    #     encoder_name="resnet34",
-    #     encoder_weights="imagenet",  # preload low-level filters
-    #     in_channels=3,  # RGB input
-    #     classes=3,  # RGB output
-    # )
-    
-    G = ResnetGenerator()
-    F = ResnetGenerator()
+    G = smp.Unet(
+        encoder_name="resnet34",
+        encoder_weights="imagenet",  # preload low-level filters
+        in_channels=3,  # RGB input
+        classes=3,  # RGB output
+    )
+
+    F = smp.Unet(
+        encoder_name="resnet34",
+        encoder_weights="imagenet",  # preload low-level filters
+        in_channels=3,  # RGB input
+        classes=3,  # RGB output
+    )
 
     # initlize the discriminator
     DX = PatchDiscriminator()
diff --git a/src/aging_gan/train.py b/src/aging_gan/train.py
@@ -15,13 +15,13 @@
 from aging_gan.utils import (
     set_seed,
     load_environ_vars,
-    # print_trainable_parameters,
+    print_trainable_parameters,
     save_checkpoint,
     generate_and_save_samples,
     get_device,
 )
 from aging_gan.data import prepare_dataset
-from aging_gan.model import initialize_models, ResnetGenerator  # , freeze_encoders, unfreeze_encoders
+from aging_gan.model import initialize_models, freeze_encoders, unfreeze_encoders
 from aging_gan.utils import archive_and_terminate
 
 logger = logging.getLogger(__name__)
@@ -45,18 +45,18 @@ def parse_args() -> argparse.Namespace:
         help="Initial learning rate for discriminators.",
     )
     p.add_argument(
-        "--num_train_epochs", type=int, default=80, help="Number of training epochs."
+        "--num_train_epochs", type=int, default=25, help="Number of training epochs."
     )
     p.add_argument(
         "--train_batch_size",
         type=int,
-        default=8,
+        default=16,
         help="Batch size per device during training.",
     )
     p.add_argument(
         "--eval_batch_size",
         type=int,
-        default=16,
+        default=32,
         help="Batch size per device during evaluation.",
     )
 
@@ -135,7 +135,7 @@ def initialize_optimizers(cfg, G, F, DX, DY):
     return opt_G, opt_F, opt_DX, opt_DY
 
 
-def initialize_loss_functions(lambda_cyc_value: int = 10.0, lambda_id_value: int = 5.0):
+def initialize_loss_functions(lambda_cyc_value: int = 2.0, lambda_id_value: int = 0.05):
     mse = nn.MSELoss()
     l1 = nn.L1Loss()
     lambda_cyc = lambda_cyc_value
@@ -498,19 +498,12 @@ def main() -> None:
     # Initialize the generators (G, F) and discriminators (DX, DY)
     G, F, DX, DY = initialize_models()
     # Freeze generator encoderes for training during early epochs
-    # logger.info("Parameters of generator G:")
-    # logger.info(print_trainable_parameters(G))
-    # logger.info("Freezing encoders of generators...")
-    # freeze_encoders(G, F)
-    # logger.info("Parameters of generator G after freezing:")
-    # logger.info(print_trainable_parameters(G))
-    
-    # Compile
-    logger.info("Models compiling...")
-    G = torch.compile(G, backend="aot_eager", fullgraph=False, dynamic=True)
-    F = torch.compile(F, backend="aot_eager", fullgraph=False, dynamic=True)
-    DX = torch.compile(DX, backend="aot_eager", fullgraph=False, dynamic=True)
-    DY = torch.compile(DY, backend="aot_eager", fullgraph=False, dynamic=True)
+    logger.info("Parameters of generator G:")
+    logger.info(print_trainable_parameters(G))
+    logger.info("Freezing encoders of generators...")
+    freeze_encoders(G, F)
+    logger.info("Parameters of generator G after freezing:")
+    logger.info(print_trainable_parameters(G))
     # Initialize optimizers
     (
         opt_G,
@@ -561,12 +554,12 @@ def main() -> None:
     best_fid = float("inf")  # keep track of the best FID score for each epoch
     for epoch in range(1, cfg.num_train_epochs + 1):
         logger.info(f"\nEPOCH {epoch}")
-        # # after 1 full epoch, unfreeze
-        # if epoch == 2:
-        #     logger.info("Unfreezing encoders of generators...")
-        #     unfreeze_encoders(G, F)
-        #     logger.info("Parameters of generator G after unfreezing:")
-        #     logger.info(print_trainable_parameters(G))
+        # after 1 full epoch, unfreeze
+        if epoch == 2:
+            logger.info("Unfreezing encoders of generators...")
+            unfreeze_encoders(G, F)
+            logger.info("Parameters of generator G after unfreezing:")
+            logger.info(print_trainable_parameters(G))
 
         val_metrics = perform_epoch(
             cfg,