Merge pull request to add-best-practice-docstrings-and-annotations

codinglabsong · web-flow · commit 8210454fd7c7 · 2025-07-17T13:37:53.000-07:00
Add concise docstrings and type hints
diff --git a/src/aging_gan/data.py b/src/aging_gan/data.py
@@ -1,20 +1,22 @@
+"""Dataset and dataloader utilities for the UTKFace dataset."""
+
 import os
 import logging
-import torch
-from torch.utils.data import DataLoader, Subset, Dataset
-import torchvision.transforms as T
 from dataclasses import dataclass
 from pathlib import Path
+from typing import Tuple
+
+import torch
 from PIL import Image
+from torch import Tensor
+from torch.utils.data import DataLoader, Dataset, Subset
+import torchvision.transforms as T
 
 logger = logging.getLogger(__name__)
 
 
 class UTKFace(Dataset):
-    """
-    Assumes the unzipped aligned UTKFace images live in  <root>/data/utkface_aligned_cropped/UTKFace
-    File pattern:  {age}_{gender}_{race}_{yyyymmddHHMMSS}.jpg
-    """
+    """Lightweight UTKFace dataset reader."""
 
     def __init__(self, root: str, transform: T.Compose | None = None):
         self.root = (
@@ -29,11 +31,13 @@ def __init__(self, root: str, transform: T.Compose | None = None):
         self.transform = transform
 
     def __len__(self) -> int:
+        """Return the number of images in the dataset."""
         return len(self.files)
 
-    def __getitem__(self, idx):
+    def __getitem__(self, idx: int) -> Tuple[Tensor, int]:
+        """Return the transformed image and associated age label."""
         path = self.files[idx]
-        age = int(path.name.split("_")[0])  # first token of file name is age
+        age = int(path.name.split("_")[0])
         img = Image.open(path).convert("RGB")
         if self.transform:
             img = self.transform(img)
@@ -49,7 +53,8 @@ def make_unpaired_loader(
     seed: int = 42,
     young_max: int = 28,  # 18-28
     old_min: int = 40,  # 40+
-):
+) -> DataLoader:
+    """Return a dataloader yielding unpaired young/old image tuples."""
     full_ds = UTKFace(root, transform)
 
     # Split into young, old indices
@@ -125,7 +130,8 @@ def prepare_dataset(
     num_workers: int = 2,
     img_size: int = 256,
     seed: int = 42,
-):
+) -> tuple[DataLoader, DataLoader, DataLoader]:
+    """Create train/validation/test dataloaders for UTKFace."""
     data_dir = Path(__file__).resolve().parents[2] / "data"
     os.makedirs(data_dir, exist_ok=True)
 
diff --git a/src/aging_gan/inference.py b/src/aging_gan/inference.py
@@ -1,3 +1,5 @@
+"""Command-line interface for running a trained generator on a single image."""
+
 import argparse
 from pathlib import Path
 
@@ -9,6 +11,7 @@
 
 
 def parse_args() -> argparse.Namespace:
+    """Parse CLI arguments for running inference."""
     p = argparse.ArgumentParser(
         description="Run one-off inference with a trained Aging-GAN generator"
     )
@@ -51,6 +54,7 @@ def parse_args() -> argparse.Namespace:
 
 @torch.inference_mode()
 def main() -> None:
+    """Load a checkpoint and generate an aged face from ``--input``."""
     cfg = parse_args()
     device = get_device()
 
diff --git a/src/aging_gan/model.py b/src/aging_gan/model.py
@@ -1,3 +1,6 @@
+"""Model definitions for the CycleGAN-style architecture."""
+
+from torch import Tensor
 import torch.nn as nn
 import torch.nn.functional as F
 
@@ -6,7 +9,9 @@
 
 
 class ResidualBlock(nn.Module):
-    def __init__(self, in_features):
+    """Simple residual block with two conv layers."""
+
+    def __init__(self, in_features: int) -> None:
         super().__init__()
 
         conv_block = [
@@ -21,12 +26,15 @@ def __init__(self, in_features):
 
         self.conv_block = nn.Sequential(*conv_block)
 
-    def forward(self, x):
-        return x + self.conv_block(x)  # skip connection
+    def forward(self, x: Tensor) -> Tensor:
+        """Apply the residual block."""
+        return x + self.conv_block(x)
 
 
 class Generator(nn.Module):
-    def __init__(self, ngf, n_residual_blocks=9):
+    """ResNet-style generator (stacked residual blocks) for domain translation."""
+
+    def __init__(self, ngf: int, n_residual_blocks: int = 9) -> None:
         super().__init__()
 
         # Initial convolution block
@@ -85,12 +93,15 @@ def __init__(self, ngf, n_residual_blocks=9):
 
         self.model = nn.Sequential(*model)
 
-    def forward(self, x):
+    def forward(self, x: Tensor) -> Tensor:
+        """Generate an image from ``x``."""
         return self.model(x)
 
 
 class Discriminator(nn.Module):
-    def __init__(self, ndf):
+    """PatchGAN discriminator."""
+
+    def __init__(self, ndf: int) -> None:
         super().__init__()
 
         model = [
@@ -125,13 +136,10 @@ def __init__(self, ndf):
 
         self.model = nn.Sequential(*model)
 
-    def forward(self, x):
-        # x: (B, 3, H, W)
-        x = self.model(x)  # (B, 1, H//8-2, W//8-2)
-        # Average pooling and flatten
-        return F.avg_pool2d(x, x.size()[2:]).view(
-            x.size()[0], -1
-        )  # global average -> (B, 1, 1, 1) -> flatten to (B, 1)
+    def forward(self, x: Tensor) -> Tensor:
+        """Return spatially averaged discriminator scores, one row per sample."""
+        x = self.model(x)
+        return F.avg_pool2d(x, x.size()[2:]).view(x.size()[0], -1)
 
 
 # # Discriminator: PatchGAN 70x70
@@ -187,7 +195,8 @@ def initialize_models(
     ngf: int = 32,
     ndf: int = 32,
     n_blocks: int = 9,
-):
+) -> tuple[Generator, Generator, Discriminator, Discriminator]:
+    """Instantiate the generators and discriminators with the given sizes."""
     # G = smp.Unet(
     #     encoder_name="resnet34",
     #     encoder_weights="imagenet",  # preload low-level filters
diff --git a/src/aging_gan/train.py b/src/aging_gan/train.py
@@ -134,7 +134,10 @@ def parse_args() -> argparse.Namespace:
     return args
 
 
-def initialize_optimizers(cfg, G, F, DX, DY):
+def initialize_optimizers(
+    cfg, G, F, DX, DY
+) -> tuple[optim.Optimizer, optim.Optimizer, optim.Optimizer, optim.Optimizer]:
+    """Create Adam optimizers for all models."""
     # track all generator params (even frozen encoder params during initial training).
     # This would allow us to transition easily to the full fine-tuning later on by simply toggling requires_grad=True
     # since the optimizers already track all the parameters from the start.
@@ -170,7 +173,8 @@ def initialize_loss_functions(
     lambda_adv_value: float = 2.0,
     lambda_cyc_value: float = 10.0,
     lambda_id_value: float = 7.0,
-):
+) -> tuple[nn.Module, nn.Module, float, float, float]:
+    """Return basic CycleGAN loss functions and weights."""
     mse = nn.MSELoss()
     l1 = nn.L1Loss()
     lambda_adv = lambda_adv_value
@@ -180,7 +184,10 @@ def initialize_loss_functions(
     return mse, l1, lambda_adv, lambda_cyc, lambda_id
 
 
-def make_schedulers(cfg, opt_G, opt_F, opt_DX, opt_DY):
+def make_schedulers(
+    cfg, opt_G, opt_F, opt_DX, opt_DY
+) -> tuple[LambdaLR, LambdaLR, LambdaLR, LambdaLR]:
+    """Return LR schedulers that decay linearly after half the run."""
     # keep lr constant for the first half, then linearly decay to 0
     n_epochs = cfg.num_train_epochs
     start_decay = n_epochs // 2
@@ -215,7 +222,8 @@ def perform_train_step(
     opt_DX,
     opt_DY,  # discriminator optimizers
     accelerator,
-):
+) -> dict[str, float]:
+    """Run a single optimization step for generators and discriminators."""
     x, y = real_data
     # ------ Update Generators ------
     opt_G.zero_grad(set_to_none=True)
@@ -304,7 +312,8 @@ def evaluate_epoch(
     lambda_id,  # loss functions and loss params
     fid_metric,
     accelerator,
-):
+) -> dict[str, float]:
+    """Evaluate models on ``loader`` and return averaged metrics."""
     metrics = {
         f"{split}/loss_DX": 0.0,
         f"{split}/loss_DY": 0.0,
@@ -416,7 +425,7 @@ def perform_epoch(
     epoch,
     accelerator,
     fid_metric,
-):
+) -> dict[str, float]:
     """Perform a single epoch."""
     # TRAINING
     logger.info("Training...")
diff --git a/src/aging_gan/utils.py b/src/aging_gan/utils.py
@@ -1,3 +1,5 @@
+"""Utility helpers for training and infrastructure management."""
+
 import os
 import requests
 import logging
@@ -16,7 +18,8 @@
 logger = logging.getLogger(__name__)
 
 
-def get_device():
+def get_device() -> torch.device:
+    """Return CUDA device if available else CPU."""
     return torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
@@ -30,7 +33,8 @@ def set_seed(seed: int) -> None:
     torch.backends.cudnn.benchmark = False  # trade speed for reproducibility
 
 
-def load_environ_vars(wandb_project: str = "aging-gan"):
+def load_environ_vars(wandb_project: str = "aging-gan") -> None:
+    """Set basic environment variables needed for a run."""
     os.environ["WANDB_PROJECT"] = wandb_project
     logger.info(f"W&B project set to '{wandb_project}'")
 
@@ -64,7 +68,7 @@ def save_checkpoint(
     sched_DX,
     sched_DY,  # schedulers
     kind: str = "best",
-):
+) -> None:
     """Overwrite the single best-ever checkpoint."""
     ckpt_dir = Path(__file__).resolve().parents[2] / "outputs/checkpoints"
     os.makedirs(ckpt_dir, exist_ok=True)
@@ -103,7 +107,8 @@ def generate_and_save_samples(
     epoch,
     device: torch.device,
     num_samples: int = 8,
-):
+) -> None:
+    """Generate ``num_samples`` images from ``generator`` and save a grid."""
     # grab batches until num_samples
     collected = []
     for imgs, _ in val_loader: