Skip to content

Commit 328e33d

Browse files
Muhammed Hasan Celik
and authored
bug fix for training (#25)
Co-authored-by: Muhammed Hasan Celik <celik.muhammed_hasan@gene.com>
1 parent f2278a2 commit 328e33d

File tree

2 files changed

+30
-2
lines changed

2 files changed

+30
-2
lines changed

src/decima/cli/finetune.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Finetune the Decima model."""
2+
23
import logging
34
import click
45
import anndata
@@ -9,7 +10,12 @@
910

1011
@click.command()
1112
@click.option("--name", required=True, help="Name of the run.")
12-
@click.option("--model", default="0", type=str, help="Model path or replication number. If a path is provided, the model will be loaded from the path. If a replication number is provided, the model will be loaded from the replication number.")
13+
@click.option(
14+
"--model",
15+
default="0",
16+
type=str,
17+
help="Model path or replication number. If a path is provided, the model will be loaded from the path. If a replication number is provided, the model will be loaded from the replication number.",
18+
)
1319
@click.option("--matrix-file", required=True, help="Matrix file path.")
1420
@click.option("--h5-file", required=True, help="H5 file path.")
1521
@click.option("--outdir", required=True, help="Output directory path to save model checkpoints.")
@@ -24,7 +30,24 @@
2430
@click.option("--logger", default="wandb", type=str, help="Logger.")
2531
@click.option("--num-workers", default=16, type=int, help="Number of workers.")
2632
@click.option("--seed", default=0, type=int, help="Random seed.")
27-
def cli_finetune(name, model, matrix_file, h5_file , outdir, learning_rate, loss_total_weight, gradient_accumulation, batch_size, max_seq_shift, gradient_clipping, save_top_k, epochs, logger, num_workers, seed):
33+
def cli_finetune(
34+
name,
35+
model,
36+
matrix_file,
37+
h5_file,
38+
outdir,
39+
learning_rate,
40+
loss_total_weight,
41+
gradient_accumulation,
42+
batch_size,
43+
max_seq_shift,
44+
gradient_clipping,
45+
save_top_k,
46+
epochs,
47+
logger,
48+
num_workers,
49+
seed,
50+
):
2851
"""Finetune the Decima model."""
2952
train_logger = logger
3053
logger = logging.getLogger("decima")
@@ -62,6 +85,7 @@ def cli_finetune(name, model, matrix_file, h5_file , outdir, learning_rate, loss
6285
}
6386
model_params = {
6487
"n_tasks": ad.shape[0],
88+
"init_borzoi": True,
6589
"replicate": model,
6690
}
6791
logger.info(f"train_params: {train_params}")

src/decima/model/decima_model.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import logging
12
from pathlib import Path
23
from tempfile import TemporaryDirectory
34

@@ -47,15 +48,18 @@ def __init__(self, n_tasks: int, mask=True, borzoi_kwargs: dict = None, init_bor
4748
model = int(model)
4849

4950
if init_borzoi:
51+
logger = logging.getLogger("decima")
5052
# Load state dict
5153
if Path(str(replicate)).exists():
54+
logger.info(f"Initializing weights from Borzoi model using file: {replicate}")
5255
if replicate.endswith(".h5") or replicate.endswith(".pth") or replicate.endswith(".pt"):
5356
state_dict = torch.load(replicate)
5457
elif replicate.endswith(".ckpt"):
5558
state_dict = torch.load(replicate)["state_dict"]
5659
else:
5760
raise ValueError(f"Invalid replicate path: {replicate}")
5861
else:
62+
logger.info(f"Initializing weights from Borzoi model using wandb for replicate: {replicate}")
5963
wandb.login(host="https://api.wandb.ai/", anonymous="must")
6064
api = wandb.Api(overrides={"base_url": "https://api.wandb.ai/"})
6165
art = api.artifact(f"grelu/borzoi/human_state_dict_fold{replicate}:latest")

0 commit comments

Comments (0)