Genentech
diff --git a/‎.github/workflows/run-tests.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/run-tests.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎setup.cfg‎
Lines changed: 5 additions & 2 deletions b/‎setup.cfg‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎src/decima/cli/__init__.py‎
Lines changed: 12 additions & 10 deletions b/‎src/decima/cli/__init__.py‎
Lines changed: 12 additions & 10 deletions
diff --git a/‎src/decima/cli/attributions.py‎
Lines changed: 21 additions & 2 deletions b/‎src/decima/cli/attributions.py‎
Lines changed: 21 additions & 2 deletions
diff --git a/‎src/decima/cli/download.py‎
Lines changed: 1 addition & 1 deletion b/‎src/decima/cli/download.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/decima/cli/finetune.py‎
Lines changed: 1 addition & 1 deletion b/‎src/decima/cli/finetune.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/decima/cli/predict_genes.py‎
Lines changed: 1 addition & 1 deletion b/‎src/decima/cli/predict_genes.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/decima/cli/query_cell.py‎
Lines changed: 2 additions & 2 deletions b/‎src/decima/cli/query_cell.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/decima/cli/vep.py‎
Lines changed: 123 additions & 0 deletions b/‎src/decima/cli/vep.py‎
Lines changed: 123 additions & 0 deletions
diff --git a/‎src/decima/core/__init__.py‎
Lines changed: 0 additions & 1 deletion b/‎src/decima/core/__init__.py‎
Lines changed: 0 additions & 1 deletion
@@ -10,7 +10,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
 
     name: Python ${{ matrix.python-version }}
     steps:
 
@@ -50,7 +50,8 @@ python_requires = >=3.9
 install_requires =
     importlib-metadata; python_version<"3.8"
     click
-    wandb # TODO: move to optional
+    more_itertools
+    wandb
     numpy
     torch
     grelu
@@ -65,6 +66,8 @@ install_requires =
     anndata
     h5py
     pyBigWig
+    pyarrow
+    tangermeme<0.5
 
 [options.packages.find]
 where = src
@@ -76,7 +79,7 @@ exclude =
 # `pip install decima[PDF]` like:
 # PDF = ReportLab; RXP
 optional =
-    wandb
+    cyvcf2
 
 # Add here test requirements (semicolon/line-separated)
 testing =
 
@@ -1,11 +1,12 @@
 import logging
 import click
 
-# from decima.cli.finetune import finetune
-from decima.cli.predict_genes import predict_genes
-from decima.cli.download import download
-from decima.cli.attributions import attributions
-from decima.cli.query_cell import query_cell
+from decima.cli.predict_genes import cli_predict_genes
+from decima.cli.download import cli_download
+from decima.cli.attributions import cli_attributions
+from decima.cli.query_cell import cli_query_cell
+from decima.cli.vep import cli_predict_variant_effect
+# from decima.cli.finetune import cli_finetune
 
 
 logger = logging.getLogger("decima")
@@ -25,11 +26,12 @@ def main():
     pass
 
 
-# main.add_command(finetune)
-main.add_command(predict_genes)
-main.add_command(download)
-main.add_command(attributions)
-main.add_command(query_cell)
+# main.add_command(cli_finetune, name="finetune")
+main.add_command(cli_predict_genes, name="predict-genes")
+main.add_command(cli_download, name="download")
+main.add_command(cli_attributions, name="attributions")
+main.add_command(cli_query_cell, name="query-cell")
+main.add_command(cli_predict_variant_effect, name="vep")
 
 if __name__ == "__main__":
     main()
@@ -22,6 +22,12 @@
     default=0,
     help="Model to use for attribution analysis either replicate number or path to the model.",
 )
+@click.option(
+    "--metadata",
+    type=click.Path(exists=True),
+    default=None,
+    help="Path to the metadata anndata file. Default: None.",
+)
 @click.option(
     "--method", type=str, required=False, default="inputxgradient", help="Method to use for attribution analysis."
 )
@@ -30,8 +36,20 @@
 @click.option("--plot_seqlogo", is_flag=True, help="Generate sequence logo plots for peaks")
 @click.option("--seqlogo_window", type=int, default=50, help="Window size for sequence logo plots")
 @click.option("--dpi", type=int, default=100, help="DPI for attribution plots")
-def attributions(
-    output_dir, genes, seqs, tasks, off_tasks, model, method, device, plot_peaks, plot_seqlogo, seqlogo_window, dpi
+def cli_attributions(
+    output_dir,
+    genes,
+    seqs,
+    tasks,
+    off_tasks,
+    model,
+    metadata,
+    method,
+    device,
+    plot_peaks,
+    plot_seqlogo,
+    seqlogo_window,
+    dpi,
 ):
     """
     Generate and save attribution analysis results for a gene or a set of sequences.
@@ -80,6 +98,7 @@ def attributions(
         tasks=tasks,
         off_tasks=off_tasks,
         model=model,
+        metadata_anndata=metadata,
         method=method,
         device=device,
         plot_peaks=plot_peaks,
 
@@ -3,6 +3,6 @@
 
 
 @click.command()
-def download():
+def cli_download():
     """Download all required data and model weights."""
     download_decima_data()
@@ -16,7 +16,7 @@
 @click.option("--grad", required=True, type=int, help="Gradient accumulation steps")
 @click.option("--replicate", default=0, type=int, help="Replication number")
 @click.option("--bs", default=4, type=int, help="Batch size")
-def finetune(name, dir, lr, weight, grad, replicate, bs):
+def cli_finetune(name, dir, lr, weight, grad, replicate, bs):
     """Finetune the Decima model."""
     wandb.login(host="https://genentech.wandb.io")
     run = wandb.init(project="decima", dir=name, name=name)
 
@@ -20,7 +20,7 @@
 @click.option("--matrix_file", required=True, help="Path to h5ad file containing genes to predict.")
 @click.option("--out_file", required=True, help="Output file path.")
 @click.option("--max_seq_shift", default=0, help="Maximum jitter for augmentation.")
-def predict_genes(device, ckpts, h5_file, matrix_file, out_file, max_seq_shift):
+def cli_predict_genes(device, ckpts, h5_file, matrix_file, out_file, max_seq_shift):
     """Make predictions for all genes."""
     torch.set_float32_matmul_precision("medium")
 
 
@@ -4,9 +4,9 @@
 
 @click.command()
 @click.argument("query", default="")
-def query_cell(query=""):
+def cli_query_cell(query=""):
     """
-    Query a cell using query strig
+    Query a cell using query string
 
     Examples:
 
 
@@ -0,0 +1,123 @@
+import click
+from decima.constants import DECIMA_CONTEXT_SIZE
+from decima.vep import predict_variant_effect
+
+
+@click.command()
+@click.option(
+    "-v",
+    "--variants",
+    type=click.Path(exists=True),
+    help="Path to the variant .vcf file. The VCF file needs to be normalized. Try normalizing the VCF file in case of an error: `bcftools norm -f ref.fasta input.vcf.gz -o output.vcf.gz`",
+)
+@click.option("-o", "--output_pq", type=click.Path(), help="Path to the output parquet file.")
+@click.option("--tasks", type=str, default=None, help="Tasks to predict. If not provided, all tasks will be predicted.")
+@click.option(
+    "--chunksize",
+    type=int,
+    default=10_000,
+    help="Number of variants to process in each chunk. Loading variants in chunks is more memory efficient. "
+    "Each chunk of variants is processed and saved to the output parquet file before continuing to the next chunk. Default: 10_000.",
+)
+@click.option(
+    "--model",
+    type=str,
+    default="0",
+    help="Model to use for variant effect prediction either replicate number or path to the model.",
+)
+@click.option(
+    "--metadata",
+    type=click.Path(exists=True),
+    default=None,
+    help="Path to the metadata anndata file. Default: None.",
+)
+@click.option(
+    "--device", type=str, default=None, help="Device to use. Default: None which automatically selects the best device."
+)
+@click.option("--batch-size", type=int, default=8, help="Batch size for the model. Default: 8")
+@click.option("--num-workers", type=int, default=4, help="Number of workers for the loader. Default: 4")
+@click.option("--distance-type", type=str, default="tss", help="Type of distance. Default: tss.")
+@click.option(
+    "--min-distance",
+    type=float,
+    default=0,
+    help="Minimum distance from the end of the gene. Default: 0.",
+)
+@click.option(
+    "--max-distance",
+    type=float,
+    default=DECIMA_CONTEXT_SIZE,
+    help=f"Maximum distance from the TSS. Default: {DECIMA_CONTEXT_SIZE}.",
+)
+@click.option(
+    "--include-cols",
+    type=str,
+    default=None,
+    help="Comma-separated columns of the original tsv file to include in the output parquet file. Default: None.",
+)
+@click.option(
+    "--gene-col",
+    type=str,
+    default=None,
+    help="Column name for gene names. Default: None.",
+)
+@click.option("--genome", type=str, default="hg38", help="Genome build. Default: hg38.")
+def cli_predict_variant_effect(
+    variants,
+    output_pq,
+    tasks,
+    chunksize,
+    model,
+    metadata,
+    device,
+    batch_size,
+    num_workers,
+    distance_type,
+    min_distance,
+    max_distance,
+    include_cols,
+    gene_col,
+    genome,
+):
+    """Predict variant effect and save to parquet
+
+    Examples:
+
+        >>> decima vep -v "data/sample.vcf" -o "vep_results.parquet"
+
+        >>> decima vep -v "data/sample.vcf" -o "vep_results.parquet" --tasks "cell_type == 'classical monocyte'" # only predict for classical monocytes
+
+        >>> decima vep -v "data/sample.vcf" -o "vep_results.parquet" --device 0 # use GPU device 0
+
+        >>> decima vep -v "data/sample.vcf" -o "vep_results.parquet" --include-cols "gene_name,gene_id" # include gene_name and gene_id columns in the output
+
+        >>> decima vep -v "data/sample.vcf" -o "vep_results.parquet" --gene-col "gene_name" # use the gene_name column as gene names; if this option is passed, genes and variants are mapped based on this column rather than on the genomic locus from the annotation.
+
+        >>> decima vep -v "data/sample.vcf" -o "vep_results.parquet" --distance-type tss --min-distance 50000 --max-distance 100000 # predict for variants between 50kb and 100kb from the TSS
+    """
+    if model in ["0", "1", "2", "3"]:  # replicate index
+        model = int(model)
+
+    if isinstance(device, str) and device.isdigit():
+        device = int(device)
+
+    if include_cols:
+        include_cols = include_cols.split(",")
+
+    predict_variant_effect(
+        variants,
+        output_pq=output_pq,
+        tasks=tasks,
+        model=model,
+        metadata_anndata=metadata,
+        chunksize=chunksize,
+        device=device,
+        batch_size=batch_size,
+        num_workers=num_workers,
+        distance_type=distance_type,
+        min_distance=min_distance,
+        max_distance=max_distance,
+        include_cols=include_cols,
+        gene_col=gene_col,
+        genome=genome,
+    )
@@ -1 +0,0 @@
-from decima.core.result import DecimaResult
Original file line number	Diff line number	Diff line change
`@@ -1 +0,0 @@`
`1`		`-from decima.core.result import DecimaResult`