Merge pull request #61 from seqeralabs/fix/boltz2_refolding

FloWuenne · web-flow · commit f267e39d392c · 2025-11-27T14:42:40.000-05:00
Fixing boltz2 docker image and command.
diff --git a/assets/boltz2.Dockerfile b/assets/boltz2.Dockerfile
@@ -0,0 +1,40 @@
+# 1. Use the runtime image (approx. 4GB vs 9GB for devel).
+# It ships PyTorch 2.3 and the CUDA 12.1 runtime libraries but NO compiler (nvcc);
+# the NVIDIA driver itself is provided by the host, not by this image.
+FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime
+
+# Build-time only: keeps apt non-interactive without leaking into the runtime env.
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Runtime flags: unbuffered Python output, and no pip download cache in the image.
+ENV PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1
+
+# 2. Install system tools, Python dependencies and Boltz in ONE layer.
+# build-essential/cmake are only needed while pip runs (in case 'dm-tree' or other
+# deps compile C++ extensions). They are purged in the same RUN, because deleting
+# files in a later layer does not shrink the image.
+# git, tar and wget stay installed. NOTE(review): nothing in this file calls them;
+# confirm they are needed at runtime before removing.
+#
+# The PyG extensions come from prebuilt wheels, so nvcc is not required; the index
+# URL must match the PyTorch (2.3.0) and CUDA (12.1) versions of the base image.
+# 'rdkit' is installed explicitly (>=2022.9.5) before Boltz pulls its own deps.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        build-essential \
+        cmake \
+        git \
+        tar \
+        wget && \
+    pip install --upgrade pip && \
+    pip install torch-scatter torch-sparse torch-cluster torch-spline-conv \
+        -f https://data.pyg.org/whl/torch-2.3.0+cu121.html && \
+    pip install "rdkit>=2022.9.5" && \
+    pip install "boltz[cuda]" && \
+    apt-get purge -y build-essential cmake && \
+    apt-get autoremove -y && \
+    rm -rf /var/lib/apt/lists/*
+
+# Setup working directory
+WORKDIR /app
diff --git a/conf/test_design_protein.config b/conf/test_design_protein.config
@@ -34,7 +34,7 @@ params {
     run_boltz2_refold          = true
     run_ipsae                  = true
     run_prodigy                = true
-    run_foldseek               = true
+    run_foldseek               = false
     run_consolidation          = true
     
     // Output
diff --git a/modules/local/boltz2_refold.nf b/modules/local/boltz2_refold.nf
@@ -21,7 +21,7 @@ process BOLTZ2_REFOLD {
     publishDir "${params.outdir}/${meta.parent_id}/boltz2", mode: params.publish_dir_mode
 
     // Build Boltz-2 container using Wave with conda
-    conda "boltz::boltz=1.0.0"
+    container 'boltz2:latest'
     
     // GPU acceleration - Boltz-2 benefits from GPU for efficient prediction
     accelerator 1, type: 'nvidia-gpu'
@@ -45,6 +45,13 @@ process BOLTZ2_REFOLD {
     #!/bin/bash
     set -euo pipefail
     
+    # Fix for Numba caching error in containers
+    export NUMBA_CACHE_DIR="\${PWD}/numba_cache"
+    mkdir -p "\${NUMBA_CACHE_DIR}"
+    
+    # Fix for Boltz caching error (tries to write to /.boltz)
+    export HOME="\${PWD}"
+    
     echo "============================================"
     echo "Boltz-2 Multimer Structure Prediction"
     echo "============================================"
@@ -72,7 +79,7 @@ process BOLTZ2_REFOLD {
     echo ""
     echo "Processing ProteinMPNN sequences..."
     
-    python3 <<'PARSE_FASTA'
+    python3 <<PARSE_FASTA
 import sys
 import yaml
 import os
@@ -81,7 +88,7 @@ import os
 fasta_input = "${mpnn_sequences}"
 fasta_files = fasta_input.split() if " " in fasta_input else [fasta_input]
 
-target_seq = "\${TARGET_SEQ}"
+target_seq = "\$TARGET_SEQ"
 output_base = "${meta.id}"
 parent_id = "${meta.parent_id}"