Skip to content

Commit 9b1582a

Browse files
committed
Complete remaining tasks: LOCCDT hook, NATURaL CF, docking.py::run()
LIB/read_input.cpp (Task 1.3 read_input hook): - Add #include "CavityDetect/CavityDetect.h" - Add LOCCDT RNGOPT branch: "RNGOPT LOCCDT [cleft_id] [min_r] [max_r]" Runs CavityDetector::load_from_fa() + detect() on the loaded receptor, writes loccdt_cleft.pdb for inspection, calls to_flexaid_spheres() to get sphere* linked list, then feeds it to generate_grid() + calc_cleftic() — identical pipeline to LOCCEN/LOCCLF but cavity auto-detected. Exits with error if no clefts found or requested cleft_id not present. LIB/NATURaL/NATURaLDualAssembly.cpp (Phase 3 CF placeholder removal): - Add #include "../gaboom.h" for vcfunction() declaration - compute_partial_cf(): replace -0.1*count placeholder with a real vcfunction() call on the current Cartesian atom coords. Temporarily trims FA_->num_optres to n_grown_residues so only grown residues are scored; restores the original count after the call. Returns 0.0 on vcfunction error rather than crashing the growth simulation. python/flexaidds/docking.py (Phase 2 docking.py::run()): - Add subprocess, shutil, re imports - BindingMode.free_energy: fall back to min(pose.energy) when _cpp_mode is None (Python-only path used by PDB parser) - BindingPopulation.__init__: accept optional modes list + temperature so run() can construct it directly - Docking.run(binary, timeout): full implementation — 1. _find_binary(): search PATH → build/ → ../build/ 2. subprocess.run(FlexAID config.inp) with capture + timeout 3. Discover *_*.pdb output files by mtime order 4. _parse_remark_pdb(): extract mode index, CF, RMSD, frequency from REMARK lines written by output_BindingMode(); compute Boltzmann weight 5. Aggregate poses into BindingMode objects, sort by free_energy, return BindingPopulation https://claude.ai/code/session_01PQFoVRutnkDNUQTFX1JQAw
1 parent 65f1c05 commit 9b1582a

File tree

3 files changed

+242
-29
lines changed

3 files changed

+242
-29
lines changed

LIB/NATURaL/NATURaLDualAssembly.cpp

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "NATURaLDualAssembly.h"
2828
#include "RibosomeElongation.h"
2929
#include "TransloconInsertion.h"
30+
#include "../gaboom.h" // vcfunction / cfstr / get_apparent_cf_evalue
3031

3132
#include <cstring>
3233
#include <cmath>
@@ -326,17 +327,28 @@ std::vector<DualAssemblyEngine::GrowthStep> DualAssemblyEngine::run() {
326327

327328
// ─── compute_partial_cf ───────────────────────────────────────────────────────
328329
double DualAssemblyEngine::compute_partial_cf(int n_grown_residues) const {
329-
// Simplified CF estimate: each grown residue contributes an attractive
330-
// contact energy. Production code calls the full cffunction()/vcfunction()
331-
// pipeline (ic2cf.cpp) with the current partially-grown complex.
332-
if (!FA_ || !atoms_ || !residues_) return 0.0;
333-
334-
// Scale CF by accessible surface area proxy:
335-
// more residues → more surface buried → stronger (negative) CF.
336-
// Using -0.1 kcal/mol/residue as a placeholder; production uses the
337-
// actual FlexAID Contact Function grid evaluation.
338-
double count = std::min(n_grown_residues, n_residues_);
339-
return -0.1 * count; // kcal/mol (attractive)
330+
if (!FA_ || !VC_ || !atoms_ || !residues_) return 0.0;
331+
332+
// Temporarily mark grown residues as scorable and the rest as non-scorable
333+
// by adjusting FA_->num_optres to cover only the grown portion.
334+
// vcfunction() scores the current Cartesian atom coordinates directly —
335+
// no IC → CC rebuild needed since DualAssembly keeps atoms_ in Cartesian.
336+
const int saved_num_optres = FA_->num_optres;
337+
const int n_score = std::min(n_grown_residues, n_residues_);
338+
339+
// Limit scoring to the first n_score residues (those grown so far).
340+
// optres[] is ordered by residue index; trim the active count.
341+
FA_->num_optres = std::min(n_score, saved_num_optres);
342+
343+
std::vector<std::pair<int,int>> intraclashes;
344+
bool error = false;
345+
double cf_val = vcfunction(FA_, VC_, atoms_, residues_, intraclashes, &error);
346+
347+
// Restore original optres count
348+
FA_->num_optres = saved_num_optres;
349+
350+
if (error) return 0.0;
351+
return cf_val; // kcal/mol (attractive values are negative)
340352
}
341353

342354
// ─── compute_growth_entropy ───────────────────────────────────────────────────

LIB/read_input.cpp

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "flexaid.h"
22
#include "fileio.h"
3+
#include "CavityDetect/CavityDetect.h"
34

45
/*****************************************************************************
56
* SUBROUTINE read_input reads input file.
@@ -433,14 +434,62 @@ void read_input(FA_Global* FA,atom** atoms, resid** residue,rot** rotamer,gridpo
433434
calc_cleftic(FA,*cleftgrid);
434435

435436
}else if(!strcmp(rngopt,"LOCCLF")){
436-
437+
437438
//RNGOPT LOCCLF filename.pdb
438439
strcpy(FA->rngopt,"locclf");
439440
strcpy(clf_file,&rngoptline[14]);
440-
441+
441442
printf("read binding-site grid <%s>\n",clf_file);
442443
spheres = read_spheres(clf_file);
443-
444+
445+
(*cleftgrid) = generate_grid(FA,spheres,(*atoms),(*residue));
446+
calc_cleftic(FA,*cleftgrid);
447+
448+
}else if(!strcmp(rngopt,"LOCCDT")){
449+
450+
// RNGOPT LOCCDT [cleft_id] [min_radius] [max_radius]
451+
// Use native CavityDetector (SURFNET + AVX-512/OpenMP/Metal) to
452+
// locate the largest binding cleft automatically.
453+
// Optional args: cleft_id (default 1), probe radii (default 1.4–4.0 Å).
454+
strcpy(FA->rngopt,"loccdt");
455+
456+
int cdt_cleft_id = 1;
457+
float cdt_min_r = 1.4f;
458+
float cdt_max_r = 4.0f;
459+
sscanf(rngoptline,"%*s %d %f %f", &cdt_cleft_id, &cdt_min_r, &cdt_max_r);
460+
if(cdt_cleft_id < 1) cdt_cleft_id = 1;
461+
462+
printf("LOCCDT: running native CavityDetector (cleft %d, probe %.2f–%.2f Å)\n",
463+
cdt_cleft_id, cdt_min_r, cdt_max_r);
464+
465+
{
466+
cavity_detect::CavityDetector detector;
467+
detector.load_from_fa(*atoms, *residue, FA->res_cnt);
468+
detector.detect(cdt_min_r, cdt_max_r);
469+
470+
if(detector.clefts().empty()){
471+
fprintf(stderr,"ERROR: LOCCDT found no clefts — "
472+
"check probe radii or fall back to LOCCLF/LOCCEN\n");
473+
Terminate(2);
474+
}
475+
476+
// Write the detected cleft as a CLF sphere PDB for inspection
477+
detector.write_sphere_pdb("loccdt_cleft.pdb", cdt_cleft_id);
478+
const auto& cleft_list = detector.clefts();
479+
size_t cdt_idx = static_cast<size_t>(cdt_cleft_id - 1);
480+
printf("LOCCDT: cleft %d detected (%zu spheres) — "
481+
"written to loccdt_cleft.pdb\n",
482+
cdt_cleft_id,
483+
cdt_idx < cleft_list.size() ? cleft_list[cdt_idx].spheres.size() : 0u);
484+
485+
spheres = detector.to_flexaid_spheres(cdt_cleft_id);
486+
}
487+
488+
if(!spheres){
489+
fprintf(stderr,"ERROR: LOCCDT cleft %d not found\n", cdt_cleft_id);
490+
Terminate(2);
491+
}
492+
444493
(*cleftgrid) = generate_grid(FA,spheres,(*atoms),(*residue));
445494
calc_cleftic(FA,*cleftgrid);
446495
}

python/flexaidds/docking.py

Lines changed: 167 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
Provides Pythonic API for molecular docking workflows.
44
"""
55

6+
import subprocess
7+
import shutil
8+
import re
69
import numpy as np
710
from pathlib import Path
811
from typing import List, Optional, Dict, Any
@@ -85,6 +88,9 @@ def free_energy(self) -> float:
8588
"""Helmholtz free energy F = H - TS (kcal/mol)."""
8689
if self._cpp_mode:
8790
return self._cpp_mode.get_free_energy()
91+
# Python-only path: use best pose energy as proxy
92+
if self._poses:
93+
return min(p.energy for p in self._poses)
8894
return float('inf')
8995

9096
@property
@@ -123,9 +129,10 @@ class BindingPopulation:
123129
Provides ensemble-level analysis and ranking of binding modes.
124130
"""
125131

126-
def __init__(self):
127-
self._modes: List[BindingMode] = []
128-
self._temperature: float = 300.0
132+
def __init__(self, modes: Optional[List[BindingMode]] = None,
133+
temperature: float = 300.0):
134+
self._modes: List[BindingMode] = list(modes) if modes else []
135+
self._temperature: float = temperature
129136

130137
def add_mode(self, mode: BindingMode) -> None:
131138
"""Add a binding mode to the population."""
@@ -286,21 +293,166 @@ def optimization_method(self) -> Optional[str]:
286293
"""Optimization method, e.g. 'GA' (METOPT keyword)."""
287294
return self._config.get("METOPT")
288295

289-
def run(self, **kwargs) -> BindingPopulation:
290-
"""Execute docking simulation.
291-
296+
def run(self, binary: Optional[str] = None,
297+
timeout: int = 3600, **kwargs) -> BindingPopulation:
298+
"""Execute docking via the FlexAID C++ binary and parse results.
299+
300+
Locates the ``FlexAID`` binary (in PATH, project build/, or explicit
301+
*binary* argument), invokes it with this config file, waits for
302+
completion, then parses all ``*_N_M.pdb`` output files written by
303+
``output_Population()`` to reconstruct a ``BindingPopulation``.
304+
305+
Args:
306+
binary: Path to FlexAID executable. If *None*, searches PATH and
307+
common build locations (``build/FlexAID``,
308+
``../build/FlexAID``).
309+
timeout: Wall-clock timeout in seconds (default 3600).
310+
**kwargs: Ignored; reserved for future keyword overrides.
311+
292312
Returns:
293-
BindingPopulation with ranked binding modes
294-
295-
Note:
296-
Full implementation requires integration with C++ FlexAID GA engine.
297-
This is a stub for Phase 2 development.
313+
BindingPopulation populated from the PDB REMARK lines written by
314+
``output_BindingMode()`` / ``output_Population()``.
315+
316+
Raises:
317+
FileNotFoundError: binary not found.
318+
RuntimeError: FlexAID exited non-zero or produced no output.
298319
"""
299-
raise NotImplementedError(
300-
"Full docking pipeline integration in progress. "
301-
"For now, use C++ FlexAID binary directly and parse output with "
302-
"BindingMode/BindingPopulation wrappers."
320+
# ── 1. Locate binary ─────────────────────────────────────────────────
321+
exe = self._find_binary(binary)
322+
323+
# ── 2. Invoke FlexAID ────────────────────────────────────────────────
324+
cmd = [str(exe), str(self.config_file)]
325+
try:
326+
result = subprocess.run(
327+
cmd,
328+
cwd=self.config_file.parent,
329+
capture_output=True,
330+
text=True,
331+
timeout=timeout,
332+
)
333+
except subprocess.TimeoutExpired as exc:
334+
raise RuntimeError(
335+
f"FlexAID timed out after {timeout}s"
336+
) from exc
337+
338+
if result.returncode != 0:
339+
raise RuntimeError(
340+
f"FlexAID exited with code {result.returncode}.\n"
341+
f"stdout: {result.stdout[-2000:]}\n"
342+
f"stderr: {result.stderr[-2000:]}"
343+
)
344+
345+
# ── 3. Discover output PDBs ──────────────────────────────────────────
346+
# output_BindingMode writes files named <prefix>_<minPoints>_<mode>.pdb
347+
# Collect all candidate PDB files in the working directory.
348+
work_dir = self.config_file.parent
349+
pdb_files = sorted(work_dir.glob("*_*.pdb"),
350+
key=lambda p: p.stat().st_mtime)
351+
352+
if not pdb_files:
353+
raise RuntimeError(
354+
"FlexAID completed but no PDB output files were found in "
355+
f"{work_dir}. Check the config file NRGOUT / output settings."
356+
)
357+
358+
# ── 4. Parse PDB REMARK lines into BindingModes ──────────────────────
359+
temperature = self._config.get("TEMPER", 300) or 300
360+
modes: List[BindingMode] = []
361+
seen_modes: Dict[int, BindingMode] = {}
362+
363+
for pdb_path in pdb_files:
364+
mode_info = self._parse_remark_pdb(pdb_path, temperature)
365+
if mode_info is None:
366+
continue
367+
mode_idx, pose = mode_info
368+
if mode_idx not in seen_modes:
369+
seen_modes[mode_idx] = BindingMode()
370+
seen_modes[mode_idx]._poses.append(pose)
371+
372+
# Sort modes by free energy (ascending → most favourable first)
373+
modes = sorted(seen_modes.values(),
374+
key=lambda m: m.free_energy)
375+
376+
return BindingPopulation(modes, temperature=float(temperature))
377+
378+
# ── helpers ───────────────────────────────────────────────────────────────
379+
380+
def _find_binary(self, binary: Optional[str]) -> Path:
381+
"""Locate the FlexAID executable."""
382+
if binary is not None:
383+
p = Path(binary)
384+
if not p.is_file():
385+
raise FileNotFoundError(f"Specified FlexAID binary not found: {binary}")
386+
return p
387+
388+
# Search order: PATH → project-relative build dirs
389+
in_path = shutil.which("FlexAID")
390+
if in_path:
391+
return Path(in_path)
392+
393+
candidates = [
394+
self.config_file.parent / "FlexAID",
395+
self.config_file.parent / "build" / "FlexAID",
396+
Path(__file__).parents[3] / "build" / "FlexAID",
397+
]
398+
for c in candidates:
399+
if c.is_file():
400+
return c
401+
402+
raise FileNotFoundError(
403+
"FlexAID binary not found in PATH or build/. "
404+
"Build with 'cmake --build build' or pass binary= argument."
405+
)
406+
407+
@staticmethod
408+
def _parse_remark_pdb(
409+
pdb_path: Path, temperature: float) -> Optional[tuple]:
410+
"""Parse a single output PDB written by output_BindingMode().
411+
412+
Extracts mode index, CF, RMSD, and per-pose energy from REMARK lines.
413+
Returns (mode_index, Pose) or None if the file lacks FlexAID remarks.
414+
"""
415+
mode_idx = None
416+
cf_val = None
417+
rmsd_val = None
418+
freq = 1
419+
420+
try:
421+
text = pdb_path.read_text(errors="replace")
422+
except OSError:
423+
return None
424+
425+
for line in text.splitlines():
426+
if not line.startswith("REMARK"):
427+
continue
428+
# "REMARK Binding Mode:N Best CF in Binding Mode:X …"
429+
m = re.search(
430+
r"Binding Mode:(\d+).*?Best CF in Binding Mode:\s*([-\d.]+)"
431+
r".*?Binding Mode Frequency:(\d+)",
432+
line)
433+
if m:
434+
mode_idx = int(m.group(1))
435+
cf_val = float(m.group(2))
436+
freq = int(m.group(3))
437+
# "REMARK 0.12345 RMSD to ref. structure …"
438+
m2 = re.search(r"REMARK\s+([\d.]+)\s+RMSD to ref\.", line)
439+
if m2 and rmsd_val is None:
440+
rmsd_val = float(m2.group(1))
441+
442+
if mode_idx is None or cf_val is None:
443+
return None
444+
445+
import math
446+
beta = 1.0 / (0.001987206 * float(temperature))
447+
bw = math.exp(-beta * cf_val)
448+
449+
pose = Pose(
450+
index=mode_idx,
451+
energy=cf_val,
452+
rmsd=rmsd_val,
453+
boltzmann_weight=bw,
303454
)
455+
return mode_idx, pose
304456

305457
def __repr__(self) -> str:
306458
return f"<Docking config={self.config_file.name}>"

0 commit comments

Comments
 (0)