55from tqdm import tqdm
66import argparse
77import torch
8+ import pandas as pd
910from MolecularDiffusion .utils import create_pyg_graph , correct_edges
1011from MolecularDiffusion .utils .geom_utils import read_xyz_file
@@ -159,7 +160,9 @@ def get_xtb_optimized_xyz(
159160 level : str = "gfn1" ,
160161 timeout : int = 240 ,
161162 scale_factor : float = 1.3 ,
162- optimize_all : bool = True
163+ optimize_all : bool = True ,
164+ csv_path : str = None ,
165+ filter_column : str = None
163166) -> list [str ]:
164167 """
165168 Optimizes all XYZ files in a given input directory using xTB and saves them
@@ -180,6 +183,8 @@ def get_xtb_optimized_xyz(
180183 timeout (int, optional): The maximum time in seconds to wait for each xTB process. Defaults to 240.
181184 scale_factor (float, optional): The scaling factor for covalent radii in edge correction. Defaults to 1.3.
182185 optimize_all (bool, optional): If True, optimizes all files regardless of existing optimized versions.
186+ csv_path (str, optional): Path to a CSV file to filter which XYZ files to optimize.
187+ filter_column (str, optional): The column name in the CSV to filter by (values must be 1).
183188
184189 Returns:
185190 list[str]: A list of paths to the successfully optimized XYZ files.
@@ -189,7 +194,49 @@ def get_xtb_optimized_xyz(
189194
190195 os .makedirs (output_directory , exist_ok = True )
191196
192- xyz_files = glob .glob (os .path .join (input_directory , "*.xyz" ))
197+ xyz_files = []
198+ if csv_path :
199+ if not os .path .exists (csv_path ):
200+ raise FileNotFoundError (f"CSV file not found: { csv_path } " )
201+
202+ df = pd .read_csv (csv_path )
203+
204+ fname_col = None
205+ for col in ["xyz_file" , "filename" , "filepath" ]:
206+ if col in df .columns :
207+ fname_col = col
208+ break
209+
210+ if fname_col is None :
211+ raise ValueError ("CSV must contain 'xyz_file', 'filename', or 'filepath' column." )
212+
213+ if filter_column :
214+ if filter_column not in df .columns :
215+ raise ValueError (f"Filter column '{ filter_column } ' not found in CSV." )
216+ # Filter rows where the value is 1 (as integer or string)
217+ filtered_df = df [df [filter_column ].isin (['1' , '1.0' , True , 1 ])]
218+ else :
219+ filtered_df = df
220+
221+ for _ , row in filtered_df .iterrows ():
222+ fname = str (row [fname_col ])
223+ # Handle potential missing extension if it's just a name
224+ if not fname .lower ().endswith ('.xyz' ):
225+ fname += '.xyz'
226+
227+ if os .path .isabs (fname ):
228+ full_path = fname
229+ else :
230+ full_path = os .path .join (input_directory , fname )
231+
232+ if os .path .exists (full_path ):
233+ xyz_files .append (full_path )
234+ else :
235+ print (f"Warning: File from CSV not found: { full_path } " )
236+
237+ else :
238+ xyz_files = glob .glob (os .path .join (input_directory , "*.xyz" ))
239+
193240 optimized_files = []
194241
195242 for xyz_file in tqdm (xyz_files , desc = "Optimizing XYZ files" , total = len (xyz_files )):
@@ -260,6 +307,18 @@ def get_xtb_optimized_xyz(
260307 default = 1.3 ,
261308 help = "Scaling factor for covalent radii in edge correction. Defaults to 1.3."
262309 )
310+ parser .add_argument (
311+ "--csv_path" ,
312+ type = str ,
313+ default = None ,
314+ help = "Path to CSV file for filtering which files to optimize."
315+ )
316+ parser .add_argument (
317+ "--filter_column" ,
318+ type = str ,
319+ default = None ,
320+ help = "Column name in CSV to filter by (values must be 1 to process)."
321+ )
263322
264323 args = parser .parse_args ()
265324
@@ -272,7 +331,9 @@ def get_xtb_optimized_xyz(
272331 charge = args .charge ,
273332 level = args .level ,
274333 timeout = args .timeout ,
275- scale_factor = args .scale_factor
334+ scale_factor = args .scale_factor ,
335+ csv_path = args .csv_path ,
336+ filter_column = args .filter_column
276337 )
277338
278339 print (f"Successfully optimized { len (optimized_files )} XYZ files and saved them in '{ output_dir } '." )