@@ -1562,4 +1562,84 @@ def mutate_peptide(peptide: str, positions: list, k: int) -> list:
15621562 return mutated_peptides
15631563
15641564
def add_plddt_as_bfactor_verbose(array_path, pdb_path, output_pdb_path, verbose: bool = False) -> dict:
    """
    Add pLDDT values from a numpy array as B-factors to a PDB structure (verbose version).

    The i-th standard residue (0-based, in the order the chain yields them) of
    the first chain of the first model gets ``plddt_array[i]`` written to the
    B-factor of every one of its atoms. Residues whose hetero flag is not blank
    are not assigned a value. The whole parsed structure object is then handed
    to ``PDBIO`` and saved, so anything that was not updated is written with
    the B-factors it was parsed with.

    Args:
        array_path: Path to numpy array file (.npy) with shape (seq,) containing pLDDT values
        pdb_path: Path to input PDB file (single chain)
        output_pdb_path: Path to save output PDB file with updated B-factors
        verbose: Whether to print detailed information

    Returns:
        dict with statistics about the operation:
            'n_residues': number of residues that received a pLDDT value
            'n_atoms_total': number of atoms whose B-factor was set
            'plddt_values': list of the float pLDDT values applied, in residue order

    Raises:
        ValueError: If the structure's first model has no chains, or if the
            pLDDT array holds fewer values than the chain has standard residues.
    """
    # Load pLDDT array
    plddt_array = np.load(array_path)
    if verbose:
        print(f"Loaded pLDDT array with shape: {plddt_array.shape} ")
        print(f"pLDDT range: [{plddt_array.min():.2f} , {plddt_array.max():.2f} ]")

    # Parse the structure (QUIET suppresses parser construction warnings)
    parser = PDB.PDBParser(QUIET=True)
    structure = parser.get_structure('protein', pdb_path)

    # Only the first chain of the first model is annotated
    model = structure[0]
    chains = list(model.get_chains())
    if not chains:
        raise ValueError("No chains found in PDB structure")
    chain = chains[0]
    if len(chains) > 1 and verbose:
        print(f"Warning: Multiple chains found ({len(chains)} ). Using first chain: {chain.id} ")

    # Keep only residues with a blank hetero flag (excludes hetero atoms)
    residues = [res for res in chain.get_residues() if res.id[0] == ' ']

    # Check dimensions: compare the residue count against the *array* length.
    n_residues = len(residues)
    n_plddt = len(plddt_array)
    if n_plddt < n_residues:
        # Fail before touching any atom instead of dying mid-loop with an
        # uninformative IndexError.
        raise ValueError(
            f"pLDDT array has {n_plddt} values but the chain has "
            f"{n_residues} residues; at least one value per residue is required"
        )
    if n_plddt > n_residues and verbose:
        print(
            f"Warning: pLDDT array has {n_plddt} values but the chain has "
            f"{n_residues} residues; ignoring the last {n_plddt - n_residues} values"
        )

    if verbose:
        print(f"\n Processing {n_residues} residues:")
        print(f"{'Residue':<10} {'Res_Num':<10} {'Array_Idx':<12} {'pLDDT':<10} {'N_Atoms':<10} ")
        print("-" * 60)

    # Statistics
    stats = {
        'n_residues': n_residues,
        'n_atoms_total': 0,
        'plddt_values': [],
    }

    # Mapping is positional (first residue -> array index 0); the residue
    # number stored in the PDB file is only reported, never used for lookup.
    for array_idx, residue in enumerate(residues):
        plddt_value = float(plddt_array[array_idx])
        stats['plddt_values'].append(plddt_value)

        res_name = residue.get_resname()
        res_num = residue.id[1]  # Residue number from PDB

        # Set B-factor for all atoms in this residue
        atoms = list(residue.get_atoms())
        n_atoms = len(atoms)
        stats['n_atoms_total'] += n_atoms
        for atom in atoms:
            atom.set_bfactor(plddt_value)

        # Verbose output for first 5 and last 5 residues, with a single
        # ellipsis row standing in for everything in between.
        if verbose and (array_idx < 5 or array_idx >= n_residues - 5):
            print(f"{res_name:<10} {res_num:<10} {array_idx:<12} {plddt_value:<10.2f} {n_atoms:<10} ")
        elif verbose and array_idx == 5:
            print("...")

    # Write the full structure (with the updated B-factors) to disk
    io = PDBIO()
    io.set_structure(structure)
    io.save(output_pdb_path)

    if verbose:
        print("\n " + "=" * 60)
        print("Summary:")
        print(f" Output saved to: {output_pdb_path} ")
        print(f" Total residues processed: {stats['n_residues']} ")
        print(f" Total atoms updated: {stats['n_atoms_total']} ")
        # np.min / np.max raise on an empty sequence, so skip the statistics
        # when no residue was processed (the file has already been written).
        if stats['plddt_values']:
            print(f" pLDDT statistics:")
            print(f" Mean: {np.mean(stats['plddt_values']):.2f} ")
            print(f" Std: {np.std(stats['plddt_values']):.2f} ")
            print(f" Min: {np.min(stats['plddt_values']):.2f} ")
            print(f" Max: {np.max(stats['plddt_values']):.2f} ")
        print("=" * 60)

    return stats
15651645
0 commit comments