Skip to content

Commit e8454b4

Browse files
committed
clean_fasta_headers added for lower versions of netmhcpan
1 parent e1cc39e commit e8454b4

File tree

2 files changed

+83
-1
lines changed

2 files changed

+83
-1
lines changed

run_utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -747,7 +747,9 @@ def __binder_pred(self):
747747
output_dir = os.path.join(self.output_dir, 'peptide_design')
748748
peptide_fasta_file = [i for i in os.listdir(output_dir+'/'+'seqs') if i.endswith('.fa')][0]
749749
peptide_fasta_file = os.path.join(output_dir+'/'+'seqs', peptide_fasta_file)
750-
processing_functions.clean_fasta_headers(peptide_fasta_file, peptide_fasta_file)
750+
processing_functions.clean_fasta_headers(peptide_fasta_file, peptide_fasta_file+'tmp.fa')
751+
os.remove(peptide_fasta_file)
752+
shutil.move(peptide_fasta_file+'tmp.fa', peptide_fasta_file)
751753
mhc_type = 2 if len(self.chain_dict_dist.keys()) == 2 else 1
752754
mhc_seq_dict = processing_functions.fetch_polypeptide_sequences(self.multichain_pdb)
753755
mhc_seq_list = [mhc_seq_dict['A'], mhc_seq_dict['B']] if mhc_type==2 else [mhc_seq_dict['A']]

utils/processing_functions.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1562,4 +1562,84 @@ def mutate_peptide(peptide: str, positions: list, k: int) -> list:
15621562
return mutated_peptides
15631563

15641564

1565+
def add_plddt_as_bfactor_verbose(array_path, pdb_path, output_pdb_path, verbose=False):
    """
    Add pLDDT values from a numpy array as B-factors to a PDB structure (verbose version).

    Residues are matched to array entries by position, not by PDB residue
    number: the first non-hetero residue of the first chain receives
    ``plddt_array[0]``, the second ``plddt_array[1]``, and so on. Every atom
    of a residue gets that residue's pLDDT value as its B-factor.

    Args:
        array_path: Path to numpy array file (.npy) with shape (seq,) containing pLDDT values
        pdb_path: Path to input PDB file (single chain)
        output_pdb_path: Path to save output PDB file with updated B-factors
        verbose: Whether to print detailed information

    Returns:
        dict with statistics about the operation:
            'n_residues': number of residues that were updated
            'n_atoms_total': number of atoms whose B-factor was set
            'plddt_values': list of the pLDDT values applied, in residue order

    Raises:
        ValueError: If the structure has no chains, or if the pLDDT array has
            fewer entries than the chain has residues.
    """
    # Load pLDDT array
    plddt_array = np.load(array_path)
    if verbose:
        print(f"Loaded pLDDT array with shape: {plddt_array.shape}")
        print(f"pLDDT range: [{plddt_array.min():.2f}, {plddt_array.max():.2f}]")

    # Parse the structure and take the first model
    parser = PDB.PDBParser(QUIET=True)
    structure = parser.get_structure('protein', pdb_path)
    model = structure[0]

    chains = list(model.get_chains())
    if not chains:
        raise ValueError("No chains found in PDB structure")
    if len(chains) > 1 and verbose:
        print(f"Warning: Multiple chains found ({len(chains)}). Using first chain: {chains[0].id}")

    # Only the first chain is annotated
    chain = chains[0]

    # Get all residues (excluding hetero atoms, whose id[0] flag is not blank)
    residues = [res for res in chain.get_residues() if res.id[0] == ' ']

    # Check dimensions: the array must provide one value per residue.
    # (Previously n_plddt was computed from the residue list itself, so the
    # comparison could never detect a mismatch.)
    n_residues = len(residues)
    n_plddt = len(plddt_array)
    if n_plddt < n_residues:
        # Fail before touching any atom instead of hitting an IndexError
        # half-way through the loop below.
        raise ValueError(
            f"pLDDT array has {n_plddt} values but the chain has "
            f"{n_residues} residues"
        )
    if n_plddt > n_residues and verbose:
        print(
            f"Warning: pLDDT array has {n_plddt} values but the chain has "
            f"{n_residues} residues. Using the first {n_residues} values."
        )

    if verbose:
        print(f"\nProcessing {n_residues} residues:")
        print(f"{'Residue':<10} {'Res_Num':<10} {'Array_Idx':<12} {'pLDDT':<10} {'N_Atoms':<10}")
        print("-" * 60)

    # Statistics
    stats = {
        'n_residues': n_residues,
        'n_atoms_total': 0,
        'plddt_values': []
    }

    # Iterate over residues (first residue -> array index 0)
    for array_idx, residue in enumerate(residues):
        plddt_value = float(plddt_array[array_idx])
        stats['plddt_values'].append(plddt_value)

        # Set B-factor for all atoms in this residue
        atoms = list(residue.get_atoms())
        n_atoms = len(atoms)
        stats['n_atoms_total'] += n_atoms
        for atom in atoms:
            atom.set_bfactor(plddt_value)

        # Verbose output for first 5 and last 5 residues only
        if verbose:
            if array_idx < 5 or array_idx >= n_residues - 5:
                res_name = residue.get_resname()
                res_num = residue.id[1]  # Residue number from PDB
                print(f"{res_name:<10} {res_num:<10} {array_idx:<12} {plddt_value:<10.2f} {n_atoms:<10}")
            elif array_idx == 5:
                print("...")

    io = PDBIO()
    io.set_structure(structure)
    io.save(output_pdb_path)

    if verbose:
        print("\n" + "=" * 60)
        print("Summary:")
        print(f" Output saved to: {output_pdb_path}")
        print(f" Total residues processed: {stats['n_residues']}")
        print(f" Total atoms updated: {stats['n_atoms_total']}")
        if stats['plddt_values']:
            print(" pLDDT statistics:")
            print(f" Mean: {np.mean(stats['plddt_values']):.2f}")
            print(f" Std: {np.std(stats['plddt_values']):.2f}")
            print(f" Min: {np.min(stats['plddt_values']):.2f}")
            print(f" Max: {np.max(stats['plddt_values']):.2f}")
        else:
            # np.min / np.max raise on an empty sequence; nothing to report.
            print(" pLDDT statistics: n/a (no residues processed)")
        print("=" * 60)

    return stats
15651645

0 commit comments

Comments
 (0)