@@ -891,6 +891,61 @@ def from_json(
891891 max_ligand_chains = max_ligand_chains ,
892892 )
893893
894+ def _populate_chains (self , ent : ty .Any , info : ty .Any ) -> None :
895+ """Set entry.chains and entry.water_chains from a loaded OST entity."""
896+ self .chains = {
897+ chain .name : Chain .from_ost_chain (
898+ chain , info , len (self .chain_to_seqres .get (chain .name , "" ))
899+ )
900+ for chain in ent .chains
901+ if chain .type != mol .CHAINTYPE_WATER
902+ }
903+ self .water_chains = [
904+ chain .name for chain in ent .chains if chain .type == mol .CHAINTYPE_WATER
905+ ]
906+
907+ def _collect_ligands_from_biounit (
908+ self ,
909+ biounit : ty .Any ,
910+ biounit_id : str ,
911+ interface_proximal_gaps : dict [str , ty .Any ],
912+ plip_complex_threshold : float ,
913+ neighboring_residue_threshold : float ,
914+ neighboring_ligand_threshold : float ,
915+ data_dir : Path | None ,
916+ ) -> dict [str , "Ligand" ]:
917+ """Create Ligand objects for every ligand chain in a single biounit."""
918+ ligands : dict [str , Ligand ] = {}
919+ biounit_ligand_chains = [
920+ chain .name
921+ for chain in biounit .chains
922+ if chain .name .split ("." )[1 ] in self .ligand_like_chains
923+ ]
924+ for ligand_chain in biounit_ligand_chains :
925+ ligand_instance , ligand_asym_id = ligand_chain .split ("." )
926+ residue_numbers = [
927+ residue .number .num
928+ for residue in biounit .FindChain (ligand_chain ).residues
929+ ]
930+ ligand = Ligand .from_pli (
931+ pdb_id = self .pdb_id ,
932+ biounit_id = biounit_id ,
933+ biounit = biounit ,
934+ ligand_instance = int (ligand_instance ),
935+ ligand_chain = self .chains [ligand_asym_id ],
936+ residue_numbers = residue_numbers ,
937+ ligand_like_chains = self .ligand_like_chains ,
938+ interface_proximal_gaps = interface_proximal_gaps ,
939+ all_covalent_dict = self .covalent_bonds ,
940+ plip_complex_threshold = plip_complex_threshold ,
941+ neighboring_residue_threshold = neighboring_residue_threshold ,
942+ neighboring_ligand_threshold = neighboring_ligand_threshold ,
943+ data_dir = data_dir ,
944+ )
945+ if ligand is not None :
946+ ligands [ligand .id ] = ligand
947+ return ligands
948+
894949 @classmethod
895950 def from_cif_file (
896951 cls ,
@@ -909,32 +964,29 @@ def from_cif_file(
909964 symmetry_mate_contact_threshold : float = 5.0 ,
910965 ) -> Entry :
911966 """
912- Load an entry object from mmcif files
967+ Load an entry object from mmCIF files in the pipeline
913968
914969 Parameters
915970 ----------
916971 cif_file : Path
917- mmcif files of interest
972+ mmCIF file of interest
918973 neighboring_residue_threshold : float
919- Distance from ligand for protein \
920- residues to be considered a ligand
974+ Distance from ligand for protein residues to be considered a ligand
921975 neighboring_ligand_threshold : float
922- Distance from ligand for other ligans \
923- to be considered a ligand
976+ Distance from ligand for other ligands to be considered a ligand
924977 min_polymer_size : int = 10
925- Minimum number of residues for chain to be seen as a \
926- polymer, or Maximum number of residues for chain to be seen as a ligand \
978+ Minimum number of residues for chain to be seen as a polymer,
979+ or Maximum number of residues for chain to be seen as a ligand
927980 max_non_small_mol_ligand_length: int = 20
928- Maximum length of polymer that should be assessed for potentially being ligand
981+ Maximum length of polymer to be assessed for potentially being ligand
929982 save_folder : Path
930983 Path to save files
931984 max_protein_chains_to_save : int
932985 Maximum number of protein chains to save
933986 max_ligand_chains_to_save : int
934987 Maximum number of ligand chains to save
935988 plip_complex_threshold=10
936- Maximum distance from ligand to residues to be
937- included for plip calculations.
989+ Maximum distance from ligand to residues to be included for plip calculations
938990 skip_save_systems: bool = False
939991 skips saving system files
940992 skip_posebusters: bool = False
@@ -954,9 +1006,6 @@ def from_cif_file(
9541006 )
9551007 entry_info = get_entry_info (cif_data )
9561008 per_chain = get_chain_external_mappings (cif_data )
957- # TODO: annotate_interface_gaps does not use the same ligand chain definitions as the rest
958- # move this to later after protein/ligand chain assignment?
959- interface_proximal_gaps = annotate_interface_gaps (cif_file )
9601009 resolution = entry_info .get ("entry_resolution" )
9611010 r = None
9621011 if resolution is not None :
@@ -984,16 +1033,7 @@ def from_cif_file(
9841033 chain_to_seqres = {c .name : c .string for c in seqres },
9851034 symmetry_mate_contacts = symmetry_mate_contacts ,
9861035 )
987- entry .chains = {
988- chain .name : Chain .from_ost_chain (
989- chain , info , len (entry .chain_to_seqres .get (chain .name , "" ))
990- )
991- for chain in ent .chains
992- if chain .type != mol .CHAINTYPE_WATER
993- }
994- entry .water_chains = [
995- chain .name for chain in ent .chains if chain .type == mol .CHAINTYPE_WATER
996- ]
1036+ entry ._populate_chains (ent , info )
9971037
9981038 if save_folder is not None and data_dir is None :
9991039 data_dir = save_folder .parent .parent
@@ -1006,44 +1046,29 @@ def from_cif_file(
10061046 entry .ligand_like_chains = detect_ligand_chains (
10071047 ent , entry , min_polymer_size , max_non_small_mol_ligand_length
10081048 )
1009- ligands = {}
1049+ protein_chains = [c for c in entry .chains if c not in entry .ligand_like_chains ]
1050+ interface_proximal_gaps = annotate_interface_gaps (
1051+ cif_file ,
1052+ protein_chains = protein_chains ,
1053+ ligand_chains = list (entry .ligand_like_chains .keys ()),
1054+ )
1055+ ligands : dict [str , Ligand ] = {}
10101056 biounits = {}
10111057 for biounit_info in info .biounits :
1012- biounit = mol .alg .CreateBU (ent , biounit_info )
10131058 # note, biounit chains are renamed to 1.A, 1.B, etc.
1014- biounit_ligand_chains = [
1015- chain .name
1016- for chain in biounit .chains
1017- if chain .name .split ("." )[1 ] in entry .ligand_like_chains
1018- ]
1019- for ligand_chain in biounit_ligand_chains :
1020- ligand_instance , ligand_asym_id = ligand_chain .split ("." )
1021- if save_folder is not None and data_dir is None :
1022- data_dir = save_folder .parent .parent
1023- residue_numbers = [
1024- residue .number .num
1025- for residue in biounit .FindChain (ligand_chain ).residues
1026- ]
1027- ligand = Ligand .from_pli (
1028- pdb_id = entry .pdb_id ,
1029- biounit_id = biounit_info .id ,
1030- biounit = biounit ,
1031- ligand_instance = int (ligand_instance ),
1032- ligand_chain = entry .chains [ligand_asym_id ],
1033- residue_numbers = residue_numbers ,
1034- ligand_like_chains = entry .ligand_like_chains ,
1035- interface_proximal_gaps = interface_proximal_gaps ,
1036- all_covalent_dict = entry .covalent_bonds ,
1037- plip_complex_threshold = plip_complex_threshold ,
1038- neighboring_residue_threshold = neighboring_residue_threshold ,
1039- neighboring_ligand_threshold = neighboring_ligand_threshold ,
1040- data_dir = data_dir ,
1041- )
1042- if ligand is not None :
1043- ligands [ligand .id ] = ligand
1044- # label crystal contacts
1045- ligand .label_crystal_contacts (entry .symmetry_mate_contacts )
1046-
1059+ biounit = mol .alg .CreateBU (ent , biounit_info )
1060+ new_ligands = entry ._collect_ligands_from_biounit (
1061+ biounit ,
1062+ biounit_info .id ,
1063+ interface_proximal_gaps ,
1064+ plip_complex_threshold ,
1065+ neighboring_residue_threshold ,
1066+ neighboring_ligand_threshold ,
1067+ data_dir ,
1068+ )
1069+ for ligand in new_ligands .values ():
1070+ ligand .label_crystal_contacts (entry .symmetry_mate_contacts )
1071+ ligands .update (new_ligands )
10471072 biounits [biounit_info .id ] = biounit
10481073 entry .set_systems (ligands )
10491074 entry .label_chains ()
@@ -1081,63 +1106,66 @@ def from_custom_cif_file(
10811106 max_protein_chains_to_save : int = 5 ,
10821107 max_ligand_chains_to_save : int = 5 ,
10831108 ) -> Entry :
1109+ """
1110+ Create an entry from an external mmCIF file
1111+
1112+ Parameters
1113+ ----------
1114+ pdb_id : str
1115+ Annotation to be used in the PDB ID column
1116+ cif_file : Path
1117+ mmCIF file of interest
1118+ neighboring_residue_threshold : float, optional
1119+ Distance from ligand for protein residues to be considered a ligand,
1120+ by default 6.0
1121+ neighboring_ligand_threshold : float, optional
1122+ Distance from ligand for other ligands to be considered a ligand,
1123+ by default 4.0
1124+ min_polymer_size : int, optional
1125+ Minimum number of residues for chain to be seen as a polymer, by default 10
1126+ save_folder : Path | None, optional
1127+ Path to save files, by default None
1128+ max_protein_chains_to_save : int, optional
1129+ Maximum number of protein chains to save, by default 5
1130+ max_ligand_chains_to_save : int, optional
1131+ Maximum number of ligand chains to save, by default 5
1132+
1133+ Returns
1134+ -------
1135+ Entry
1136+ Entry object for the given pdbid
1137+ """
10841138 ent , seqres , info = io .LoadMMCIF (
10851139 str (cif_file ), seqres = True , info = True , remote = False
10861140 )
10871141 entry = cls (
10881142 pdb_id = pdb_id ,
10891143 chain_to_seqres = {c .name : c .string for c in seqres },
10901144 )
1091- entry .chains = {
1092- chain .name : Chain .from_ost_chain (
1093- chain , info , len (entry .chain_to_seqres .get (chain .name , "" ))
1094- )
1095- for chain in ent .chains
1096- if chain .type != mol .CHAINTYPE_WATER
1097- }
1098- entry .water_chains = [
1099- chain .name for chain in ent .chains if chain .type == mol .CHAINTYPE_WATER
1100- ]
1145+ entry ._populate_chains (ent , info )
11011146 entry .ligand_like_chains = detect_ligand_chains (
11021147 ent , entry , min_polymer_size , max_non_small_mol_ligand_length
11031148 )
1149+ protein_chains = [c for c in entry .chains if c not in entry .ligand_like_chains ]
1150+ interface_proximal_gaps = annotate_interface_gaps (
1151+ cif_file ,
1152+ protein_chains = protein_chains ,
1153+ ligand_chains = list (entry .ligand_like_chains .keys ()),
1154+ )
11041155 biounit = ent .Copy ()
11051156 edi = biounit .EditXCS (mol .BUFFERED_EDIT )
11061157 for chain in biounit .chains :
11071158 edi .RenameChain (chain , f"1.{ chain .name } " )
11081159 edi .UpdateICS ()
1109- biounit_ligand_chains = [
1110- chain .name
1111- for chain in biounit .chains
1112- if chain .name .split ("." )[1 ] in entry .ligand_like_chains
1113- ]
1114- ligands = {}
1115- for ligand_chain in biounit_ligand_chains :
1116- ligand_instance , ligand_asym_id = ligand_chain .split ("." )
1117- residue_numbers = [
1118- residue .number .num
1119- for residue in biounit .FindChain (ligand_chain ).residues
1120- ]
1121- ligand = Ligand .from_pli (
1122- pdb_id = entry .pdb_id ,
1123- biounit_id = "1" ,
1124- biounit = biounit ,
1125- ligand_instance = int (ligand_instance ),
1126- ligand_chain = entry .chains [ligand_asym_id ],
1127- residue_numbers = residue_numbers ,
1128- ligand_like_chains = entry .ligand_like_chains ,
1129- interface_proximal_gaps = {
1130- "ppi_interface_gap_annotation" : {},
1131- "ligand_interface_gap_annotation" : {},
1132- },
1133- all_covalent_dict = entry .covalent_bonds ,
1134- plip_complex_threshold = plip_complex_threshold ,
1135- neighboring_residue_threshold = neighboring_residue_threshold ,
1136- neighboring_ligand_threshold = neighboring_ligand_threshold ,
1137- data_dir = None ,
1138- )
1139- if ligand is not None :
1140- ligands [ligand .id ] = ligand
1160+ ligands = entry ._collect_ligands_from_biounit (
1161+ biounit ,
1162+ "1" , # TODO: @JAY - is this necessary to be different from `from_cif_file` ?
1163+ interface_proximal_gaps ,
1164+ plip_complex_threshold ,
1165+ neighboring_residue_threshold ,
1166+ neighboring_ligand_threshold ,
1167+ data_dir = None ,
1168+ )
11411169 entry .set_systems (ligands )
11421170 entry .label_chains ()
11431171 if save_folder is not None :
@@ -1317,8 +1345,8 @@ def format(
13171345 self , criteria : QualityCriteria = QualityCriteria ()
13181346 ) -> dict [str , ty .Any ]:
13191347 """
1320- Format label for entry-level annotations by prepending \
1321- label with "entry_"
1348+ Format label for entry-level annotations by prepending label with "entry_"
1349+
13221350 Parameters
13231351 ----------
13241352 self : Entry
0 commit comments