lucidrains
diff --git a/alphafold3_pytorch/alphafold3.py b/alphafold3_pytorch/alphafold3.py
Lines changed: 1 addition & 0 deletions
diff --git a/alphafold3_pytorch/data/mmcif_writing.py b/alphafold3_pytorch/data/mmcif_writing.py
Lines changed: 6 additions & 15 deletions
diff --git a/alphafold3_pytorch/inputs.py b/alphafold3_pytorch/inputs.py
Lines changed: 7 additions & 0 deletions
diff --git a/alphafold3_pytorch/trainer.py b/alphafold3_pytorch/trainer.py
Lines changed: 8 additions & 2 deletions
@@ -4833,6 +4833,7 @@ def forward(
         plddt_labels: Int['b n'] | Int['b m'] | None = None,
         resolved_labels: Int['b n'] | Int['b m'] | None = None,
         chains: Int['b 2'] | None = None,
+        filepath: List[str] | None = None,
         return_loss_breakdown = False,
         return_loss: bool = None,
         return_present_sampled_atoms: bool = False,
 
@@ -12,24 +12,15 @@
 from alphafold3_pytorch.data.mmcif_parsing import MmcifObject, parse_mmcif_object
 from alphafold3_pytorch.utils.utils import exists
 
+
 def write_mmcif_from_filepath_and_id(
-    filepath: str,
-    file_id: str,
-    suffix: str = 'sampled',
-    **kwargs
+    input_filepath: str, output_filepath: str, file_id: str, **kwargs
 ):
-    mmcif_object = parse_mmcif_object(
-        filepath = filepath,
-        file_id = file_id
-    )
+    """Write an input mmCIF file to an output mmCIF filepath using the provided keyword arguments
+    (e.g., sampled coordinates)."""
+    mmcif_object = parse_mmcif_object(filepath=input_filepath, file_id=file_id)
+    return write_mmcif(mmcif_object, output_filepath=output_filepath, **kwargs)
 
-    output_filepath = filepath.replace(".cif", f"-{suffix}.cif")
-
-    return write_mmcif(
-        mmcif_object,
-        output_filepath = output_filepath,
-        **kwargs
-    )
 
 def write_mmcif(
     mmcif_object: MmcifObject,
 
@@ -178,6 +178,7 @@ class AtomInput:
     plddt_labels:               Int[' n'] | None = None
     resolved_labels:            Int[' n'] | None = None
     chains:                     Int[" 2"] | None = None
+    filepath:                   str | None = None
 
     def dict(self):
         return asdict(self)
@@ -211,6 +212,7 @@ class BatchedAtomInput:
     plddt_labels:               Int['b n'] | None = None
     resolved_labels:            Int['b n'] | None = None
     chains:                     Int["b 2"] | None = None
+    filepath:                   List[str] | None = None
 
     def dict(self):
         return asdict(self)
@@ -432,6 +434,7 @@ class MoleculeInput:
     pde_labels:                 Int[' n'] | None = None
     resolved_labels:            Int[' n'] | None = None
     chains:                     Tuple[int | None, int | None] | None = (None, None)
+    filepath:                   str | None = None
     add_atom_ids:               bool = False
     add_atompair_ids:           bool = False
     directed_bonds:             bool = False
@@ -712,6 +715,7 @@ def molecule_to_atom_input(mol_input: MoleculeInput) -> AtomInput:
         atom_ids=atom_ids,
         atompair_ids=atompair_ids,
         chains=chains,
+        filepath=i.filepath,
     )
 
     return atom_input
@@ -749,6 +753,7 @@ class MoleculeLengthMoleculeInput:
     pde_labels:                 Int[' n'] | None = None
     resolved_labels:            Int[' n'] | None = None
     chains:                     Tuple[int | None, int | None] | None = (None, None)
+    filepath:                   str | None = None
     add_atom_ids:               bool = False
     add_atompair_ids:           bool = False
     directed_bonds:             bool = False
@@ -1135,6 +1140,7 @@ def molecule_lengthed_molecule_input_to_atom_input(mol_input: MoleculeLengthMole
         atom_ids = atom_ids,
         atompair_ids = atompair_ids,
         chains = chains,
+        filepath=i.filepath,
     )
 
     return atom_input
@@ -2602,6 +2608,7 @@ def pdb_input_to_molecule_input(
         template_mask=template_mask,
         msa_mask=msa_mask,
         chains=chains,
+        filepath=filepath,
         add_atom_ids=i.add_atom_ids,
         add_atompair_ids=i.add_atompair_ids,
         directed_bonds=i.directed_bonds,
 
@@ -142,12 +142,12 @@ def collate_inputs_to_batched_atom_input(
 
     # separate input dictionary into keys and values
 
-    keys = atom_inputs[0].dict().keys()
+    keys = list(atom_inputs[0].dict().keys())
     atom_inputs = [i.dict().values() for i in atom_inputs]
 
     outputs = []
 
-    for grouped in zip(*atom_inputs):
+    for group_index, grouped in enumerate(zip(*atom_inputs)):
         # if all None, just return None
 
         not_none_grouped = [*filter(exists, grouped)]
@@ -156,6 +156,12 @@ def collate_inputs_to_batched_atom_input(
             outputs.append(None)
             continue
 
+        # collate list of input filepath strings
+
+        if keys[group_index] == "filepath":
+            outputs.append(not_none_grouped)
+            continue
+
         # default to empty tensor for any Nones
 
         one_tensor = not_none_grouped[0]