@@ -193,7 +193,7 @@ def run(self):
193193 # os.remove('.tmpmap')
194194 # cfiles = glob.glob('.contacts*')
195195 # [os.remove(f) for f in cfiles]
196- print (f'\n Saved contacts to "contacts_{ self .cutoff } .npy "' )
196+ print (f'\n Saved contacts to "contacts_{ self .cutoff } .pkl "' )
197197
198198 def _lipswap (self , lip , memarr , i ):
199199 from basicrta .util import get_dec
@@ -232,6 +232,134 @@ def _lipswap(self, lip, memarr, i):
232232 return len (dset )
233233
234234
class CombineContacts(object):
    """Class to combine contact timeseries from multiple repeat runs.

    This class enables pooling data from multiple trajectory repeats and
    calculating posteriors from all data together, rather than analyzing
    each run separately. The combined array gains one extra column holding
    the index of the source trajectory for each contact row.

    :param contact_files: List of contact pickle files to combine
    :type contact_files: list of str
    :param output_name: Name for the combined output file
        (default: 'combined_contacts.pkl')
    :type output_name: str, optional
    :param validate_compatibility: Whether to validate that files are
        compatible (default: True)
    :type validate_compatibility: bool, optional
    :raises ValueError: if fewer than 2 contact files are supplied, or if
        validation detects incompatible cutoffs/atom groups
    """

    def __init__(self, contact_files, output_name='combined_contacts.pkl',
                 validate_compatibility=True):
        # Validate before assigning attributes so a bad instance is never
        # half-constructed.
        if len(contact_files) < 2:
            raise ValueError("At least 2 contact files are required for combining")

        self.contact_files = contact_files
        self.output_name = output_name
        self.validate_compatibility = validate_compatibility

    def _load_contact_file(self, filename):
        """Load a contact pickle file and return (contacts, metadata).

        ``metadata`` is the numpy dtype metadata mapping attached when the
        contacts were saved (may be ``None`` if absent).
        """
        if not os.path.exists(filename):
            raise FileNotFoundError(f"Contact file not found: {filename}")

        with open(filename, 'rb') as f:
            contacts = pickle.load(f)

        metadata = contacts.dtype.metadata
        return contacts, metadata

    def _validate_compatibility(self, metadatas):
        """Validate that contact files are compatible for combining.

        Raises ``ValueError`` on mismatched cutoffs or atom-group residues;
        only warns on differing timesteps since those may still be usable.
        """
        reference = metadatas[0]

        # Check cutoff and atom groups of every file against the first one
        for i, meta in enumerate(metadatas[1:], 1):
            # Compare cutoff
            if meta['cutoff'] != reference['cutoff']:
                raise ValueError(f"Incompatible cutoffs: file 0 has {reference['cutoff']}, "
                                 f"file {i} has {meta['cutoff']}")

            # Compare atom group selections by checking if resids match
            ref_ag1_resids = set(reference['ag1'].residues.resids)
            ref_ag2_resids = set(reference['ag2'].residues.resids)
            meta_ag1_resids = set(meta['ag1'].residues.resids)
            meta_ag2_resids = set(meta['ag2'].residues.resids)

            if ref_ag1_resids != meta_ag1_resids:
                raise ValueError(f"Incompatible ag1 residues between file 0 and file {i}")
            if ref_ag2_resids != meta_ag2_resids:
                raise ValueError(f"Incompatible ag2 residues between file 0 and file {i}")

        # Check timesteps and warn if different (tolerance for float noise)
        timesteps = [meta['ts'] for meta in metadatas]
        if not all(abs(ts - timesteps[0]) < 1e-6 for ts in timesteps):
            print("WARNING: Different timesteps detected across runs:")
            for i, (filename, ts) in enumerate(zip(self.contact_files, timesteps)):
                print(f"  File {i} ({filename}): dt = {ts} ns")
            print("This may affect residence time estimates, especially for fast events.")

    def run(self):
        """Combine contact files and save the result.

        :returns: path of the combined output file
        :rtype: str
        """
        print(f"Combining {len(self.contact_files)} contact files...")

        all_contacts = []
        all_metadatas = []

        # Load all contact files
        for i, filename in enumerate(self.contact_files):
            print(f"Loading file {i + 1}/{len(self.contact_files)}: {filename}")
            contacts, metadata = self._load_contact_file(filename)
            all_contacts.append(contacts)
            all_metadatas.append(metadata)

        # Validate compatibility if requested
        if self.validate_compatibility:
            print("Validating file compatibility...")
            self._validate_compatibility(all_metadatas)

        # Combine contact data
        print("Combining contact data...")

        # Calculate total size and create combined array
        total_size = sum(len(contacts) for contacts in all_contacts)
        # dtype.metadata is a read-only mapping (or None); copy into a plain
        # dict so it can be extended. The ``or {}`` guards files saved
        # without metadata, where ``.copy()`` would raise AttributeError.
        reference_metadata = dict(all_metadatas[0] or {})

        # Extend metadata to include trajectory source information
        reference_metadata['source_files'] = self.contact_files
        reference_metadata['n_trajectories'] = len(self.contact_files)

        # Determine number of columns (5 for raw contacts, 4 for processed)
        n_cols = all_contacts[0].shape[1]

        # Create dtype with extended metadata
        combined_dtype = np.dtype(np.float64, metadata=reference_metadata)

        # Add trajectory source column (will be last column)
        combined_contacts = np.zeros((total_size, n_cols + 1), dtype=np.float64)

        # Combine data and add trajectory source information
        offset = 0
        for traj_idx, contacts in enumerate(all_contacts):
            n_contacts = len(contacts)
            # Copy original contact data
            combined_contacts[offset:offset + n_contacts, :n_cols] = contacts[:]
            # Add trajectory source index
            combined_contacts[offset:offset + n_contacts, n_cols] = traj_idx
            offset += n_contacts

        # Attach the metadata-carrying dtype (same itemsize, zero-copy view)
        final_contacts = combined_contacts.view(combined_dtype)

        # Save combined contacts. NOTE: ndarray.dump(file) accepts no
        # ``protocol`` argument, so the previous
        # ``final_contacts.dump(self.output_name, protocol=5)`` raised
        # TypeError; pickle explicitly instead.
        print(f"Saving combined contacts to {self.output_name}...")
        with open(self.output_name, 'wb') as f:
            pickle.dump(final_contacts, f, protocol=5)

        print(f"Successfully combined {len(self.contact_files)} files into {self.output_name}")
        print(f"Total contacts: {total_size}")
        print(f"Added trajectory source column (index {n_cols}) for kinetic clustering support")

        return self.output_name
361+
362+
235363if __name__ == '__main__' :
236364 """DOCSSS
237365 """
0 commit comments