11import warnings
2+ from functools import reduce
23from pathlib import Path
34import os
4- from pyhdx .models import PeptideMasterTable , HDXMeasurement , HDXMeasurementSet
5- from pyhdx .fileIO import read_dynamx
5+ import re
66
7+ from pyhdx import TorchFitResult
8+ from pyhdx .models import PeptideMasterTable , HDXMeasurement , HDXMeasurementSet
9+ from pyhdx .fileIO import read_dynamx , csv_to_dataframe , save_fitresult
10+ from pyhdx .fitting import fit_rates_half_time_interpolate , fit_rates_weighted_average , \
11+ fit_gibbs_global , fit_gibbs_global_batch , RatesFitResult , GenericFitResult
12+ import param
13+ import pandas as pd
14+ from pyhdx .support import gen_subclasses
15+ import yaml
716
# Number of seconds in one unit of time, keyed by the unit's abbreviation.
time_factors = {
    "s": 1,
    "m": 60.0,
    "min": 60.0,
    "h": 3600,
    "d": 86400,
}
# Offset between the zero point of a temperature unit and Kelvin, keyed by
# the (lowercase) unit name.
temperature_offsets = {
    "c": 273.15,
    "celsius": 273.15,
    "k": 0,
    "kelvin": 0,
}
1019
11- # todo add data filters in yaml spec
20+
21+ # todo add data filters in state spec?
1222# todo add proline, n_term options
13- class YamlParser (object ):
14- "" 'object used to parse yaml data input files into PyHDX HDX Measurement object'
23+ class StateParser (object ):
24+ "" 'object used to parse yaml state input files into PyHDX HDX Measurement object'
1525
16- def __init__ (self , yaml_dict , data_src = None , data_filters = None ):
17- self .yaml_dict = yaml_dict
26+ # todo yaml_dict -> state_spec
27+ def __init__ (self , state_spec , data_src = None , data_filters = None ):
28+ self .state_spec = state_spec
1829 if isinstance (data_src , (os .PathLike , str )):
1930 self .data_src = Path (data_src )
2031 elif isinstance (data_src , dict ):
@@ -44,7 +55,7 @@ def load_data(self, *filenames, reader='dynamx'):
4455 def load_hdxmset (self ):
4556 """batch read the full yaml spec into a hdxmeasurementset"""
4657 hdxm_list = []
47- for state in self .yaml_dict .keys ():
58+ for state in self .state_spec .keys ():
4859 hdxm = self .load_hdxm (state , name = state )
4960 hdxm_list .append (hdxm )
5061
@@ -55,7 +66,7 @@ def load_hdxm(self, state, **kwargs):
5566 kwargs: additional kwargs passed to hdxmeasurementset
5667 """
5768
58- state_dict = self .yaml_dict [state ]
69+ state_dict = self .state_spec [state ]
5970
6071 filenames = state_dict ["filenames" ]
6172 df = self .load_data (* filenames )
@@ -95,8 +106,8 @@ def load_hdxm(self, state, **kwargs):
95106 raise ValueError ("Must specify either 'c_term' or 'sequence'" )
96107
97108 state_data = pmt .get_state (state_dict ["state" ])
98- for filter in self .data_filters :
99- state_data = filter (state_data )
109+ for flt in self .data_filters :
110+ state_data = flt (state_data )
100111
101112 hdxm = HDXMeasurement (
102113 state_data ,
@@ -111,16 +122,169 @@ def load_hdxm(self, state, **kwargs):
111122 return hdxm
112123
113124
# Lookup table from a function's name (as a string) to the callable itself.
process_functions = dict(
    csv_to_dataframe=csv_to_dataframe,
    fit_rates_half_time_interpolate=fit_rates_half_time_interpolate,
    fit_rates_weighted_average=fit_rates_weighted_average,
    fit_gibbs_global=fit_gibbs_global,
)
132+
# task objects should be param
class Task(param.Parameterized):
    """Base class for tasks, holding the parameters every task shares.

    Subclasses provide an ``execute`` method and a ``_type`` string.
    """

    scheduler_address = param.String(doc='Optional scheduler adress for dask task')

    cwd = param.ClassSelector(class_=Path, doc='Path of the current working directory')
140+
141+
class LoadHDMeasurementSetTask(Task):
    """Task which reads a yaml state file and loads it as a HDXMeasurementSet.

    The loaded measurement set is stored on the ``out`` parameter.
    """

    _type = 'load_hdxm_set'

    # Path of the yaml state file as a string; joined onto `cwd` in `execute`
    state_file = param.String()

    out = param.ClassSelector(class_=HDXMeasurementSet)

    def execute(self, *args, **kwargs):
        """Parse the state file and store the resulting measurement set in `out`.

        Positional and keyword arguments are accepted but not used.
        """
        spec_path = self.cwd / self.state_file
        state_spec = yaml.safe_load(spec_path.read_text())
        # Data files are resolved by the parser with `cwd` as data source
        self.out = StateParser(state_spec, self.cwd, default_filters).load_hdxmset()
155+
156+
class EstimateRates(Task):
    """Task which runs `fit_rates_half_time_interpolate` on a measurement set.

    The fit result is stored on the ``out`` parameter.
    """

    _type = 'estimate_rates'

    hdxm_set = param.ClassSelector(class_=HDXMeasurementSet)

    select_state = param.String(doc='If set, only use this state for creating initial guesses')

    out = param.ClassSelector(class_=(RatesFitResult, GenericFitResult))

    def execute(self, *args, **kwargs):
        """Fit rates for the selected state, or for every measurement in the set.

        Positional and keyword arguments are accepted but not used.
        """
        if not self.select_state:
            # No state selected: fit each measurement and bundle the results
            fits = [fit_rates_half_time_interpolate(hdxm) for hdxm in self.hdxm_set]
            self.out = RatesFitResult(fits)
        else:  # refactor to 'state' ?
            selected = self.hdxm_set.get(self.select_state)
            self.out = fit_rates_half_time_interpolate(selected)
178+
179+
# todo allow guesses from deltaG
class ProcessGuesses(Task):
    """Task which turns fitted rates into initial guesses via `guess_deltaG`.

    The guesses are stored on the ``out`` parameter.
    """

    _type = 'create_guess'

    hdxm_set = param.ClassSelector(class_=HDXMeasurementSet)

    select_state = param.String(doc='If set, only use this state for creating initial guesses')

    rates_df = param.ClassSelector(class_=pd.DataFrame)

    out = param.ClassSelector(class_=(pd.Series, pd.DataFrame))

    def execute(self, *args, **kwargs):
        """Compute guesses for the selected state, or for the whole set.

        Positional and keyword arguments are accepted but not used.
        """
        state = self.select_state
        if not state:
            # Take the 'rate' entries of the last column level for all states
            rates = self.rates_df.xs('rate', level=-1, axis=1)
            self.out = self.hdxm_set.guess_deltaG(rates)
            return

        hdxm = self.hdxm_set.get(state)
        # With two column levels the rates are keyed by (state, 'rate'),
        # otherwise by 'rate' alone
        two_levels = self.rates_df.columns.nlevels == 2
        column = (state, 'rate') if two_levels else 'rate'
        self.out = hdxm.guess_deltaG(self.rates_df[column])
207+
208+
class FitGlobalBatch(Task):
    """Task which runs `fit_gibbs_global_batch` on a measurement set.

    The fit result is stored on the ``out`` parameter.
    """

    _type = 'fit_global_batch'

    hdxm_set = param.ClassSelector(class_=HDXMeasurementSet)

    initial_guess = param.ClassSelector(
        class_=(pd.Series, pd.DataFrame),
        doc='Initial guesses for fits',
    )

    out = param.ClassSelector(class_=TorchFitResult)

    def execute(self, *args, **kwargs):
        """Run the batch fit; keyword arguments are forwarded to the fit function.

        Positional arguments are accepted but not used.
        """
        self.out = fit_gibbs_global_batch(self.hdxm_set, self.initial_guess, **kwargs)
223+
224+
class SaveFitResult(Task):
    """Task which passes a fit result to `save_fitresult`."""

    _type = 'save_fit_result'

    fit_result = param.ClassSelector(class_=TorchFitResult)

    # Output directory as a string; joined onto `cwd` in `execute`
    output_dir = param.String()

    def execute(self, *args, **kwargs):
        """Save `fit_result` under `cwd / output_dir`.

        Positional and keyword arguments are accepted but not used.
        """
        target_dir = self.cwd / self.output_dir
        save_fitresult(target_dir, self.fit_result)
234+
235+
class JobParser(object):
    """Instantiate and run the tasks listed in a job specification, in order.

    Parameters
    ----------
    job_spec : dict
        Job specification. Its ``'steps'`` entry is iterated in `execute`;
        each step is a dict with a ``'task'`` key naming the task type,
        optional ``'args'`` / ``'kwargs'`` passed to the task's ``execute``,
        and any further keys used as parameters for the task.
    cwd : path-like, optional
        Working directory handed to every task. Defaults to the current
        working directory of the process.
    """

    # Matches ``$(task_name.attr.attr)`` references in string parameter values
    _var_pattern = re.compile(r'\$\((.*?)\)')

    # Step keys which are not forwarded to the task constructor
    _reserved_keys = frozenset({'args', 'kwargs', 'task'})

    def __init__(self, job_spec, cwd=None):
        self.job_spec = job_spec
        # `cwd` is kept as a plain attribute: param Parameters are only
        # supported on `param.Parameterized` classes, which this class is not.
        # Strings are converted since tasks join paths with `cwd / ...`.
        self.cwd = Path(cwd) if cwd else Path.cwd()

        # Executed tasks, keyed by task name; used to resolve `$(...)` references
        self.tasks = {}
        # Task type string -> Task subclass, for every subclass declaring `_type`
        self.task_classes = {
            cls._type: cls for cls in gen_subclasses(Task) if getattr(cls, "_type", None)
        }

    def resolve_var(self, var_string):
        """Resolve a dotted reference such as ``'task_name.out'``.

        The first component is looked up in `self.tasks`; the remaining
        components are followed as attributes on that task.

        Raises
        ------
        KeyError
            If no executed task has the given name.
        AttributeError
            If one of the attributes in the chain does not exist.
        """
        task_name, *attrs = var_string.split('.')

        return reduce(getattr, attrs, self.tasks[task_name])

    def execute(self):
        """Create and execute every task in ``job_spec['steps']`` in order.

        String parameter values containing ``$(name.attr)`` are replaced by
        the referenced attribute of an earlier task. Each executed task is
        stored in `self.tasks` under its ``name``.
        """
        for task_spec in self.job_spec['steps']:
            task_klass = self.task_classes[task_spec['task']]

            resolved_params = {}
            for par_name in task_spec.keys() - self._reserved_keys:
                value = task_spec[par_name]
                if isinstance(value, str):
                    # Only the first reference is used; it replaces the whole value
                    match = self._var_pattern.search(value)
                    if match:
                        value = self.resolve_var(match.group(1))
                resolved_params[par_name] = value

            task = task_klass(cwd=self.cwd, **resolved_params)
            task.execute(*task_spec.get('args', []), **task_spec.get('kwargs', {}))

            self.tasks[task.name] = task
270+
271+
def yaml_to_hdxmset(yaml_dict, data_dir=None, **kwargs):
    """Read files according to the `yaml_dict` spec from `data_dir` into a HDXMeasurementSet.

    Deprecated: use `StateParser` instead.

    Parameters
    ----------
    yaml_dict : dict
        Mapping of measurement name to the spec passed on to `yaml_to_hdxm`.
    data_dir : optional
        Passed on to `yaml_to_hdxm` as `data_dir`.
    **kwargs
        Accepted for backwards compatibility; currently not used.

    Returns
    -------
    HDXMeasurementSet
        Set of the measurements created from each entry in `yaml_dict`.
    """

    # Emit a DeprecationWarning (as `yaml_to_hdxm` does) attributed to the caller
    warnings.warn(
        "yaml_to_hdxmset is deprecated, use 'StateParser'",
        DeprecationWarning,
        stacklevel=2,
    )
    hdxm_list = [
        yaml_to_hdxm(v, data_dir=data_dir, name=k) for k, v in yaml_dict.items()
    ]

    return HDXMeasurementSet(hdxm_list)
123282
# todo configurable
def _positive_exposure_only(df):
    """Return the rows of `df` whose `exposure` column is greater than zero."""
    return df.query('exposure > 0')


# Data filters handed to StateParser by LoadHDMeasurementSetTask
default_filters = [_positive_exposure_only]
287+
124288
125289def yaml_to_hdxm (yaml_dict , data_dir = None , data_filters = None , ** kwargs ):
126290 # todo perhas classmethod on HDXMeasurement object?
@@ -142,7 +306,7 @@ def yaml_to_hdxm(yaml_dict, data_dir=None, data_filters=None, **kwargs):
142306 Output data object as specified by `yaml_dict`.
143307 """
144308
145- warnings .warn ('This method is deprecated in favor of YamlParser ' , DeprecationWarning )
309+ warnings .warn ('This method is deprecated in favor of StateParser ' , DeprecationWarning )
146310
147311 if data_dir is not None :
148312 input_files = [Path (data_dir ) / fname for fname in yaml_dict ["filenames" ]]
@@ -270,3 +434,5 @@ def load_from_yaml_v040b2(yaml_dict, data_dir=None, **kwargs): # pragma: no cov
270434 )
271435
272436 return hdxm
437+
438+
0 commit comments