88
99def reshape_sa_expanded_data (expanded_data : pd .DataFrame , qoi_columns : list ) -> pd .DataFrame :
1010 """Reshape expanded sensitivity analysis data for pivot table analysis.
11-
11+
1212 This function transforms time-series data from long format (multiple rows per sample)
1313 to wide format (columns for each time point) to facilitate statistical analysis.
14-
14+
1515 Args:
1616 expanded_data (pd.DataFrame): DataFrame containing expanded simulation data
1717 with SampleID, ReplicateID, time, and QoI columns.
1818 qoi_columns (list): List of quantity of interest column names to reshape.
19-
19+
2020 Returns:
2121 pd.DataFrame: Reshaped DataFrame with multi-level columns where each QoI
2222 and time point becomes a separate column indexed by SampleID
2323 and ReplicateID.
24-
24+
2525 Example:
2626 >>> data = pd.DataFrame({
2727 ... 'SampleID': [0, 0, 1, 1],
@@ -59,11 +59,11 @@ def reshape_sa_expanded_data(expanded_data: pd.DataFrame, qoi_columns: list) ->
5959
6060def mcds_list_to_qoi_df_for_sa (recreated_qoi_funcs , all_sample_ids , chunk_size , db_file ) -> pd .DataFrame :
6161 """Convert a list of MCDS objects to a DataFrame of quantities of interest for sensitivity analysis.
62-
62+
6363 This function processes a list of MCDS simulation results, extracting relevant
6464 quantities of interest (QoIs) at each time point and organizing them into a
6565 structured DataFrame suitable for sensitivity analysis.
66-
66+
6767 Args:
6868 recreated_qoi_funcs (dict): Dictionary of QoI functions where keys are QoI names
6969 and values are callable functions.
@@ -88,7 +88,7 @@ def mcds_list_to_qoi_df_for_sa(recreated_qoi_funcs, all_sample_ids, chunk_size,
8888 data = {'SampleID' : SampleID , 'ReplicateID' : ReplicateID }
8989 for id_time , mcds in enumerate (mcds_ts_list ):
9090 data [f"time_{ id_time } " ] = mcds .get_time ()
91- try :
91+ try :
9292 for qoi_name , qoi_func in recreated_qoi_funcs .items ():
9393 function_result = safe_call_qoi_function (qoi_func , mcds = mcds , list_mcds = mcds_ts_list )
9494 if function_result is not None :
@@ -99,17 +99,63 @@ def mcds_list_to_qoi_df_for_sa(recreated_qoi_funcs, all_sample_ids, chunk_size,
9999 # Store the data in a DataFrame
100100 df_qoi_replicate = pd .DataFrame ({key : [value ] for key , value in data .items ()})
101101 df_qois = pd .concat ([df_qois , df_qoi_replicate ], ignore_index = True )
102-
102+
103103 df_qois = df_qois .reset_index (drop = True )
104104 return df_qois
105105
def mcds_list_to_qoi_df_long(recreated_qoi_funcs, all_sample_ids, chunk_size, db_file) -> pd.DataFrame:
    """Convert stored MCDS time series into a long-format DataFrame of quantities of interest.

    Sample IDs are processed in slices of ``chunk_size``; each slice is loaded with
    ``load_output`` and every MCDS object found in a replicate's time series
    produces exactly one output row holding the identifiers, the simulation time
    and the result of each QoI function.

    Args:
        recreated_qoi_funcs (dict): Dictionary of QoI functions where keys are QoI names
            and values are callable functions.
        all_sample_ids (list): List of all sample IDs to process.
        chunk_size (int): Number of samples to load per chunk to manage memory usage.
        db_file (str): Path to the database file containing simulation output.

    Returns:
        pd.DataFrame: Long-format DataFrame with one row per
            (SampleID, ReplicateID, time point) and the columns 'SampleID',
            'ReplicateID', 'time' followed by one column per QoI, ordered by
            sorted QoI name. QoI results are stored exactly as returned by
            ``safe_call_qoi_function`` (including ``None``).

    Raises:
        RuntimeError: If evaluating a QoI function raises; the message names the
            sample, replicate, QoI and time index, and the original exception is
            chained as the cause.
    """
    # Sort once: the same ordering defines both the column layout and the order
    # in which values are appended to every row.
    qoi_items = sorted(recreated_qoi_funcs.items())
    columns = ['SampleID', 'ReplicateID', 'time'] + [qoi_name for qoi_name, _ in qoi_items]

    rows = []
    # Process samples in chunks to avoid memory issues
    for start in range(0, len(all_sample_ids), chunk_size):
        chunk_sample_ids = all_sample_ids[start:start + chunk_size]
        # Load only this chunk of data
        df_output = load_output(db_file, sample_ids=chunk_sample_ids, load_data=True)
        for SampleID in sorted(df_output['SampleID'].unique()):
            df_sample = df_output[df_output['SampleID'] == SampleID]
            for ReplicateID in sorted(df_sample['ReplicateID'].unique()):
                # The first matching 'Data' entry holds the replicate's MCDS time series.
                mcds_ts_list = df_sample[df_sample['ReplicateID'] == ReplicateID]['Data'].values[0]
                # id_time is only needed to identify the failing time point in errors.
                for id_time, mcds in enumerate(mcds_ts_list):
                    row = [SampleID, ReplicateID, mcds.get_time()]
                    try:
                        for qoi_name, qoi_func in qoi_items:
                            row.append(
                                safe_call_qoi_function(qoi_func, mcds=mcds, list_mcds=mcds_ts_list)
                            )
                    except Exception as e:
                        raise RuntimeError(
                            f"Error calculating QoIs for SampleID: {SampleID}, "
                            f"ReplicateID: {ReplicateID} - QoI: {qoi_name}_{id_time}: {e}"
                        ) from e
                    rows.append(row)

    # Generate the data frame
    return pd.DataFrame(rows, columns=columns)
151+
106152def mcds_list_to_qoi_df_for_calib (recreated_qoi_funcs , all_sample_ids , chunk_size , db_file ) -> pd .DataFrame :
107153 """Convert a list of MCDS objects to a DataFrame of quantities of interest for calibration.
108-
154+
109155 This function processes a list of MCDS simulation results, extracting relevant
110156 quantities of interest (QoIs) and organizing them into a structured DataFrame
111157 suitable for calibration tasks.
112-
158+
113159 Args:
114160 recreated_qoi_funcs (dict): Dictionary of QoI functions where keys are QoI names
115161 and values are callable functions.
@@ -132,7 +178,7 @@ def mcds_list_to_qoi_df_for_calib(recreated_qoi_funcs, all_sample_ids, chunk_siz
132178 mcds_ts_list = df_sample [df_sample ['ReplicateID' ] == ReplicateID ]['Data' ].values [0 ]
133179 for id_time , mcds in enumerate (mcds_ts_list ):
134180 data = {'SampleID' : SampleID , 'ReplicateID' : ReplicateID , 'time' : mcds .get_time ()}
135- try :
181+ try :
136182 for qoi_name , qoi_func in recreated_qoi_funcs .items ():
137183 function_result = safe_call_qoi_function (qoi_func , mcds = mcds , list_mcds = mcds_ts_list )
138184 if function_result is not None :
@@ -148,22 +194,24 @@ def mcds_list_to_qoi_df_for_calib(recreated_qoi_funcs, all_sample_ids, chunk_siz
148194
149195def calculate_qoi_from_sa_db (db_file : str , qoi_functions : dict , chunk_size : int = 10 , mode = 'sa' ) -> pd .DataFrame :
150196 """Calculate quantities of interest from sensitivity analysis database results.
151-
152- This function loads simulation results from a database in chunks and applies QoI
153- functions to extract meaningful metrics from the time-series data. Processing in
197+
198+ This function loads simulation results from a database in chunks and applies QoI
199+ functions to extract meaningful metrics from the time-series data. Processing in
154200 chunks helps avoid excessive memory usage for large databases.
155-
201+
156202 Args:
157203 db_file (str): Path to the SQLite database containing simulation results.
158204 qoi_functions (dict): Dictionary of QoI functions where keys are QoI names
159205 and values are lambda functions or string representations.
160206 chunk_size (int, optional): Number of samples to process at a time. Default is 10.
161207 Adjust based on available memory and data size.
162-
208+ mode (str, optional): Form of the result DataFrame. Supported modes are
209+ 'sa', 'calib', and 'long'. Default is 'sa'.
210+
163211 Returns:
164212 pd.DataFrame: DataFrame with calculated QoI values indexed by SampleID
165213 and ReplicateID, with columns for each QoI.
166-
214+
167215 Example:
168216 >>> qoi_funcs = {
169217 ... 'final_cells': 'lambda data: data[-1]["cell_count"]',
@@ -175,7 +223,7 @@ def calculate_qoi_from_sa_db(db_file: str, qoi_functions: dict, chunk_size: int
175223 # Load sample IDs to determine what to process
176224 dic_samples = load_samples (db_file )
177225 all_sample_ids = sorted (dic_samples .keys ())
178-
226+
179227 # Recreate QoI functions from their string representations
180228 recreated_qoi_funcs = recreate_qoi_functions (qoi_functions )
181229 if mode == 'sa' :
@@ -192,17 +240,26 @@ def calculate_qoi_from_sa_db(db_file: str, qoi_functions: dict, chunk_size: int
192240 chunk_size = chunk_size ,
193241 db_file = db_file
194242 )
243+ elif mode == 'long' :
244+ df_qois = mcds_list_to_qoi_df_long (
245+ recreated_qoi_funcs = recreated_qoi_funcs ,
246+ all_sample_ids = all_sample_ids ,
247+ chunk_size = chunk_size ,
248+ db_file = db_file
249+ )
195250 else :
196251 raise ValueError (f"Unknown mode '{ mode } '. Supported modes are 'sa' and 'calib'." )
197252
198253 return df_qois
199254
255+
256+
200257def calculate_qoi_statistics (df_qois_data : pd .DataFrame , qoi_funcs : dict , db_file_path : str , ignore_db_consistency : bool = False ) -> pd .DataFrame :
201258 """Calculate statistical summaries of quantities of interest across replicates.
202-
259+
203260 This function computes mean and standard deviation of QoI values across
204261 simulation replicates for each parameter sample, enabling uncertainty quantification.
205-
262+
206263 Args:
207264 df_qois_data (pd.DataFrame): DataFrame containing QoI values with SampleID,
208265 ReplicateID, and QoI columns.
@@ -213,10 +270,10 @@ def calculate_qoi_statistics(df_qois_data: pd.DataFrame, qoi_funcs: dict, db_fil
213270 Returns:
214271 pd.DataFrame: DataFrame with statistical summaries (mean, std) of QoIs
215272 grouped by SampleID, with columns for each QoI statistic.
216-
273+
217274 Raises:
218275 ValueError: If no QoI functions are defined or data format is invalid.
219-
276+
220277 Example:
221278 >>> qoi_funcs = {'cell_count': lambda x: x.sum(), 'growth_rate': None}
222279 >>> stats_df = calculate_qoi_statistics(qoi_data, qoi_funcs, 'study.db')
@@ -298,4 +355,4 @@ def calculate_qoi_statistics(df_qois_data: pd.DataFrame, qoi_funcs: dict, db_fil
298355 df_relative_mcse [time_columns ] = df_mean [time_columns ]
299356 except Exception as e :
300357 raise ValueError (f"Error taking the mean and MCSE among replicates: { e } " )
301- return df_mean , df_relative_mcse
358+ return df_mean , df_relative_mcse
0 commit comments