Skip to content

Commit 2e37780

Browse files
authored
Merge pull request #27 from elmbeech:batch_args
mcds_list_to_qoi_df_long implementation for calculate_qoi_from_sa_db
2 parents 8c6c8ea + cff34d3 commit 2e37780

File tree

1 file changed

+80
-23
lines changed

1 file changed

+80
-23
lines changed

uq_physicell/model_analysis/utils.py

Lines changed: 80 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,20 @@
88

99
def reshape_sa_expanded_data(expanded_data: pd.DataFrame, qoi_columns: list) -> pd.DataFrame:
1010
"""Reshape expanded sensitivity analysis data for pivot table analysis.
11-
11+
1212
This function transforms time-series data from long format (multiple rows per sample)
1313
to wide format (columns for each time point) to facilitate statistical analysis.
14-
14+
1515
Args:
1616
expanded_data (pd.DataFrame): DataFrame containing expanded simulation data
1717
with SampleID, ReplicateID, time, and QoI columns.
1818
qoi_columns (list): List of quantity of interest column names to reshape.
19-
19+
2020
Returns:
2121
pd.DataFrame: Reshaped DataFrame with multi-level columns where each QoI
2222
and time point becomes a separate column indexed by SampleID
2323
and ReplicateID.
24-
24+
2525
Example:
2626
>>> data = pd.DataFrame({
2727
... 'SampleID': [0, 0, 1, 1],
@@ -59,11 +59,11 @@ def reshape_sa_expanded_data(expanded_data: pd.DataFrame, qoi_columns: list) ->
5959

6060
def mcds_list_to_qoi_df_for_sa(recreated_qoi_funcs, all_sample_ids, chunk_size, db_file) -> pd.DataFrame:
6161
"""Convert a list of MCDS objects to a DataFrame of quantities of interest for sensitivity analysis.
62-
62+
6363
This function processes a list of MCDS simulation results, extracting relevant
6464
quantities of interest (QoIs) at each time point and organizing them into a
6565
structured DataFrame suitable for sensitivity analysis.
66-
66+
6767
Args:
6868
recreated_qoi_funcs (dict): Dictionary of QoI functions where keys are QoI names
6969
and values are callable functions.
@@ -88,7 +88,7 @@ def mcds_list_to_qoi_df_for_sa(recreated_qoi_funcs, all_sample_ids, chunk_size,
8888
data = {'SampleID': SampleID, 'ReplicateID': ReplicateID}
8989
for id_time, mcds in enumerate(mcds_ts_list):
9090
data[f"time_{id_time}"] = mcds.get_time()
91-
try:
91+
try:
9292
for qoi_name, qoi_func in recreated_qoi_funcs.items():
9393
function_result = safe_call_qoi_function(qoi_func, mcds=mcds, list_mcds=mcds_ts_list)
9494
if function_result is not None:
@@ -99,17 +99,63 @@ def mcds_list_to_qoi_df_for_sa(recreated_qoi_funcs, all_sample_ids, chunk_size,
9999
# Store the data in a DataFrame
100100
df_qoi_replicate = pd.DataFrame({key: [value] for key, value in data.items()})
101101
df_qois = pd.concat([df_qois, df_qoi_replicate], ignore_index=True)
102-
102+
103103
df_qois = df_qois.reset_index(drop=True)
104104
return df_qois
105105

106+
def mcds_list_to_qoi_df_long(recreated_qoi_funcs: dict, all_sample_ids: list, chunk_size: int, db_file: str) -> pd.DataFrame:
    """Compute quantities of interest (QoIs) and return them in long format.

    The simulation output is loaded from the database in chunks of
    ``chunk_size`` samples. For every (SampleID, ReplicateID) pair found in a
    chunk, each QoI function is evaluated once per entry of that replicate's
    MCDS time series, and one row per time step is emitted.

    Args:
        recreated_qoi_funcs (dict): Dictionary of QoI functions where keys are QoI names
            and values are callable functions.
        all_sample_ids (list): List of all sample IDs to process.
        chunk_size (int): Number of samples to process in each chunk to manage memory usage.
        db_file (str): Path to the database file containing simulation output.

    Returns:
        pd.DataFrame: Long-format DataFrame with one row per
            (SampleID, ReplicateID, time step). The columns are ``SampleID``,
            ``ReplicateID``, ``time``, followed by one column per QoI in
            sorted QoI-name order. Whatever ``safe_call_qoi_function`` returns
            is stored as the cell value without filtering.

    Raises:
        RuntimeError: If evaluating a QoI fails; the message identifies the
            sample, the replicate, the QoI name, and the time-step index, and
            the original exception is chained as ``__cause__``.
    """
    # Fix the QoI order once so that the column header and every row agree.
    ls_qoi_name = sorted(recreated_qoi_funcs.keys())
    ls_column = ['SampleID', 'ReplicateID', 'time'] + ls_qoi_name
    llo_data = []

    # Process samples in chunks to avoid memory issues
    for i in range(0, len(all_sample_ids), chunk_size):
        chunk_sample_ids = all_sample_ids[i:i + chunk_size]
        # Load only this chunk of data
        df_output = load_output(db_file, sample_ids=chunk_sample_ids, load_data=True)
        for SampleID in sorted(df_output['SampleID'].unique()):
            df_sample = df_output[df_output['SampleID'] == SampleID]
            for ReplicateID in sorted(df_sample['ReplicateID'].unique()):
                # The 'Data' cell of the first matching row holds the replicate's MCDS time series.
                mcds_ts_list = df_sample[df_sample['ReplicateID'] == ReplicateID]['Data'].values[0]
                for id_time, mcds in enumerate(mcds_ts_list):
                    lo_data = [SampleID, ReplicateID, mcds.get_time()]
                    # Bind the name before the try so the error message below can never hit an unbound variable.
                    qoi_name = None
                    try:
                        for qoi_name in ls_qoi_name:
                            # Evaluate the QoI for this time step and store its result
                            function_result = safe_call_qoi_function(
                                recreated_qoi_funcs[qoi_name], mcds=mcds, list_mcds=mcds_ts_list
                            )
                            lo_data.append(function_result)
                    except Exception as e:
                        raise RuntimeError(f"Error calculating QoIs for SampleID: {SampleID}, ReplicateID: {ReplicateID} - QoI: {qoi_name}_{id_time}: {e}") from e
                    # Store the results of this time step
                    llo_data.append(lo_data)

    # Generate data frame
    df_qois = pd.DataFrame(llo_data, columns=ls_column)
    return df_qois
151+
106152
def mcds_list_to_qoi_df_for_calib(recreated_qoi_funcs, all_sample_ids, chunk_size, db_file) -> pd.DataFrame:
107153
"""Convert a list of MCDS objects to a DataFrame of quantities of interest for calibration.
108-
154+
109155
This function processes a list of MCDS simulation results, extracting relevant
110156
quantities of interest (QoIs) and organizing them into a structured DataFrame
111157
suitable for calibration tasks.
112-
158+
113159
Args:
114160
recreated_qoi_funcs (dict): Dictionary of QoI functions where keys are QoI names
115161
and values are callable functions.
@@ -132,7 +178,7 @@ def mcds_list_to_qoi_df_for_calib(recreated_qoi_funcs, all_sample_ids, chunk_siz
132178
mcds_ts_list = df_sample[df_sample['ReplicateID'] == ReplicateID]['Data'].values[0]
133179
for id_time, mcds in enumerate(mcds_ts_list):
134180
data = {'SampleID': SampleID, 'ReplicateID': ReplicateID, 'time': mcds.get_time()}
135-
try:
181+
try:
136182
for qoi_name, qoi_func in recreated_qoi_funcs.items():
137183
function_result = safe_call_qoi_function(qoi_func, mcds=mcds, list_mcds=mcds_ts_list)
138184
if function_result is not None:
@@ -148,22 +194,24 @@ def mcds_list_to_qoi_df_for_calib(recreated_qoi_funcs, all_sample_ids, chunk_siz
148194

149195
def calculate_qoi_from_sa_db(db_file: str, qoi_functions: dict, chunk_size: int = 10, mode='sa') -> pd.DataFrame:
150196
"""Calculate quantities of interest from sensitivity analysis database results.
151-
152-
This function loads simulation results from a database in chunks and applies QoI
153-
functions to extract meaningful metrics from the time-series data. Processing in
197+
198+
This function loads simulation results from a database in chunks and applies QoI
199+
functions to extract meaningful metrics from the time-series data. Processing in
154200
chunks helps avoid excessive memory usage for large databases.
155-
201+
156202
Args:
157203
db_file (str): Path to the SQLite database containing simulation results.
158204
qoi_functions (dict): Dictionary of QoI functions where keys are QoI names
159205
and values are lambda functions or string representations.
160206
chunk_size (int, optional): Number of samples to process at a time. Default is 10.
161207
Adjust based on available memory and data size.
162-
208+
mode: Specify the form of the result dataframe. Possible modes are
209+
sa, calib, and long. The default is sa.
210+
163211
Returns:
164212
pd.DataFrame: DataFrame with calculated QoI values indexed by SampleID
165213
and ReplicateID, with columns for each QoI.
166-
214+
167215
Example:
168216
>>> qoi_funcs = {
169217
... 'final_cells': 'lambda data: data[-1]["cell_count"]',
@@ -175,7 +223,7 @@ def calculate_qoi_from_sa_db(db_file: str, qoi_functions: dict, chunk_size: int
175223
# Load sample IDs to determine what to process
176224
dic_samples = load_samples(db_file)
177225
all_sample_ids = sorted(dic_samples.keys())
178-
226+
179227
# Recreate QoI functions from their string representations
180228
recreated_qoi_funcs = recreate_qoi_functions(qoi_functions)
181229
if mode == 'sa':
@@ -192,17 +240,26 @@ def calculate_qoi_from_sa_db(db_file: str, qoi_functions: dict, chunk_size: int
192240
chunk_size=chunk_size,
193241
db_file=db_file
194242
)
243+
elif mode == 'long':
244+
df_qois = mcds_list_to_qoi_df_long(
245+
recreated_qoi_funcs=recreated_qoi_funcs,
246+
all_sample_ids=all_sample_ids,
247+
chunk_size=chunk_size,
248+
db_file=db_file
249+
)
195250
else:
196251
raise ValueError(f"Unknown mode '{mode}'. Supported modes are 'sa' and 'calib'.")
197252

198253
return df_qois
199254

255+
256+
200257
def calculate_qoi_statistics(df_qois_data: pd.DataFrame, qoi_funcs: dict, db_file_path: str, ignore_db_consistency: bool = False) -> pd.DataFrame:
201258
"""Calculate statistical summaries of quantities of interest across replicates.
202-
259+
203260
This function computes mean and standard deviation of QoI values across
204261
simulation replicates for each parameter sample, enabling uncertainty quantification.
205-
262+
206263
Args:
207264
df_qois_data (pd.DataFrame): DataFrame containing QoI values with SampleID,
208265
ReplicateID, and QoI columns.
@@ -213,10 +270,10 @@ def calculate_qoi_statistics(df_qois_data: pd.DataFrame, qoi_funcs: dict, db_fil
213270
Returns:
214271
pd.DataFrame: DataFrame with statistical summaries (mean, std) of QoIs
215272
grouped by SampleID, with columns for each QoI statistic.
216-
273+
217274
Raises:
218275
ValueError: If no QoI functions are defined or data format is invalid.
219-
276+
220277
Example:
221278
>>> qoi_funcs = {'cell_count': lambda x: x.sum(), 'growth_rate': None}
222279
>>> stats_df = calculate_qoi_statistics(qoi_data, qoi_funcs, 'study.db')
@@ -298,4 +355,4 @@ def calculate_qoi_statistics(df_qois_data: pd.DataFrame, qoi_funcs: dict, db_fil
298355
df_relative_mcse[time_columns] = df_mean[time_columns]
299356
except Exception as e:
300357
raise ValueError(f"Error taking the mean and MCSE among replicates: {e}")
301-
return df_mean, df_relative_mcse
358+
return df_mean, df_relative_mcse

0 commit comments

Comments
 (0)