|
2 | 2 |
|
3 | 3 | import logging |
4 | 4 | import os |
| 5 | +import re |
5 | 6 | from typing import Iterable, Optional, Callable, Union, Any, Sequence, List |
6 | 7 | from warnings import warn |
7 | 8 |
|
@@ -124,125 +125,52 @@ def flatten_timepoint_specific_output_overrides( |
124 | 125 | petab_problem: |
125 | 126 | PEtab problem to work on |
126 | 127 | """ |
127 | | - measurement_df = petab_problem.measurement_df |
128 | | - |
129 | | - # remember if columns exist |
130 | | - has_obs_par = OBSERVABLE_PARAMETERS in measurement_df |
131 | | - has_noise_par = NOISE_PARAMETERS in measurement_df |
132 | | - has_preeq = PREEQUILIBRATION_CONDITION_ID in measurement_df |
133 | | - |
134 | | - # fill in optional columns to avoid special cases later |
135 | | - if not has_obs_par \ |
136 | | - or np.all(measurement_df[OBSERVABLE_PARAMETERS].isnull()): |
137 | | - measurement_df[OBSERVABLE_PARAMETERS] = '' |
138 | | - if not has_noise_par \ |
139 | | - or np.all(measurement_df[NOISE_PARAMETERS].isnull()): |
140 | | - measurement_df[NOISE_PARAMETERS] = '' |
141 | | - if not has_preeq \ |
142 | | - or np.all(measurement_df[PREEQUILIBRATION_CONDITION_ID].isnull()): |
143 | | - measurement_df[PREEQUILIBRATION_CONDITION_ID] = '' |
144 | | - # convert to str row by row |
145 | | - for irow, row in measurement_df.iterrows(): |
146 | | - if is_empty(row[OBSERVABLE_PARAMETERS]): |
147 | | - measurement_df.at[irow, OBSERVABLE_PARAMETERS] = '' |
148 | | - if is_empty(row[NOISE_PARAMETERS]): |
149 | | - measurement_df.at[irow, NOISE_PARAMETERS] = '' |
150 | | - if is_empty(row[PREEQUILIBRATION_CONDITION_ID]): |
151 | | - measurement_df.at[irow, PREEQUILIBRATION_CONDITION_ID] = '' |
152 | | - |
153 | | - # Create empty df -> to be filled with replicate-specific observables |
154 | | - df_new = pd.DataFrame() |
155 | | - |
156 | | - # Get observableId, preequilibrationConditionId |
157 | | - # and simulationConditionId columns in measurement df |
158 | | - cols = get_notnull_columns( |
159 | | - measurement_df, |
160 | | - [OBSERVABLE_ID, PREEQUILIBRATION_CONDITION_ID, |
161 | | - SIMULATION_CONDITION_ID] |
162 | | - ) |
163 | | - df = measurement_df[cols] |
164 | | - |
165 | | - # Get unique combinations of observableId, preequilibrationConditionId |
166 | | - # and simulationConditionId |
167 | | - df_unique_values = df.drop_duplicates() |
168 | | - |
169 | | - # replaced observables: new ID => old ID |
170 | | - replacements = dict() |
171 | | - |
172 | | - # Loop over each unique combination |
173 | | - for irow in df_unique_values.index: |
174 | | - df = measurement_df.loc[ |
175 | | - (measurement_df[OBSERVABLE_ID] == |
176 | | - df_unique_values.loc[irow, OBSERVABLE_ID]) |
177 | | - & (measurement_df[PREEQUILIBRATION_CONDITION_ID] == |
178 | | - df_unique_values.loc[irow, PREEQUILIBRATION_CONDITION_ID]) |
179 | | - & (measurement_df[SIMULATION_CONDITION_ID] == |
180 | | - df_unique_values.loc[irow, SIMULATION_CONDITION_ID]) |
181 | | - ] |
182 | | - |
183 | | - # Get list of unique observable parameters |
184 | | - unique_sc = df[OBSERVABLE_PARAMETERS].unique() |
185 | | - # Get list of unique noise parameters |
186 | | - unique_noise = df[NOISE_PARAMETERS].unique() |
187 | | - |
188 | | - # Loop |
189 | | - for i_noise, cur_noise in enumerate(unique_noise): |
190 | | - for i_sc, cur_sc in enumerate(unique_sc): |
191 | | - # Find the position of all instances of cur_noise |
192 | | - # and unique_sc[j] in their corresponding column |
193 | | - # (full-string matches are denoted by zero) |
194 | | - idxs = ( |
195 | | - df[NOISE_PARAMETERS].astype(str).str.find(cur_noise) + |
196 | | - df[OBSERVABLE_PARAMETERS].astype(str).str.find(cur_sc) |
| 128 | + new_measurement_dfs = [] |
| 129 | + new_observable_dfs = [] |
| 130 | + possible_groupvars = [OBSERVABLE_ID, OBSERVABLE_PARAMETERS, |
| 131 | + NOISE_PARAMETERS, SIMULATION_CONDITION_ID, |
| 132 | + PREEQUILIBRATION_CONDITION_ID] |
| 133 | + groupvars = get_notnull_columns(petab_problem.measurement_df, |
| 134 | + possible_groupvars) |
| 135 | + for groupvar, measurements in \ |
| 136 | + petab_problem.measurement_df.groupby(groupvars, dropna=False): |
| 137 | + obs_id = groupvar[groupvars.index(OBSERVABLE_ID)] |
| 138 | + # construct replacement id |
| 139 | + replacement_id = '' |
| 140 | + for field in possible_groupvars: |
| 141 | + if field in groupvars: |
| 142 | + val = groupvar[groupvars.index(field) |
| 143 | + ].replace(';', '_').replace('.', '_') |
| 144 | + if replacement_id == '': |
| 145 | + replacement_id = val |
| 146 | + elif val != '': |
| 147 | + replacement_id += f'__{val}' |
| 148 | + |
| 149 | + logger.debug(f'Creating synthetic observable {obs_id}') |
| 150 | + if replacement_id in petab_problem.observable_df.index: |
| 151 | + raise RuntimeError('could not create synthetic observables ' |
| 152 | + f'since {replacement_id} was already ' |
| 153 | + 'present in observable table') |
| 154 | + observable = petab_problem.observable_df.loc[obs_id].copy() |
| 155 | + observable.name = replacement_id |
| 156 | + for field, parname, target in [ |
| 157 | + (NOISE_PARAMETERS, 'noiseParameter', NOISE_FORMULA), |
| 158 | + (OBSERVABLE_PARAMETERS, 'observableParameter', OBSERVABLE_FORMULA) |
| 159 | + ]: |
| 160 | + if field in measurements: |
| 161 | + observable[target] = re.sub( |
| 162 | + fr'{parname}([0-9]+)_{obs_id}', |
| 163 | + f'{parname}\\1_{replacement_id}', |
| 164 | + observable[target] |
197 | 165 | ) |
198 | | - tmp_ = df.loc[idxs == 0, OBSERVABLE_ID] |
199 | | - # Create replicate-specific observable name |
200 | | - tmp = tmp_ + "_" + str(i_noise + i_sc + 1) |
201 | | - # Check if replicate-specific observable name already exists |
202 | | - # in df. If true, rename replicate-specific observable |
203 | | - counter = 2 |
204 | | - while (df[OBSERVABLE_ID].str.find( |
205 | | - tmp.to_string() |
206 | | - ) == 0).any(): |
207 | | - tmp = tmp_ + counter * "_" + str(i_noise + i_sc + 1) |
208 | | - counter += 1 |
209 | | - if not tmp_.empty: |
210 | | - replacements[tmp.values[0]] = tmp_.values[0] |
211 | | - df.loc[idxs == 0, OBSERVABLE_ID] = tmp |
212 | | - # Append the result in a new df |
213 | | - df_new = df_new.append(df.loc[idxs == 0]) |
214 | | - # Restore the observable name in the original df |
215 | | - # (for continuation of the loop) |
216 | | - df.loc[idxs == 0, OBSERVABLE_ID] = tmp |
217 | | - |
218 | | - # remove previously non-existent columns again |
219 | | - if not has_obs_par: |
220 | | - df_new.drop(columns=OBSERVABLE_PARAMETERS, inplace=True) |
221 | | - if not has_noise_par: |
222 | | - df_new.drop(columns=NOISE_PARAMETERS, inplace=True) |
223 | | - if not has_preeq: |
224 | | - df_new.drop(columns=PREEQUILIBRATION_CONDITION_ID, inplace=True) |
225 | | - |
226 | | - # Update/Redefine measurement df with replicate-specific observables |
227 | | - petab_problem.measurement_df = df_new |
228 | | - |
229 | | - observable_df = petab_problem.observable_df |
230 | | - |
231 | | - # Update observables table |
232 | | - for replacement, replacee in replacements.items(): |
233 | | - new_obs = observable_df.loc[replacee].copy() |
234 | | - new_obs.name = replacement |
235 | | - new_obs[OBSERVABLE_FORMULA] = new_obs[OBSERVABLE_FORMULA].replace( |
236 | | - replacee, replacement) |
237 | | - new_obs[NOISE_FORMULA] = new_obs[NOISE_FORMULA].replace( |
238 | | - replacee, replacement) |
239 | | - observable_df = observable_df.append( |
240 | | - new_obs |
241 | | - ) |
242 | 166 |
|
243 | | - petab_problem.observable_df = observable_df |
244 | | - petab_problem.observable_df.drop(index=set(replacements.values()), |
245 | | - inplace=True) |
| 167 | + measurements[OBSERVABLE_ID] = replacement_id |
| 168 | + new_measurement_dfs.append(measurements) |
| 169 | + new_observable_dfs.append(observable) |
| 170 | + |
| 171 | + petab_problem.observable_df = pd.concat(new_observable_dfs, axis=1).T |
| 172 | + petab_problem.observable_df.index.name = OBSERVABLE_ID |
| 173 | + petab_problem.measurement_df = pd.concat(new_measurement_dfs) |
246 | 174 |
|
247 | 175 |
|
248 | 176 | def concat_tables( |
|
0 commit comments