Skip to content

Commit aae9160

Browse files
committed
added logic to convert from mRE(S)CIST to auc
1 parent c0e3c03 commit aae9160

File tree

1 file changed

+33
-6
lines changed

1 file changed

+33
-6
lines changed

scripts/prepare_data_for_improve.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,16 @@ def process_datasets(args):
172172
experiments = []
173173
logger.debug("creating list of datasets that contain experiment info ...")
174174
for data_set in data_sets_names_list:
175-
# sarcpdo has different drug response values
176-
if data_set == 'sarcpdo' and data_sets[data_set].experiments is not None:
175+
experiments_raw = data_sets[data_set].experiments
176+
177+
# Some datasets don't have drug response data (the experiments
178+
# table)
179+
if experiments_raw is None:
180+
logger.debug(f"NO experiment data for {data_set}")
181+
182+
183+
# Logic for datasets containing "published_auc" but not "auc"
184+
elif experiments_raw['dose_response_metric'].isin(['published_auc']).any():
177185
experiment = data_sets[data_set].format(
178186
data_type='experiments',
179187
shape='wide',
@@ -183,8 +191,29 @@ def process_datasets(args):
183191
)
184192
experiment.rename(columns={'published_auc': 'auc'}, inplace=True)
185193
experiments.append(experiment)
186-
# not all Datasets have experiments / drug response data
187-
elif data_sets[data_set].experiments is not None:
194+
195+
# Logic for PDX datasets that don't have `auc` but mRECIST (note
196+
# the typo currently in the `dose_response_metric` column).
197+
elif experiments_raw['dose_response_metric'].isin(['mRESCIST']).any():
198+
experiment = data_sets[data_set].format(
199+
data_type='experiments',
200+
shape='wide',
201+
metrics=[
202+
'mRESCIST',
203+
],
204+
)
205+
# conversion logic from mRECIST -> auc
206+
experiment.loc[experiment['mRESCIST'] == 'CR', 'mRESCIST'] = 0.1
207+
experiment.loc[experiment['mRESCIST'] == 'PR', 'mRESCIST'] = 0.2
208+
experiment.loc[experiment['mRESCIST'] == 'SD', 'mRESCIST'] = 0.5
209+
experiment.loc[experiment['mRESCIST'] == 'PD', 'mRESCIST'] = 1.0
210+
211+
experiment.rename(columns={'mRESCIST': 'auc'}, inplace=True)
212+
experiments.append(experiment)
213+
214+
# The remaining datasets should have `auc` as
215+
# `dose_response_metric` available in the `experiments` table
216+
else:
188217
logger.debug(f"experiment data found for {data_set}")
189218
# formatting existing response data to wide
190219
experiment = data_sets[data_set].format(
@@ -203,8 +232,6 @@ def process_datasets(args):
203232
],
204233
)
205234
experiments.append(experiment.dropna())
206-
else:
207-
logger.debug(f"NO experiment data for {data_set}")
208235

209236
# concatenating existing response data and "clean up"
210237
logger.debug("concatenating experiment data ...")

0 commit comments

Comments
 (0)