Skip to content

Commit 93e7530

Browse files
committed
add experiments python script, edits for testing with docker
Wrote a full Dockerfile; edited build_drugs.sh to reflect the position of the utils scripts in the Docker container. Added 03_createSarcPDOExperimentFile.py to the repo.
1 parent 0bb70fc commit 93e7530

File tree

4 files changed

+69
-5
lines changed

4 files changed

+69
-5
lines changed

build/docker/Dockerfile.sarcpdo

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,19 @@ FROM python:3.9
33
WORKDIR /usr/src/app
44

55

6-
COPY build/sarcpdo/00_createSarcPDOSampleFile.py .
6+
COPY build/sarcpdo/00_createSarcPDOSampleFile.py .
7+
COPY build/sarcpdo/01_createSarcPDOOmicsFiles.py .
8+
COPY build/sarcpdo/02_createSarcPDODrugsFile.py .
9+
COPY build/sarcpdo/03_createSarcPDOExperimentFile.py .
710
COPY build/sarcpdo/requirements.txt .
11+
COPY build/sarcpdo/*sh ./
12+
COPY build/utils/* ./
13+
14+
# Set MPLCONFIGDIR to a writable directory
15+
ENV MPLCONFIGDIR=/app/tmp/matplotlib
16+
RUN mkdir -p /app/tmp/matplotlib
817

918
RUN pip install --no-cache-dir -r requirements.txt
1019

11-
VOLUM ['/tmp']
20+
# Declare /tmp as a mount point. The JSON-array form requires double quotes;
# with single quotes Docker falls back to the plain-string form and uses the literal text as the path.
VOLUME ["/tmp"]
1221

13-
CMD python
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import synapseclient
2+
import pandas as pd
3+
import numpy as np
4+
import argparse
5+
import os
6+
7+
8+
if __name__ == "__main__":
9+
10+
parser = argparse.ArgumentParser(description="This script handles downloading, processing and formatting of drug information for the Sarcoma PDO project into an experiments file")
11+
12+
parser.add_argument('-t', '--token', type=str, help='Synapse Token')
13+
parser.add_argument('-s', '--samplesFile', nargs = "?", type=str, default = "", help = "Use this to provide previously generated sample file for this dataset to link to experiment data.")
14+
parser.add_argument('-d', '--drugFile', nargs = "?", type=str, default = "", help = "Use this to provide previously generated drugs file for this dataset to link with to experiment data.")
15+
16+
args = parser.parse_args()
17+
# Echo the parsed arguments for debugging, but never the Synapse token (it is a secret).
print({name: value for name, value in vars(args).items() if name != 'token'})
18+
print("Logging into Synapse")
19+
PAT = args.token
20+
synObject = synapseclient.login(authToken=PAT)
21+
22+
drug_query = synObject.tableQuery("select * from syn61892224")
23+
drug_data = drug_query.asDataFrame()
24+
25+
# convert Drug_Name to lowercase for merge with drug info files
26+
drug_data['chem_name'] = drug_data['Drug_Name'].str.lower()
27+
28+
sarcpdo_samples = pd.read_csv(args.samplesFile)
29+
30+
sarcpdo_drugs = pd.read_csv(args.drugFile, sep="\t")
31+
# reformat 'other_id', specifically, alter underscores before '1' to dashes so we can split on "_" and retain appended numbers
32+
# rename to "Sample_ID" for merging with drug_data
33+
sarcpdo_samples['Sample_ID'] = sarcpdo_samples['other_id'].str.lower()
34+
sarcpdo_samples["Sample_ID"] = sarcpdo_samples["Sample_ID"].str.replace("_1", "-1")
35+
sarcpdo_samples['Sample_ID'] = sarcpdo_samples["Sample_ID"].str.split("_", expand =True)[0]
36+
# and change dashes back to underscores to merge with drug_data's Sample_ID
37+
sarcpdo_samples["Sample_ID"] = sarcpdo_samples["Sample_ID"].str.replace("-", "_")
38+
39+
# inner merge with samples because there are samples without experiment info and many Sample_ID's in experiments data without sample info
40+
experiments = drug_data.merge(sarcpdo_drugs, how='left').merge(sarcpdo_samples, how='inner')
41+
42+
# Take an explicit copy of the column subset: new columns are assigned to it below, and
# writing into a frame derived from another frame otherwise triggers SettingWithCopyWarning.
final_experiment = experiments[['improve_sample_id', 'improve_drug_id', 'Viability_Score']].copy()
43+
final_experiment.loc[:,['study']] = 'Landscape of Sarcoma'
44+
final_experiment.loc[:,['source']] = 'pharmacoGX'
45+
final_experiment.loc[:,['time']] = None
46+
final_experiment.loc[:,['time_unit']]= None
47+
final_experiment.loc[:,['dose_response_metric']] = 'published_auc'
48+
final_experiment.loc[:,['dose_response_value']] = final_experiment['Viability_Score']
49+
50+
toReturn = final_experiment[['source', 'improve_sample_id', 'improve_drug_id', 'study', 'time', 'time_unit', 'dose_response_metric', 'dose_response_value']]
51+
52+
toReturn.to_csv('/tmp/sarcpdo_experiments.tsv', sep='\t', index=False)
53+
54+
55+
# to test run
56+
# python3 03_createSarcPDOExperimentFile.py -t $SYNAPSE_AUTH_TOKEN -s sarcpdo_samples.csv -d sarcpdo_drugs.tsv

build/sarcpdo/build_drugs.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ set -euo pipefail
44
trap 'echo "Error on or near line $LINENO while executing: $BASH_COMMAND"; exit 1' ERR
55

66
echo "Running script with token and drugFile $1"
7-
python3 02_createSarcPDODrugsFile.py --token $SYNAPSE_AUTH_TOKEN -d $1 -o /tmp/sarcpdo_drugs.tsv
7+
# Invoke the drugs script by its full filename (including .py); quote expansions to avoid word splitting.
python3 02_createSarcPDODrugsFile.py --token "$SYNAPSE_AUTH_TOKEN" -d "$1" -o /tmp/sarcpdo_drugs.tsv
88

99
echo "Running build_drug_desc.py..."
1010
python3 build_drug_desc.py --drugtable /tmp/sarcpdo_drugs.tsv --desctable /tmp/sarcpdo_drug_descriptors.tsv.gz

build/sarcpdo/build_exp.sh

100644100755
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ set -euo pipefail
44
trap 'echo "Error on or near line $LINENO while executing: $BASH_COMMAND"; exit 1' ERR
55

66
echo "Running script with token and curSamples $1 and drugFile $2."
7-
python3 03_createSarcPDOExperimentFile.py -t $SYNAPSE_AUTH_TOKEN -s sarcpdo_samples.csv -d sarcpdo_drugs.tsv
7+
# Quote the token and both file arguments so paths with spaces or glob characters are passed intact.
python3 03_createSarcPDOExperimentFile.py -t "$SYNAPSE_AUTH_TOKEN" -s "$1" -d "$2"

0 commit comments

Comments
 (0)