Skip to content

Commit ec2ac6b

Browse files
authored
Merge pull request #36 from samplchallenges/add_logP_expt
Add logP experimental values
2 parents 2c4bfd3 + 918de06 commit ec2ac6b

File tree

9 files changed

+114
-1
lines changed

9 files changed

+114
-1
lines changed

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,9 @@ Of course, we also appreciate it if you cite any overview/experimental papers re
6666
- 2023-01-13: Switch logP compound numbering to have "SAMPL9-" prefix
6767
- 2023-01-20: Provide logP submission template and example
6868
- 2023-01-23: Add [submission server](https://submit.samplchallenges.org/submit/sampl9-logp) link for logP, update deadline to Jan. 31.
69-
- 2023-01-31: Add SAMPL submissions
69+
- 2023-01-31: Add SAMPL submissions for logP challenge
70+
- 2023-02-15: Add logP challenge experimental data
71+
- 2023-02-15: Add logP challenge super preliminary analysis
7072

7173
## Challenge construction
7274

@@ -102,6 +104,7 @@ The bCD challenge focuses on binding of five phenothiazine antipsychotic drugs t
102104
- [`experimental_measurements.X`]: Summary table of experimental data in `.csv` and `.json` formats. Includes WP6 and bCD.
103105
- [`experimental_data/CD/`](experimental_data/CD/PhenothiazineCD-Binding-Summary-3-31-2022_BAedit.docx): Data provided by the Gilson group in `.docx` format. This document was updated/corrected on 3/31/2022 from earlier values.
104106
- [`protein_ligand`](protein_ligand): Information on the protein-ligand challenge on nanoluciferase (nanoluc) binding, which includes library screening and prediction of potency (IC50).
107+
- [`logP`](logP): Files pertaining to the logP challenge, including challenge inputs, submissions, and experimental data.
105108

106109
## SAMPL-related
107110
If you give a SAMPL-related talk or presentation or an analysis of its data, and are willing to share publicly, please consider posting on Zenodo and linking it to the [SAMPL Zenodo community](https://zenodo.org/communities/sampl?page=1&size=20).

logP/Analysis/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
### Manifest
2+
3+
- `sampl_user_map.csv`: Users/submissions
4+
- `Submissions`: Submission files
5+
- `prelim_analysis`: Very preliminary analysis of results from William Zamora (we will update with our own analysis as time permits)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
This directory contains a preliminary analysis (done in R) provided by William Zamora. He manually pasted the submissions into `sampl9_submissions.csv` and analyzed them with the accompanying R script.
2+
3+
We plan to conduct our own analysis.
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
2+
########
3+
#SAMPL9#
4+
########
5+
6+
#06.02.23
7+
#William Zamora
8+
9+
library("Metrics")
10+
11+
#Submissions
12+
#logP_MD.csv, logP_Mixed.csv, and logP_QM.csv
13+
#reported transfer energies with the opposite sign (positive where they should be negative, and vice versa)
14+
#here these values were multiplied by -1
15+
16+
# Load the manually assembled submissions table.
# NOTE(review): the indexing below assumes column 4 holds the experimental
# reference values and every column from 5 onwards holds one method's
# predictions -- confirm against sampl9_submissions.csv.
# NOTE(review): read.csv() defaults to check.names = TRUE, so headers that are
# not syntactic R names are rewritten; the rewritten names are what ends up in
# the "Method" column of the output.
data <- read.csv("sampl9_submissions.csv")

ref_col <- 4
first_pred_col <- 5

# Fail early with a clear message: first_pred_col:length(data) would count
# backwards (e.g. 5:4) if the table had no prediction columns.
if (length(data) < first_pred_col) {
  stop(
    "sampl9_submissions.csv must have at least ", first_pred_col,
    " columns (reference in column ", ref_col, ", predictions after it)",
    call. = FALSE
  )
}

pred_cols <- first_pred_col:length(data)
y <- data[, ref_col]

# Apply `metric(reference, prediction)` to every prediction column and round
# each result to two decimals. Returns one number per prediction column.
per_method <- function(metric) {
  vapply(
    pred_cols,
    function(i) round(metric(y, data[, i]), 2),
    numeric(1)
  )
}

# Root mean square deviation (RMSD). The variable and output column keep the
# historical "rsmd" spelling so the header of sampl9_stat.csv does not change.
rsmd <- per_method(function(obs, pred) rmse(obs, pred))

# Coefficient of determination (R2): squared correlation between reference
# and prediction.
r2 <- per_method(function(obs, pred) cor(obs, pred)^2)

# Mean signed error (MSE), computed as reference minus prediction: a positive
# value means the method's values lie below the reference on average.
mse <- per_method(function(obs, pred) sum((obs - pred) / length(pred)))

# Mean unsigned error (MUE).
mue <- per_method(function(obs, pred) mae(pred, obs))

# One row per method, one column per statistic.
statdata <- data.frame(
  Method = colnames(data)[pred_cols],
  rsmd = rsmd,
  r2 = r2,
  mse = mse,
  mue = mue
)

# Order the methods by RMSD, ascending.
statdata <- statdata[order(statdata$rsmd), ]

write.table(statdata, "sampl9_stat.csv", sep = ",", row.names = FALSE)
59+
60+
61+
11.7 KB
Binary file not shown.

logP/Analysis/prelim_analysis/sampl9_submissions.csv

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
id,name,logPexp,Dgexp,COSMO-RS,EC-RISM_TFE_P3,EC-RISM_TFE_P1,MD-FEP-AWH_3x3x3_4ns_TIP4P,RI-B3LYP/def2-TZVP with MD clusters,NE-FG,EE-MCC/GAFF2-AM1-BCC/TIP3P/MD/,SM8,MM/PBSA,COSMO-RS-2,EE/Openff-2.0/TIP3P/MD-EE/WL,DLPNO-CCSD(T)/def2-SVP,MD (GAFF/TIP3P),MD (OPLS-AA/M24),MD (OPLS-AA/TIP4P),MD_GAFF_IPolQ_LJFit_pathfinder,gc-lser-ufz,gc-lser
2+
1,Albendazole,3.76,-5.11,-2.52,-3.35,-0.43,-3.38,-3.36,-4,-3.6,1.26,-3.45,-2.38,-4.542,-4.23,-4.2,-8.17,-8.71,7.785,-2.22,-2.88
3+
2,Alprenolol,2.4,-3.26,-5.89,-4.01,-4.03,-3.94,-1.75,-5.1,-5.7,-2.35,-3.39,-3.92,-5.813,-4.2,-6.45,-6.84,-6.62,-6.412,-4.05,-2.1
4+
3,Amitriptyline,5.51,-7.49,-9.81,-7.25,-6.65,-6.04,-7.03,-7.7,-3.2,-6.5,-4.84,-8.17,-8.44,-6.64,-7.41,-8.81,-8.87,-13.754,-7.84,-7.42
5+
4,Bifonazole,5.47,-7.44,-8.91,-4.91,-3.46,-1.71,-5.97,-8.2,-5.5,-8.7,-5.14,-7.09,-7.916,-8.36,-7.68,-10.4,-10.5,-8.258,-7.19,-8.2
6+
5,Chlorpheniramine maleate salt,3.61,-4.91,-7.24,-6.23,-5.64,-2.96,-3.99,-5.5,-3.7,-4.13,-4.25,-5.74,-7.455,-6.52,-6.98,-8.26,-8.23,-8.382,-5.29,-5.77
7+
6,Epinephrine,-1.23,1.67,4.66,4.94,6.88,2.94,4.01,2.8,-0.6,4.39,2.8,5.67,1.954,3.86,1.27,1.74,1.72,0.024,6.17,5.54
8+
7,Fluphenazine dihydrochloride,4.37,-5.94,-6.48,-8.6,-6.76,-0.64,-2.39,-7.7,-7.4,-2.67,-5.13,-4.41,-9.73,-4.23,-9.65,-6.45,-7.09,-13.918,-5.68,-5.81
9+
8,Glyburide,2.79,-3.79,-2.26,-5.2,-2.77,-8.85,2.22,1.3,-4.6,9.47,-6.26,-0.7,-4.467,-1.06,0.22,-3.5,-5.99,-4.964,-4.75,-3.18
10+
9,Imipramine hydrochloride,5.05,-6.87,-9.76,-7.6,-6.3,-5.57,-6.16,-7.8,0.1,-7.01,-4.56,-7.96,-8.404,-6.84,-7.39,-8.19,-8.17,-9.336,-7.94,-7.29
11+
10,Ketoprofen,2.47,-3.36,-3.08,-1.74,-0.35,-1.17,-3.04,-5,-2.9,-2.46,-1.98,-1.46,-3.786,-2.2,0.34,-7.28,-7.22,-6.621,-3.62,-2.31
12+
11,Nalidixic acid,1.46,-1.99,-2.54,5.13,8.28,-5.3,-6.63,-1.9,0.2,13.48,-1.58,-1.84,-0.06,-1.21,-0.9,-4.47,-4.54,-3.083,0.35,1.86
13+
12,Paracetamol,-1.59,2.16,1.66,4.96,6.59,0.97,2.1,1.3,-1.2,2.62,0.56,3.52,2.498,1.27,2.35,0.4,0.36,15.312,2.14,1.54
14+
13,Pindolol,0.36,-0.49,-2.67,-0.59,1.18,-1.56,-1.12,-2.9,-3,-0.36,-0.18,-0.57,-1.765,-1.4,-3.35,-4.83,-4.56,-3.355,-1.43,0.29
15+
14,Quinine,1.41,-1.92,-4.12,-2.35,0.51,-6.33,-0.52,-5.3,-6.3,-0.73,-2.47,-1.15,-7.449,-1.88,-7.02,-7.01,-6.92,-5.013,-1.91,-4.53
16+
15,Sulfamethazine,-0.74,1.01,3.71,4.69,7.77,-9.07,-9.62,1.5,0.3,10.25,-0.59,0.77,3.152,-2.51,2.35,2.64,2.59,-2.188,1.04,0.94
17+
16,Trazodone hydrochloride,3.77,-5.13,-5.75,-5.56,-2.39,-5.31,-0.99,-2.6,-2.8,1.6,-4.59,-4.39,-8.128,-7.58,-8.69,-7.57,-8.45,-5.426,-3.98,-4.42

logP/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,5 @@ Submissions must be in the required format, linked below. The submission file mu
4949
- `compounds_image.png`: An image of the compounds
5050
- `logP_prediction_template.csv`: Template file for predictions
5151
- `logP_template_example.csv`: Example completed template for submission
52+
- `experimental_data`: Experimental data for logP challenge
53+
- `Analysis`: Includes submissions and eventually analysis; for now includes a very preliminary first pass at statistics.

logP/experimental_data/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
## LogP experimental values
2+
3+
### Manifest
4+
5+
- `measurements_final.csv`: Contains final measured values for the logP experiments, from William Zamora, Clara Ràfols, and Rebecca Ruiz. The `new_logPexp_reviewed` column contains final values after repeated measurements and re-verification; the `logPexp_initial` column contains initial draft values. The former (final) column is the one which will be reported in the experimental paper and used in analysis. (Provided 2023-02-06 by William Zamora)
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
id,name,smiles,logPexp_initial,new_logPexp_reviewed
2+
SAMPL9-1,Albendazole,CCCSc1ccc2c(c1)[nH]c(n2)NC(=O)OC ,3.76,3.76
3+
SAMPL9-2,Alprenolol,CC(C)NCC(O)COc1ccccc1CC=C ,2.40,2.40
4+
SAMPL9-3,Amitriptyline,CN(C)CCC=C2c1ccccc1CCc3ccccc23 ,5.51,5.51
5+
SAMPL9-4,Bifonazole,c1ccc(cc1)C(c2ccc(cc2)c3ccccc3)n4ccnc4 ,5.47,5.47
6+
SAMPL9-5,Chlorpheniramine maleate salt,CN(C)CCC(c1ccc(Cl)cc1)c2ccccn2,3.61,3.61
7+
SAMPL9-6,Epinephrine,CNC[C@H](O)c1ccc(O)c(O)c1,-1.23,-1.23
8+
SAMPL9-7,Fluphenazine dihydrochloride,OCCN4CCN(CCCN2c1ccccc1Sc3ccc(cc23)C(F)(F)F)CC4 ,4.37,4.37
9+
SAMPL9-8,Glyburide,COc1ccc(Cl)cc1C(=O)NCCc2ccc(cc2)S(=O)(=O)NC(=O)NC3CCCCC3,2.79,2.79
10+
SAMPL9-9,Imipramine hydrochloride,CN(C)CCCN2c1ccccc1CCc3ccccc23,5.05,5.05
11+
SAMPL9-10,Ketoprofen,CC(C(O)=O)c1cccc(c1)C(=O)c2ccccc2,2.47,2.47
12+
SAMPL9-11,Nalidixic acid,CCn1cc(C(O)=O)c(=O)c2ccc(C)nc12,0.36,1.46
13+
SAMPL9-12,Paracetamol,CC(=O)Nc1ccc(O)cc1,-2.50,-1.59
14+
SAMPL9-13,Pindolol,CC(C)NCC(O)COc1cccc2[nH]ccc12,1.41,0.36
15+
SAMPL9-14,Quinine,COc4ccc3nccc(C(O)C1CC2CCN1CC2C=C)c3c4,3.77,1.41
16+
SAMPL9-15,Sulfamethazine,Cc2cc(C)nc(NS(=O)(=O)c1ccc(N)cc1)n2 ,-1.22,-0.74
17+
SAMPL9-16,Trazodone hydrochloride,Clc1cccc(c1)N4CCN(CCCn3nc2ccccn2c3=O)CC4,5.53,3.77

0 commit comments

Comments
 (0)