Skip to content

Commit cb4d62b

Browse files
committed
Update plots for reporting
1 parent 1ff2a4a commit cb4d62b

File tree

5 files changed

+139
-87
lines changed

5 files changed

+139
-87
lines changed

beams/__main__.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,9 +203,12 @@ def main():
203203
parser_sr.add_argument('-i', '--intensity-matrix', type=str, required=False,
204204
help="Tab-delimited intensity matrix.")
205205

206-
parser_sr.add_argument('-o', '--output', type=str, required=False,
206+
parser_sr.add_argument('-o', '--output', type=str, required=True,
207207
help="Output file for the summary")
208208

209+
parser_sr.add_argument('-p', '--pdf', type=str, required=False,
210+
help="Output pdf file for the summary plots")
211+
209212
parser_sr.add_argument('-d', '--db', type=str, required=True,
210213
help="Sqlite database that contains the results from the previous steps.")
211214

@@ -298,6 +301,10 @@ def main():
298301
df = in_out.combine_peaklist_matrix(args.peaklist, args.intensity_matrix)
299302
df_out = annotation.summary(df, db=args.db, single_row=args.single_row, single_column=args.single_column, convert_rt=args.convert_rt, ndigits_mz=args.ndigits_mz)
300303
df_out.to_csv(args.output, sep=separators[args.sep], index=False)
304+
if args.pdf:
305+
plots.report(db=args.db, pdf_out=args.pdf,
306+
column_corr="r_value", column_pvalue="pvalue",
307+
column_ppm_error="ppm_error", column_adducts="adduct")
301308

302309
if args.step == "start-gui":
303310
from PyQt5 import QtWidgets

beams/gui.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -490,8 +490,9 @@ def run(self):
490490

491491
separators = {"tab": "\t", "comma": ","}
492492
df_out.to_csv(self.lineEdit_summary_filename.text(), sep=separators[self.comboBox_separator.currentText()], index=False)
493-
plots.report(df=df_out, fn_pdf=os.path.join(os.path.dirname(self.lineEdit_summary_filename.text()), "report.pdf"),
494-
column_ppm_error="ppm_error", column_adducts="adduct")
493+
ext = os.path.splitext(self.lineEdit_summary_filename.text())[1]
494+
plots.report(db=self.lineEdit_sql_database.text(), pdf_out=str(self.lineEdit_summary_filename.text()).replace(ext, ".pdf"),
495+
column_corr="r_value", column_pvalue="p_value", column_ppm_error="ppm_error", column_adducts="adduct")
495496
print("Done")
496497
print("")
497498

beams/plots.py

Lines changed: 117 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,22 +25,102 @@
2525
import seaborn as sns
2626

2727

28-
def report(df, column_ppm_error, column_adducts, fn_pdf):
28+
import sys
29+
import sqlite3
30+
import numpy as np
31+
import pandas as pd
32+
import matplotlib
33+
import matplotlib.pyplot as plt
34+
import matplotlib.gridspec as gridspec
35+
import seaborn as sns
36+
from matplotlib.backends.backend_pdf import PdfPages
37+
38+
39+
import matplotlib.pyplot as plt
40+
import matplotlib.gridspec as gridspec
41+
import seaborn as sns
42+
from matplotlib.backends.backend_pdf import PdfPages
43+
44+
45+
def plot_correlations(column_corr, column_pvalue, df):
    """Plot correlation coefficients against p-values with marginal histograms.

    The figure consists of a central hexbin plot (coefficient on x, p-value
    on y), a histogram of the coefficients above it and a histogram of the
    p-values to its right. Each histogram carries a twin axis on which the
    normalised cumulative distribution is drawn as a dark blue step line.

    :param column_corr: name of the column in ``df`` holding the correlation
        coefficients.
    :param column_pvalue: name of the column in ``df`` holding the p-values.
    :param df: labelled data indexable by the two column names (``report``
        passes a pandas DataFrame).
    :return: the ``matplotlib.pyplot`` module; its current figure is the one
        drawn here, so callers can add a title, save and close it.
    """
    fig = plt.figure()
    # A4 portrait, in inches.
    fig.set_size_inches(8.27, 11.69)

    gs = gridspec.GridSpec(3, 3)
    ax_main = plt.subplot(gs[1:3, :2])
    ax_x_dist = plt.subplot(gs[0, :2], sharex=ax_main)
    ax_y_dist = plt.subplot(gs[1:3, 2], sharey=ax_main)

    # Dashed grid drawn underneath the data on all three panels.
    for ax in (ax_main, ax_x_dist, ax_y_dist):
        ax.grid(linestyle='dashed')
        ax.set_axisbelow(True)

    # Central panel: density of (coefficient, p-value) pairs. Only the region
    # -1 <= R <= 1 and 0 <= p <= 0.1 is binned; empty bins are left blank.
    hb = ax_main.hexbin(x=column_corr, y=column_pvalue, data=df, gridsize=(20, 20),
                        mincnt=1, extent=[-1, 1.0, 0, 0.1])
    ax_main.set(xlabel="Correlation coefficient (R)", ylabel="P-value",
                xticks=np.arange(-1, 1.1, 0.1), yticks=np.arange(0.0, 0.105, 0.005))

    # Top panel: distribution of the correlation coefficients.
    ax_x_dist.hist(x=column_corr, data=df, bins=40, align='mid', color="lightblue")
    ax_x_dist.set(ylabel='count', xlim=(-1, 1))
    ax_x_dist.axvline(0, color='k', linestyle='dashed', linewidth=1)

    ax_xcum_dist = ax_x_dist.twinx()
    ax_xcum_dist.hist(x=column_corr, data=df, bins=40, cumulative=True, histtype='step',
                      density=True, color='darkblue', align='mid')
    ax_xcum_dist.set(xlim=(-1, 1))
    # tick_params expects an axis selector ('x', 'y' or 'both'), not a column
    # name; the twinx axis owns the secondary y axis, so colour that one.
    ax_xcum_dist.tick_params(axis='y', colors='darkblue')
    ax_xcum_dist.set_ylabel('cumulative', color='darkblue')
    ax_xcum_dist.set(yticks=np.arange(0.0, 1.2, 0.2))

    # Right panel: distribution of the p-values, drawn horizontally so that it
    # lines up with the y axis of the central panel.
    ax_y_dist.hist(x=column_pvalue, data=df, bins=200, orientation='horizontal',
                   align='mid', color="lightblue")
    ax_y_dist.set(xlabel='count')
    ax_ycum_dist = ax_y_dist.twiny()
    ax_ycum_dist.hist(x=column_pvalue, data=df, bins=200, cumulative=True, histtype='step',
                      density=True, color='darkblue', align='mid', orientation='horizontal')
    # The twiny axis owns the secondary x axis.
    ax_ycum_dist.tick_params(axis='x', colors='darkblue')
    ax_ycum_dist.set_xlabel('cumulative', color='darkblue')
    ax_ycum_dist.set(xticks=np.arange(0.0, 1.2, 0.2), ylim=(0, 0.1))

    # The marginal panels share their value axis with the central panel, so
    # the duplicated tick labels are hidden.
    plt.setp(ax_x_dist.get_xticklabels(), visible=False)
    plt.setp(ax_y_dist.get_yticklabels(), visible=False)

    plt.setp(ax_main.get_xticklabels(), rotation=90)
    plt.setp(ax_y_dist.get_xticklabels(), rotation=90)

    # Leave room at the top for a title and at the right for the colour bar.
    fig.subplots_adjust(top=0.85, right=0.85)
    cbar_ax = fig.add_axes([0.87, 0.15, 0.03, 0.4])

    cb = plt.colorbar(hb, cax=cbar_ax)
    cb.set_label('counts')

    return plt
101+
102+
103+
def plot_annotations(column_ppm_error, column_adducts, df):
29104

30105
fig = plt.figure()
106+
fig.set_size_inches(8.27, 11.69)
31107

32-
gs = gridspec.GridSpec(2, 2, height_ratios=[1, 5])
108+
gs = gridspec.GridSpec(5, 2, height_ratios=[1, 1, 5, 1, 1])
33109

34-
ax_box = plt.subplot(gs[0])
35-
ax_hist = plt.subplot(gs[2], sharex=ax_box)
36-
ax_count = plt.subplot(gs[3])
37-
#ax = plt.subplot(gs[1])
110+
ax_box = plt.subplot(gs[2])
111+
ax_hist = plt.subplot(gs[4], sharex=ax_box)
112+
ax_count = plt.subplot(gs[5])
113+
# ax = plt.subplot(gs[1])
38114

39115
ppm_errors = df[column_ppm_error].dropna()
40116

41117
sns.boxplot(ppm_errors, ax=ax_box)
42118
sns.distplot(ppm_errors, ax=ax_hist)
43119

120+
ax_hist.grid(False)
121+
ax_box.grid(False)
122+
ax_hist.grid(False)
123+
44124
std = ppm_errors.std()
45125
mean = ppm_errors.mean()
46126
median = ppm_errors.median()
@@ -49,7 +129,7 @@ def report(df, column_ppm_error, column_adducts, fn_pdf):
49129

50130
# Remove x axis name for the boxplot
51131
ax_box.set(xlabel="")
52-
#ax_box.set_xticks([])
132+
# ax_box.set_xticks([])
53133
ax_box.set_title("Q1={}; median={}; Q3={}".format(round(Q1, 2), round(median, 2), round(Q3, 2)))
54134

55135
ax_hist.set_title("mean={}; std={}".format(round(mean, 2), round(std, 2)))
@@ -60,6 +140,33 @@ def report(df, column_ppm_error, column_adducts, fn_pdf):
60140

61141
plt.setp(ax_box.get_xticklabels(), visible=False)
62142

63-
fig.suptitle('Summary - BEAMS', fontsize=20)
64-
fig.set_size_inches(11.69, 8.27)
65-
fig.savefig(fn_pdf, format="pdf")
143+
return plt
144+
145+
146+
def report(db, pdf_out, column_corr, column_pvalue, column_ppm_error, column_adducts):
    """Write a multi-page PDF report from the tables of a results database.

    Every table listed in ``sqlite_master`` is inspected in turn:

    * ``groups`` produces one page drawn by ``plot_correlations``;
    * each table whose name starts with ``compounds_`` produces one page
      drawn by ``plot_annotations``, titled with the rest of the table name.

    All other tables are ignored.

    :param db: path to the SQLite database to read.
    :param pdf_out: path of the PDF file to write.
    :param column_corr: column of ``groups`` holding the correlation coefficients.
    :param column_pvalue: column of ``groups`` holding the p-values.
    :param column_ppm_error: column of the ``compounds_*`` tables holding the ppm errors.
    :param column_adducts: column of the ``compounds_*`` tables holding the adduct labels.
    """
    prefix = "compounds_"

    with PdfPages(pdf_out) as pdf:

        conn = sqlite3.connect(db)
        # Make sure the connection is released even if a query or a plot fails.
        try:
            cursor = conn.cursor()
            cursor.execute("""SELECT name FROM sqlite_master WHERE type='table';""")

            for table in cursor.fetchall():
                table_name = str(table[0])

                if table_name == "groups":

                    df = pd.read_sql_query(
                        "SELECT {}, {} FROM groups".format(column_corr, column_pvalue), conn)

                    page = plot_correlations(column_corr, column_pvalue, df)
                    page.suptitle('Summary - BEAMS\n\n\nGrouping features', fontsize=20)
                    pdf.savefig(dpi=300)
                    page.close()

                elif table_name.startswith(prefix):

                    # Table names cannot be bound as SQL parameters, so the
                    # name (taken from sqlite_master) is quoted as an identifier.
                    df = pd.read_sql_query(
                        'SELECT {}, {} FROM "{}"'.format(
                            column_ppm_error, column_adducts, table_name.replace('"', '""')),
                        conn)

                    # Use the column names supplied by the caller; they are
                    # the ones that were selected into ``df`` above.
                    page = plot_annotations(column_ppm_error, column_adducts, df)
                    # Strip only the leading prefix to obtain the title.
                    page.suptitle('Annotation\n\n{}'.format(table_name[len(prefix):]), fontsize=20)
                    pdf.savefig(dpi=300)
                    page.close()
        finally:
            conn.close()

conda/meta.yaml

Lines changed: 0 additions & 58 deletions
This file was deleted.

tests/test_plots.py

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,30 +5,25 @@
55
import os
66
import numpy as np
77
import pandas as pd
8-
from tests.utils import to_test_results
8+
from tests.utils import to_test_data, to_test_results
99
from beams.plots import report
1010

1111

1212
class PlotsTestCase(unittest.TestCase):
1313

1414
def test_report(self):
1515

16-
np.random.seed(0)
17-
n = 1000
18-
mu, sigma = 0, 0.1 # mean and standard deviation
19-
s = np.random.normal(mu, sigma, n)
20-
21-
lib = ["[M+H]+", "[M+Na]+", "[M+K]+"]
22-
adducts = [lib[i] for i in np.random.randint(3, size=n)]
23-
24-
df = pd.DataFrame({'ppm_error': s, "adduct": adducts})
25-
26-
report(df, "ppm_error", "adduct", to_test_results("test_report_01.pdf"))
27-
16+
report(to_test_data("results_annotation.sqlite"), to_test_results("test_report_01.pdf"),
17+
"r_value", "p_value", "ppm_error", "adduct")
2818
statinfo = os.stat(to_test_results("test_report_01.pdf"))
29-
# print statinfo.st_size
30-
self.assertTrue(statinfo.st_size > 16000)
31-
19+
#print(statinfo.st_size)
20+
self.assertTrue(statinfo.st_size > 700)
21+
22+
report(to_test_data("results_pearson_all.sqlite"), to_test_results("test_report_02.pdf"),
23+
"r_value", "p_value", "ppm_error", "adduct")
24+
statinfo = os.stat(to_test_results("test_report_02.pdf"))
25+
print(statinfo.st_size)
26+
#self.assertTrue(statinfo.st_size > 25000)
3227

3328
if __name__ == '__main__':
3429
unittest.main()

0 commit comments

Comments
 (0)