Skip to content

Commit 68e9906

Browse files
committed
Define min and max axes for figures based on the input
1 parent 8d23112 commit 68e9906

File tree

1 file changed

+45
-33
lines changed

1 file changed

+45
-33
lines changed

beams/plots.py

Lines changed: 45 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -24,22 +24,10 @@
2424
except:
2525
continue
2626

27-
import matplotlib.pyplot as plt
28-
from matplotlib import gridspec
29-
import seaborn as sns
30-
3127

32-
import sys
3328
import sqlite3
3429
import numpy as np
3530
import pandas as pd
36-
import matplotlib
37-
import matplotlib.pyplot as plt
38-
import matplotlib.gridspec as gridspec
39-
import seaborn as sns
40-
from matplotlib.backends.backend_pdf import PdfPages
41-
42-
4331
import matplotlib.pyplot as plt
4432
import matplotlib.gridspec as gridspec
4533
import seaborn as sns
@@ -64,42 +52,51 @@ def plot_correlations(column_corr, column_pvalue, df):
6452
ax_x_dist.set_axisbelow(True)
6553
ax_y_dist.set_axisbelow(True)
6654

67-
hb = ax_main.hexbin(x=column_corr, y=column_pvalue, data=df, gridsize=(20, 20), mincnt=1, extent=[-1, 1.0, 0, 0.1])
68-
ax_main.set(xlabel="Correlation coefficient (R)", ylabel="P-value", xticks=np.arange(-1, 1.1, 0.1), yticks=np.arange(0.0, 0.105, 0.005))
55+
max_pvalue = df[column_pvalue].max()
56+
bin_size_pvalue = max_pvalue / 10.0
57+
58+
hb = ax_main.hexbin(x=column_corr, y=column_pvalue, data=df, gridsize=(40, 40), mincnt=1, extent=[-1, 1.0, 0, max_pvalue])
59+
ax_main.set(xlabel="Correlation coefficient (R)", ylabel="P-value",
60+
xticks=np.arange(-1, 1.1, 0.1), yticks=np.arange(0.0, max_pvalue * 1.1, bin_size_pvalue))
61+
ax_main.ticklabel_format(style='sci', axis='y', scilimits=(0, 0), useMathText=True)
6962

70-
ax_x_dist.hist(x=column_corr, data=df, bins=40, align='mid', color="lightblue")
71-
ax_x_dist.set(ylabel='count', xlim=(-1, 1))
63+
bins = np.arange(-1, 1.05, 0.05)
64+
ax_x_dist.hist(x=column_corr, data=df, bins=bins, align='mid', color="lightblue")
65+
ax_x_dist.set(ylabel='Frequency', xlim=(-1.1, 1.1))
7266
ax_x_dist.axvline(0, color='k', linestyle='dashed', linewidth=1)
67+
ax_x_dist.tick_params(axis="x", labelsize=7.5)
7368

7469
ax_xcum_dist = ax_x_dist.twinx()
75-
ax_xcum_dist.hist(x=column_corr, data=df, bins=40, cumulative=True, histtype='step',
70+
ax_xcum_dist.hist(x=column_corr, data=df, bins=bins, cumulative=True, histtype='step',
7671
density=True, color='darkblue', align='mid')
77-
ax_xcum_dist.set(xlim=(-1, 1))
72+
ax_xcum_dist.set(xlim=(-1.1, 1.1))
7873
ax_xcum_dist.tick_params(column_corr, colors='darkblue')
7974
ax_xcum_dist.set_ylabel('cumulative', color='darkblue')
8075
ax_xcum_dist.set(yticks=np.arange(0.0, 1.2, 0.2))
8176

82-
ax_y_dist.hist(x=column_pvalue, data=df, bins=200, orientation='horizontal',
77+
bins = np.arange(0, max_pvalue + bin_size_pvalue, bin_size_pvalue)
78+
ax_y_dist.hist(x=column_pvalue, data=df, bins=bins, orientation='horizontal',
8379
align='mid', color="lightblue")
84-
ax_y_dist.set(xlabel='count')
80+
ax_y_dist.set(xlabel='Frequency')
8581
ax_ycum_dist = ax_y_dist.twiny()
86-
ax_ycum_dist.hist(x=column_pvalue, data=df, bins=200, cumulative=True, histtype='step',
82+
ax_ycum_dist.hist(x=column_pvalue, data=df, bins=bins, cumulative=True, histtype='step',
8783
density=True, color='darkblue', align='mid', orientation='horizontal')
8884
ax_ycum_dist.tick_params(column_pvalue, colors='darkblue')
8985
ax_ycum_dist.set_xlabel('cumulative', color='darkblue')
90-
ax_ycum_dist.set(xticks=np.arange(0.0, 1.2, 0.2), ylim=(0, 0.1))
86+
ax_ycum_dist.set(xticks=np.arange(0.0, 1.2, 0.2), ylim=(-bin_size_pvalue, max_pvalue * 1.1))
9187

92-
plt.setp(ax_x_dist.get_xticklabels(), visible=False)
88+
#plt.setp(ax_x_dist.get_xticklabels(), visible=False)
9389
plt.setp(ax_y_dist.get_yticklabels(), visible=False)
94-
90+
plt.setp(ax_x_dist.get_xticklabels(), rotation=90)
9591
plt.setp(ax_main.get_xticklabels(), rotation=90)
9692
plt.setp(ax_y_dist.get_xticklabels(), rotation=90)
93+
plt.setp(ax_ycum_dist.get_xticklabels(), rotation=90)
9794

9895
fig.subplots_adjust(top=0.85, right=0.85)
9996
cbar_ax = fig.add_axes([0.87, 0.15, 0.03, 0.4])
10097

10198
cb = plt.colorbar(hb, cax=cbar_ax)
102-
cb.set_label('counts')
99+
cb.set_label('Frequency')
103100

104101
return plt
105102

@@ -119,11 +116,13 @@ def plot_annotations(column_ppm_error, column_adducts, df):
119116
ppm_errors = df[column_ppm_error].dropna()
120117

121118
sns.boxplot(ppm_errors, ax=ax_box)
122-
sns.distplot(ppm_errors, ax=ax_hist)
119+
120+
bin_size = 0.1
121+
bins = np.arange(np.floor(ppm_errors.min()) - bin_size, np.ceil(ppm_errors.max()) + bin_size, bin_size).round(3)
122+
ax_hist.hist(x=column_ppm_error, data=df, bins=bins, align='mid', color="lightblue")
123123

124124
ax_hist.grid(False)
125125
ax_box.grid(False)
126-
ax_hist.grid(False)
127126

128127
std = ppm_errors.std()
129128
mean = ppm_errors.mean()
@@ -137,12 +136,13 @@ def plot_annotations(column_ppm_error, column_adducts, df):
137136
ax_box.set_title("Q1={}; median={}; Q3={}".format(round(Q1, 2), round(median, 2), round(Q3, 2)))
138137

139138
ax_hist.set_title("mean={}; std={}".format(round(mean, 2), round(std, 2)))
140-
ax_hist.set(xlabel="ppm error")
139+
ax_hist.set(xlabel="Ppm error", ylabel="Frequency")
141140

142-
sns.set(style="whitegrid")
143141
sns.countplot(df[column_adducts].dropna(), ax=ax_count)
142+
ax_count.set(xlabel="Adduct", ylabel="Frequency")
144143

145144
plt.setp(ax_box.get_xticklabels(), visible=False)
145+
plt.setp(ax_count.get_xticklabels(), rotation=90)
146146

147147
return plt
148148

@@ -154,14 +154,15 @@ def report(db, pdf_out, column_corr, column_pvalue, column_ppm_error, column_add
154154
conn = sqlite3.connect(db)
155155
cursor = conn.cursor()
156156
cursor.execute("""SELECT name FROM sqlite_master WHERE type='table';""")
157-
158-
for table in cursor.fetchall():
157+
title = "Summary - BEAMS\n\n\n"
158+
for i, table in enumerate(cursor.fetchall()):
159159
if str(table[0]) == "groups":
160160

161161
df = pd.read_sql_query("SELECT {}, {} FROM groups".format(column_corr, column_pvalue), conn)
162162

163163
plt = plot_correlations(column_corr, column_pvalue, df)
164-
plt.suptitle('Summary - BEAMS\n\n\nGrouping features', fontsize=20)
164+
plt.suptitle('{}Grouping features'.format(title), fontsize=20)
165+
title = "\n\n\n"
165166
pdf.savefig(dpi=300)
166167
plt.close()
167168

@@ -170,7 +171,18 @@ def report(db, pdf_out, column_corr, column_pvalue, column_ppm_error, column_add
170171
df = pd.read_sql_query("SELECT {}, {} FROM {}".format(column_ppm_error, column_adducts, table[0]), conn)
171172

172173
plt = plot_annotations("ppm_error", "adduct", df)
173-
plt.suptitle('Annotation\n\n{}'.format(table[0].replace("compounds_", "")), fontsize=20)
174+
plt.suptitle('{}Compound Annotation\nDatabase: {}'.format(title, table[0].replace("compounds_", "")), fontsize=20)
175+
title = "\n\n\n"
174176
pdf.savefig(dpi=300)
175177
plt.close()
176178
conn.close()
179+
180+
181+
# if __name__ == '__main__':
182+
#
183+
# report("../tests/test_results/results_annotation.sqlite", "test_report_01.pdf",
184+
# "r_value", "p_value", "ppm_error", "adduct")
185+
# statinfo = os.stat("test_report_01.pdf")
186+
#
187+
# report("../tests/test_results/results_pearson_all.sqlite", "test_report_02.pdf",
188+
# "r_value", "p_value", "ppm_error", "adduct")

0 commit comments

Comments
 (0)