24
24
except :
25
25
continue
26
26
27
- import matplotlib .pyplot as plt
28
- from matplotlib import gridspec
29
- import seaborn as sns
30
-
31
27
32
- import sys
33
28
import sqlite3
34
29
import numpy as np
35
30
import pandas as pd
36
- import matplotlib
37
- import matplotlib .pyplot as plt
38
- import matplotlib .gridspec as gridspec
39
- import seaborn as sns
40
- from matplotlib .backends .backend_pdf import PdfPages
41
-
42
-
43
31
import matplotlib .pyplot as plt
44
32
import matplotlib .gridspec as gridspec
45
33
import seaborn as sns
@@ -64,42 +52,51 @@ def plot_correlations(column_corr, column_pvalue, df):
64
52
ax_x_dist .set_axisbelow (True )
65
53
ax_y_dist .set_axisbelow (True )
66
54
67
- hb = ax_main .hexbin (x = column_corr , y = column_pvalue , data = df , gridsize = (20 , 20 ), mincnt = 1 , extent = [- 1 , 1.0 , 0 , 0.1 ])
68
- ax_main .set (xlabel = "Correlation coefficient (R)" , ylabel = "P-value" , xticks = np .arange (- 1 , 1.1 , 0.1 ), yticks = np .arange (0.0 , 0.105 , 0.005 ))
55
+ max_pvalue = df [column_pvalue ].max ()
56
+ bin_size_pvalue = max_pvalue / 10.0
57
+
58
+ hb = ax_main .hexbin (x = column_corr , y = column_pvalue , data = df , gridsize = (40 , 40 ), mincnt = 1 , extent = [- 1 , 1.0 , 0 , max_pvalue ])
59
+ ax_main .set (xlabel = "Correlation coefficient (R)" , ylabel = "P-value" ,
60
+ xticks = np .arange (- 1 , 1.1 , 0.1 ), yticks = np .arange (0.0 , max_pvalue * 1.1 , bin_size_pvalue ))
61
+ ax_main .ticklabel_format (style = 'sci' , axis = 'y' , scilimits = (0 , 0 ), useMathText = True )
69
62
70
- ax_x_dist .hist (x = column_corr , data = df , bins = 40 , align = 'mid' , color = "lightblue" )
71
- ax_x_dist .set (ylabel = 'count' , xlim = (- 1 , 1 ))
63
+ bins = np .arange (- 1 , 1.05 , 0.05 )
64
+ ax_x_dist .hist (x = column_corr , data = df , bins = bins , align = 'mid' , color = "lightblue" )
65
+ ax_x_dist .set (ylabel = 'Frequency' , xlim = (- 1.1 , 1.1 ))
72
66
ax_x_dist .axvline (0 , color = 'k' , linestyle = 'dashed' , linewidth = 1 )
67
+ ax_x_dist .tick_params (axis = "x" , labelsize = 7.5 )
73
68
74
69
ax_xcum_dist = ax_x_dist .twinx ()
75
- ax_xcum_dist .hist (x = column_corr , data = df , bins = 40 , cumulative = True , histtype = 'step' ,
70
+ ax_xcum_dist .hist (x = column_corr , data = df , bins = bins , cumulative = True , histtype = 'step' ,
76
71
density = True , color = 'darkblue' , align = 'mid' )
77
- ax_xcum_dist .set (xlim = (- 1 , 1 ))
72
+ ax_xcum_dist .set (xlim = (- 1.1 , 1.1 ))
78
73
ax_xcum_dist .tick_params (column_corr , colors = 'darkblue' )
79
74
ax_xcum_dist .set_ylabel ('cumulative' , color = 'darkblue' )
80
75
ax_xcum_dist .set (yticks = np .arange (0.0 , 1.2 , 0.2 ))
81
76
82
- ax_y_dist .hist (x = column_pvalue , data = df , bins = 200 , orientation = 'horizontal' ,
77
+ bins = np .arange (0 , max_pvalue + bin_size_pvalue , bin_size_pvalue )
78
+ ax_y_dist .hist (x = column_pvalue , data = df , bins = bins , orientation = 'horizontal' ,
83
79
align = 'mid' , color = "lightblue" )
84
- ax_y_dist .set (xlabel = 'count ' )
80
+ ax_y_dist .set (xlabel = 'Frequency ' )
85
81
ax_ycum_dist = ax_y_dist .twiny ()
86
- ax_ycum_dist .hist (x = column_pvalue , data = df , bins = 200 , cumulative = True , histtype = 'step' ,
82
+ ax_ycum_dist .hist (x = column_pvalue , data = df , bins = bins , cumulative = True , histtype = 'step' ,
87
83
density = True , color = 'darkblue' , align = 'mid' , orientation = 'horizontal' )
88
84
ax_ycum_dist .tick_params (column_pvalue , colors = 'darkblue' )
89
85
ax_ycum_dist .set_xlabel ('cumulative' , color = 'darkblue' )
90
- ax_ycum_dist .set (xticks = np .arange (0.0 , 1.2 , 0.2 ), ylim = (0 , 0.1 ))
86
+ ax_ycum_dist .set (xticks = np .arange (0.0 , 1.2 , 0.2 ), ylim = (- bin_size_pvalue , max_pvalue * 1.1 ))
91
87
92
- plt .setp (ax_x_dist .get_xticklabels (), visible = False )
88
+ # plt.setp(ax_x_dist.get_xticklabels(), visible=False)
93
89
plt .setp (ax_y_dist .get_yticklabels (), visible = False )
94
-
90
+ plt . setp ( ax_x_dist . get_xticklabels (), rotation = 90 )
95
91
plt .setp (ax_main .get_xticklabels (), rotation = 90 )
96
92
plt .setp (ax_y_dist .get_xticklabels (), rotation = 90 )
93
+ plt .setp (ax_ycum_dist .get_xticklabels (), rotation = 90 )
97
94
98
95
fig .subplots_adjust (top = 0.85 , right = 0.85 )
99
96
cbar_ax = fig .add_axes ([0.87 , 0.15 , 0.03 , 0.4 ])
100
97
101
98
cb = plt .colorbar (hb , cax = cbar_ax )
102
- cb .set_label ('counts ' )
99
+ cb .set_label ('Frequency ' )
103
100
104
101
return plt
105
102
@@ -119,11 +116,13 @@ def plot_annotations(column_ppm_error, column_adducts, df):
119
116
ppm_errors = df [column_ppm_error ].dropna ()
120
117
121
118
sns .boxplot (ppm_errors , ax = ax_box )
122
- sns .distplot (ppm_errors , ax = ax_hist )
119
+
120
+ bin_size = 0.1
121
+ bins = np .arange (np .floor (ppm_errors .min ()) - bin_size , np .ceil (ppm_errors .max ()) + bin_size , bin_size ).round (3 )
122
+ ax_hist .hist (x = column_ppm_error , data = df , bins = bins , align = 'mid' , color = "lightblue" )
123
123
124
124
ax_hist .grid (False )
125
125
ax_box .grid (False )
126
- ax_hist .grid (False )
127
126
128
127
std = ppm_errors .std ()
129
128
mean = ppm_errors .mean ()
@@ -137,12 +136,13 @@ def plot_annotations(column_ppm_error, column_adducts, df):
137
136
ax_box .set_title ("Q1={}; median={}; Q3={}" .format (round (Q1 , 2 ), round (median , 2 ), round (Q3 , 2 )))
138
137
139
138
ax_hist .set_title ("mean={}; std={}" .format (round (mean , 2 ), round (std , 2 )))
140
- ax_hist .set (xlabel = "ppm error" )
139
+ ax_hist .set (xlabel = "Ppm error" , ylabel = "Frequency " )
141
140
142
- sns .set (style = "whitegrid" )
143
141
sns .countplot (df [column_adducts ].dropna (), ax = ax_count )
142
+ ax_count .set (xlabel = "Adduct" , ylabel = "Frequency" )
144
143
145
144
plt .setp (ax_box .get_xticklabels (), visible = False )
145
+ plt .setp (ax_count .get_xticklabels (), rotation = 90 )
146
146
147
147
return plt
148
148
@@ -154,14 +154,15 @@ def report(db, pdf_out, column_corr, column_pvalue, column_ppm_error, column_add
154
154
conn = sqlite3 .connect (db )
155
155
cursor = conn .cursor ()
156
156
cursor .execute ("""SELECT name FROM sqlite_master WHERE type='table';""" )
157
-
158
- for table in cursor .fetchall ():
157
+ title = "Summary - BEAMS \n \n \n "
158
+ for i , table in enumerate ( cursor .fetchall () ):
159
159
if str (table [0 ]) == "groups" :
160
160
161
161
df = pd .read_sql_query ("SELECT {}, {} FROM groups" .format (column_corr , column_pvalue ), conn )
162
162
163
163
plt = plot_correlations (column_corr , column_pvalue , df )
164
- plt .suptitle ('Summary - BEAMS\n \n \n Grouping features' , fontsize = 20 )
164
+ plt .suptitle ('{}Grouping features' .format (title ), fontsize = 20 )
165
+ title = "\n \n \n "
165
166
pdf .savefig (dpi = 300 )
166
167
plt .close ()
167
168
@@ -170,7 +171,18 @@ def report(db, pdf_out, column_corr, column_pvalue, column_ppm_error, column_add
170
171
df = pd .read_sql_query ("SELECT {}, {} FROM {}" .format (column_ppm_error , column_adducts , table [0 ]), conn )
171
172
172
173
plt = plot_annotations ("ppm_error" , "adduct" , df )
173
- plt .suptitle ('Annotation\n \n {}' .format (table [0 ].replace ("compounds_" , "" )), fontsize = 20 )
174
+ plt .suptitle ('{}Compound Annotation\n Database: {}' .format (title , table [0 ].replace ("compounds_" , "" )), fontsize = 20 )
175
+ title = "\n \n \n "
174
176
pdf .savefig (dpi = 300 )
175
177
plt .close ()
176
178
conn .close ()
179
+
180
+
181
+ # if __name__ == '__main__':
182
+ #
183
+ # report("../tests/test_results/results_annotation.sqlite", "test_report_01.pdf",
184
+ # "r_value", "p_value", "ppm_error", "adduct")
185
+ # statinfo = os.stat("test_report_01.pdf")
186
+ #
187
+ # report("../tests/test_results/results_pearson_all.sqlite", "test_report_02.pdf",
188
+ # "r_value", "p_value", "ppm_error", "adduct")
0 commit comments