25
25
import seaborn as sns
26
26
27
27
28
- def report (df , column_ppm_error , column_adducts , fn_pdf ):
28
+ import sys
29
+ import sqlite3
30
+ import numpy as np
31
+ import pandas as pd
32
+ import matplotlib
33
+ import matplotlib .pyplot as plt
34
+ import matplotlib .gridspec as gridspec
35
+ import seaborn as sns
36
+ from matplotlib .backends .backend_pdf import PdfPages
37
+
38
+
39
+ import matplotlib .pyplot as plt
40
+ import matplotlib .gridspec as gridspec
41
+ import seaborn as sns
42
+ from matplotlib .backends .backend_pdf import PdfPages
43
+
44
+
45
def plot_correlations(column_corr, column_pvalue, df):
    """Draw a correlation-vs-p-value summary page on a new A4 portrait figure.

    The figure has three panels on a 3x3 grid:

    * main panel: hexbin density of ``column_corr`` (x) against
      ``column_pvalue`` (y), restricted to R in [-1, 1] and p in [0, 0.1];
    * top panel: histogram of ``column_corr`` with a cumulative (density)
      step curve on a secondary y axis;
    * right panel: horizontal histogram of ``column_pvalue`` with a
      cumulative (density) step curve on a secondary x axis.

    Args:
        column_corr: Name of the column in ``df`` holding the correlation
            coefficients.
        column_pvalue: Name of the column in ``df`` holding the p-values.
        df: Table-like object (e.g. a pandas DataFrame) indexable by the two
            column names; it is handed to matplotlib through ``data=``.

    Returns:
        The ``matplotlib.pyplot`` module, whose current figure is the one
        created here, so the caller can add a title, save and close it.
    """
    # A4 portrait, in inches.
    fig = plt.figure(figsize=(8.27, 11.69))

    gs = gridspec.GridSpec(3, 3)
    ax_main = fig.add_subplot(gs[1:3, :2])
    ax_x_dist = fig.add_subplot(gs[0, :2], sharex=ax_main)
    ax_y_dist = fig.add_subplot(gs[1:3, 2], sharey=ax_main)

    # Dashed grid drawn underneath the data on every primary panel.
    for ax in (ax_main, ax_x_dist, ax_y_dist):
        ax.grid(linestyle='dashed')
        ax.set_axisbelow(True)

    hb = ax_main.hexbin(x=column_corr, y=column_pvalue, data=df, gridsize=(20, 20),
                        mincnt=1, extent=[-1, 1.0, 0, 0.1])
    ax_main.set(xlabel="Correlation coefficient (R)", ylabel="P-value",
                xticks=np.arange(-1, 1.1, 0.1), yticks=np.arange(0.0, 0.105, 0.005))

    # Top panel: distribution of the correlation coefficients.
    ax_x_dist.hist(x=column_corr, data=df, bins=40, align='mid', color="lightblue")
    ax_x_dist.set(ylabel='count', xlim=(-1, 1))
    ax_x_dist.axvline(0, color='k', linestyle='dashed', linewidth=1)

    ax_xcum_dist = ax_x_dist.twinx()
    ax_xcum_dist.hist(x=column_corr, data=df, bins=40, cumulative=True, histtype='step',
                      density=True, color='darkblue', align='mid')
    ax_xcum_dist.set(xlim=(-1, 1))
    # tick_params takes an axis selector ('x', 'y' or 'both'), not a column
    # name; the cumulative scale of a twinx axis lives on its y axis.
    ax_xcum_dist.tick_params(axis='y', colors='darkblue')
    ax_xcum_dist.set_ylabel('cumulative', color='darkblue')
    ax_xcum_dist.set(yticks=np.arange(0.0, 1.2, 0.2))

    # Right panel: distribution of the p-values.
    ax_y_dist.hist(x=column_pvalue, data=df, bins=200, orientation='horizontal',
                   align='mid', color="lightblue")
    ax_y_dist.set(xlabel='count')

    ax_ycum_dist = ax_y_dist.twiny()
    ax_ycum_dist.hist(x=column_pvalue, data=df, bins=200, cumulative=True, histtype='step',
                      density=True, color='darkblue', align='mid', orientation='horizontal')
    # The cumulative scale of a twiny axis lives on its x axis.
    ax_ycum_dist.tick_params(axis='x', colors='darkblue')
    ax_ycum_dist.set_xlabel('cumulative', color='darkblue')
    ax_ycum_dist.set(xticks=np.arange(0.0, 1.2, 0.2), ylim=(0, 0.1))

    # The marginal panels share an axis with the main panel, so hide the
    # duplicated tick labels.
    plt.setp(ax_x_dist.get_xticklabels(), visible=False)
    plt.setp(ax_y_dist.get_yticklabels(), visible=False)

    plt.setp(ax_main.get_xticklabels(), rotation=90)
    plt.setp(ax_y_dist.get_xticklabels(), rotation=90)

    # Leave room at the top for a title and on the right for the colorbar.
    fig.subplots_adjust(top=0.85, right=0.85)
    cbar_ax = fig.add_axes([0.87, 0.15, 0.03, 0.4])

    cb = fig.colorbar(hb, cax=cbar_ax)
    cb.set_label('counts')

    return plt
101
+
102
+
103
+ def plot_annotations (column_ppm_error , column_adducts , df ):
29
104
30
105
fig = plt .figure ()
106
+ fig .set_size_inches (8.27 , 11.69 )
31
107
32
- gs = gridspec .GridSpec (2 , 2 , height_ratios = [1 , 5 ])
108
+ gs = gridspec .GridSpec (5 , 2 , height_ratios = [1 , 1 , 5 , 1 , 1 ])
33
109
34
- ax_box = plt .subplot (gs [0 ])
35
- ax_hist = plt .subplot (gs [2 ], sharex = ax_box )
36
- ax_count = plt .subplot (gs [3 ])
37
- #ax = plt.subplot(gs[1])
110
+ ax_box = plt .subplot (gs [2 ])
111
+ ax_hist = plt .subplot (gs [4 ], sharex = ax_box )
112
+ ax_count = plt .subplot (gs [5 ])
113
+ # ax = plt.subplot(gs[1])
38
114
39
115
ppm_errors = df [column_ppm_error ].dropna ()
40
116
41
117
sns .boxplot (ppm_errors , ax = ax_box )
42
118
sns .distplot (ppm_errors , ax = ax_hist )
43
119
120
+ ax_hist .grid (False )
121
+ ax_box .grid (False )
122
+ ax_hist .grid (False )
123
+
44
124
std = ppm_errors .std ()
45
125
mean = ppm_errors .mean ()
46
126
median = ppm_errors .median ()
@@ -49,7 +129,7 @@ def report(df, column_ppm_error, column_adducts, fn_pdf):
49
129
50
130
# Remove x axis name for the boxplot
51
131
ax_box .set (xlabel = "" )
52
- #ax_box.set_xticks([])
132
+ # ax_box.set_xticks([])
53
133
ax_box .set_title ("Q1={}; median={}; Q3={}" .format (round (Q1 , 2 ), round (median , 2 ), round (Q3 , 2 )))
54
134
55
135
ax_hist .set_title ("mean={}; std={}" .format (round (mean , 2 ), round (std , 2 )))
@@ -60,6 +140,33 @@ def report(df, column_ppm_error, column_adducts, fn_pdf):
60
140
61
141
plt .setp (ax_box .get_xticklabels (), visible = False )
62
142
63
- fig .suptitle ('Summary - BEAMS' , fontsize = 20 )
64
- fig .set_size_inches (11.69 , 8.27 )
65
- fig .savefig (fn_pdf , format = "pdf" )
143
+ return plt
144
+
145
+
146
def report(db, pdf_out, column_corr, column_pvalue, column_ppm_error, column_adducts):
    """Write a multi-page PDF summary of a results SQLite database.

    Every table listed in ``sqlite_master`` is inspected:

    * the ``groups`` table produces one page built by ``plot_correlations``;
    * each table whose name starts with ``compounds_`` produces one page
      built by ``plot_annotations``, titled with the remainder of the name.

    Other tables are ignored.

    Args:
        db: Path of the SQLite database to read.
        pdf_out: Path of the PDF file to write.
        column_corr: Column of ``groups`` holding correlation coefficients.
        column_pvalue: Column of ``groups`` holding p-values.
        column_ppm_error: Column of the ``compounds_*`` tables holding the
            ppm errors.
        column_adducts: Column of the ``compounds_*`` tables holding the
            adduct labels.
    """
    with PdfPages(pdf_out) as pdf:

        conn = sqlite3.connect(db)
        try:
            cursor = conn.cursor()
            cursor.execute("""SELECT name FROM sqlite_master WHERE type='table';""")
            table_names = [str(row[0]) for row in cursor.fetchall()]

            for table_name in table_names:
                if table_name == "groups":

                    # Column names are identifiers and cannot be bound as SQL
                    # parameters; they are expected to come from trusted
                    # configuration.
                    df = pd.read_sql_query(
                        "SELECT {}, {} FROM groups".format(column_corr, column_pvalue), conn)

                    page = plot_correlations(column_corr, column_pvalue, df)
                    page.suptitle('Summary - BEAMS\n \n \n Grouping features', fontsize=20)
                    pdf.savefig(dpi=300)
                    page.close()

                elif table_name.startswith("compounds_"):

                    # Quote the table name so names containing characters that
                    # are not valid in a bare identifier still work.
                    quoted_table = '"{}"'.format(table_name.replace('"', '""'))
                    df = pd.read_sql_query(
                        "SELECT {}, {} FROM {}".format(
                            column_ppm_error, column_adducts, quoted_table),
                        conn)

                    # Plot the columns that were actually selected rather than
                    # hard-coded names.
                    page = plot_annotations(column_ppm_error, column_adducts, df)
                    page.suptitle(
                        'Annotation\n \n {}'.format(table_name.replace("compounds_", "")),
                        fontsize=20)
                    pdf.savefig(dpi=300)
                    page.close()
        finally:
            # Release the database handle even if a query or a plot fails.
            conn.close()
0 commit comments