@@ -789,3 +789,217 @@ def rt_distribution_baseline(
789789 )
790790
791791 return fig
792+
793+
def _score_at_fdr_threshold(
    plot_df: pd.DataFrame,
    qvalue_column: str,
    score_column: str,
    fdr_threshold: float,
):
    """Return the score of the passing PSM with the highest q-value, or None.

    ``None`` is returned when no PSM has a q-value at or below
    ``fdr_threshold``, when either column is missing, or when the selected
    score is NaN (a line cannot be drawn at NaN).
    """
    try:
        score = (
            plot_df[plot_df[qvalue_column] <= fdr_threshold]
            .sort_values(qvalue_column, ascending=False)[score_column]
            .iloc[0]
        )
    except (IndexError, KeyError):
        # IndexError: no PSM passes the threshold; KeyError: column missing.
        return None
    if pd.isna(score):
        return None
    return score


def score_scatter_plot_df(
    psm_df: pd.DataFrame,
    fdr_threshold: float = 0.01,
) -> go.Figure:
    """
    Plot PSM scores before and after rescoring from a dataframe.

    Targets and decoys are drawn as a scatter plot of ``score_before`` versus
    ``score_after`` with marginal histograms. Dashed lines mark, on each axis,
    the score of the PSM with the highest q-value that still passes
    ``fdr_threshold``.

    Parameters
    ----------
    psm_df
        Dataframe with PSM information including score_before, score_after,
        qvalue_before, qvalue_after, and is_decoy columns. If either score
        column is missing, an empty figure with an explanatory annotation is
        returned. If a q-value column is missing, the corresponding threshold
        line is simply not drawn.
    fdr_threshold
        FDR threshold for drawing threshold lines.

    Returns
    -------
    go.Figure
        Plotly figure with score comparison.
    """
    if "score_before" not in psm_df.columns or "score_after" not in psm_df.columns:
        figure = go.Figure()
        figure.add_annotation(
            text="No before/after score data available for comparison.",
            showarrow=False,
        )
        return figure

    # Work on a copy so the caller's dataframe does not gain a "PSM type" column.
    plot_df = psm_df.copy()
    plot_df["PSM type"] = plot_df["is_decoy"].map({True: "decoy", False: "target"})

    # Get score thresholds
    score_threshold_before = _score_at_fdr_threshold(
        plot_df, "qvalue_before", "score_before", fdr_threshold
    )
    score_threshold_after = _score_at_fdr_threshold(
        plot_df, "qvalue_after", "score_after", fdr_threshold
    )

    # Plot
    fig = px.scatter(
        data_frame=plot_df,
        x="score_before",
        y="score_after",
        color="PSM type",
        marginal_x="histogram",
        marginal_y="histogram",
        opacity=0.1,
        labels={
            "score_before": "PSM score (before rescoring)",
            "score_after": "PSM score (after rescoring)",
        },
    )

    # Draw FDR thresholds. Compare against None explicitly: a threshold score
    # of exactly 0 is valid and must still be drawn.
    if score_threshold_before is not None:
        fig.add_vline(x=score_threshold_before, line_dash="dash", row=1, col=1)
        fig.add_vline(x=score_threshold_before, line_dash="dash", row=2, col=1)
    if score_threshold_after is not None:
        fig.add_hline(y=score_threshold_after, line_dash="dash", row=1, col=1)
        fig.add_hline(y=score_threshold_after, line_dash="dash", row=1, col=2)

    return fig
869+
870+
def fdr_plot_comparison_df(
    psm_df: pd.DataFrame,
    fdr_threshold: float = 0.01,
) -> go.Figure:
    """
    Plot number of identifications in function of FDR threshold before/after rescoring from dataframe.

    Only target PSMs are counted. The curves are unnormalized empirical
    cumulative distributions of the q-values, drawn on a logarithmic x axis.

    Parameters
    ----------
    psm_df
        Dataframe with PSM information including qvalue_before, qvalue_after, and is_decoy columns.
        If either q-value column is missing, an empty figure with an
        explanatory annotation is returned.
    fdr_threshold
        FDR threshold at which the dashed vertical reference line is drawn.
        Defaults to 0.01, the value that was previously hard-coded.

    Returns
    -------
    go.Figure
        Plotly figure with FDR comparison.
    """
    if "qvalue_before" not in psm_df.columns or "qvalue_after" not in psm_df.columns:
        figure = go.Figure()
        figure.add_annotation(
            text="No before/after q-value data available for comparison.",
            showarrow=False,
        )
        return figure

    # Filter targets only
    targets = psm_df[~psm_df["is_decoy"]].copy()

    # Prepare data in long format: one "q-value" column plus a label column
    # telling whether the value comes from before or after rescoring.
    plot_data = pd.concat(
        [
            targets[["qvalue_before"]]
            .rename(columns={"qvalue_before": "q-value"})
            .assign(**{"before/after": "before rescoring"}),
            targets[["qvalue_after"]]
            .rename(columns={"qvalue_after": "q-value"})
            .assign(**{"before/after": "after rescoring"}),
        ]
    )

    # Plot; ecdfnorm=None keeps raw counts on the y axis instead of fractions.
    fig = px.ecdf(
        data_frame=plot_data,
        x="q-value",
        color="before/after",
        log_x=True,
        ecdfnorm=None,
        labels={
            "q-value": "FDR threshold",
            "before/after": "",
        },
        color_discrete_map={
            "before rescoring": "#316395",
            "after rescoring": "#319545",
        },
    )
    fig.add_vline(x=fdr_threshold, line_dash="dash", line_color="black")
    fig.update_layout(yaxis_title="Identified PSMs")
    return fig
929+
930+
def identification_overlap_df(
    psm_df: pd.DataFrame,
    fdr_threshold: float = 0.01,
) -> go.Figure:
    """
    Plot stacked bar charts of removed, retained, and gained PSMs and peptides from dataframe.

    Only target PSMs are considered. An identification is "removed" when it
    passes the threshold before but not after rescoring, "retained" when it
    passes both, and "gained" when it passes only after rescoring. Removed
    counts are stored as negative numbers so they extend to the left of zero.

    Parameters
    ----------
    psm_df
        Dataframe with PSM information including qvalue_before, qvalue_after,
        is_decoy, and peptidoform columns. If either q-value column is
        missing, an empty figure with an explanatory annotation is returned.
        The peptide-level bar is only drawn when peptidoform is present.
    fdr_threshold
        FDR threshold for counting identifications.

    Returns
    -------
    go.Figure
        Plotly figure with identification overlap.
    """
    if "qvalue_before" not in psm_df.columns or "qvalue_after" not in psm_df.columns:
        figure = go.Figure()
        figure.add_annotation(
            text="No before/after q-value data available for comparison.",
            showarrow=False,
        )
        return figure

    targets = psm_df[~psm_df["is_decoy"]]

    # Positional boolean masks, one entry per target row. Counting rows rather
    # than collecting index labels into sets keeps the PSM counts correct even
    # when the dataframe index contains duplicate labels.
    passed_before = (
        (targets["qvalue_before"] <= fdr_threshold).fillna(False).to_numpy(dtype=bool)
    )
    passed_after = (
        (targets["qvalue_after"] <= fdr_threshold).fillna(False).to_numpy(dtype=bool)
    )

    # Key order matters: it is zipped with ``colors`` below.
    overlap_data = {
        "removed": {"psms": -int((passed_before & ~passed_after).sum())},
        "retained": {"psms": int((passed_before & passed_after).sum())},
        "gained": {"psms": int((passed_after & ~passed_before).sum())},
    }

    # Peptide level
    if "peptidoform" in psm_df.columns:
        peptides_before = set(targets.loc[passed_before, "peptidoform"].unique())
        peptides_after = set(targets.loc[passed_after, "peptidoform"].unique())

        overlap_data["removed"]["peptides"] = -len(peptides_before - peptides_after)
        overlap_data["retained"]["peptides"] = len(peptides_after & peptides_before)
        overlap_data["gained"]["peptides"] = len(peptides_after - peptides_before)

    colors = ["#953331", "#316395", "#319545"]
    levels = list(overlap_data["retained"])
    fig = plotly.subplots.make_subplots(rows=len(levels), cols=1)

    for i, level in enumerate(levels):
        for (item, data), color in zip(overlap_data.items(), colors):
            if level not in data:
                continue
            fig.add_trace(
                go.Bar(
                    y=[level],
                    x=[data[level]],
                    marker={"color": color},
                    orientation="h",
                    name=item,
                    # Show each category in the legend once (first subplot only).
                    showlegend=i == 0,
                ),
                row=i + 1,
                col=1,
            )
    # "relative" stacks negative values to the left and positive to the right.
    fig.update_layout(barmode="relative")

    return fig
0 commit comments