Skip to content

Commit d66426e

Browse files
making report generation functional again
1 parent 6f49935 commit d66426e

File tree

4 files changed

+600
-44
lines changed

4 files changed

+600
-44
lines changed

ms2rescore/report/__main__.py

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import logging
2+
from pathlib import Path
23

34
import click
5+
import psm_utils.io
46
from rich.logging import RichHandler
57

68
from ms2rescore.report.generate import generate_report
@@ -9,8 +11,19 @@
911

1012

1113
@click.command()
12-
@click.argument("output_prefix", type=str)
13-
def main(**kwargs):
14+
@click.argument("psm_file", type=click.Path(exists=True))
15+
@click.option(
16+
"--output",
17+
"-o",
18+
type=click.Path(),
19+
default=None,
20+
help="Output path for the report HTML file. If not provided, will be based on PSM file name.",
21+
)
22+
def main(psm_file, output):
23+
"""Generate MS²Rescore report from a PSM TSV file.
24+
25+
PSM_FILE: Path to the PSM TSV file (e.g., output.psms.tsv)
26+
"""
1427
logging.getLogger("mokapot").setLevel(logging.WARNING)
1528
logging.basicConfig(
1629
level=logging.INFO,
@@ -19,7 +32,38 @@ def main(**kwargs):
1932
)
2033

2134
try:
22-
generate_report(kwargs["output_prefix"])
35+
psm_file_path = Path(psm_file)
36+
37+
# Determine output path
38+
if output:
39+
output_path = Path(output)
40+
else:
41+
# Try to infer from ms2rescore naming convention
42+
if ".ms2rescore.psms.tsv" in psm_file_path.name:
43+
output_prefix = str(psm_file_path).replace(".psms.tsv", "")
44+
else:
45+
# Use the PSM file name without extension
46+
output_prefix = str(psm_file_path.with_suffix(""))
47+
output_path = Path(output_prefix + ".report.html")
48+
49+
logger.info(f"Reading PSMs from {psm_file_path}...")
50+
psm_list = psm_utils.io.read_file(psm_file_path, filetype="tsv", show_progressbar=True)
51+
52+
logger.info("Generating report...")
53+
# Try to infer output prefix for finding other files
54+
if ".ms2rescore.psms.tsv" in psm_file_path.name:
55+
output_prefix = str(psm_file_path).replace(".psms.tsv", "")
56+
else:
57+
output_prefix = str(psm_file_path.with_suffix(""))
58+
59+
generate_report(
60+
output_path_prefix=output_prefix,
61+
psm_list=psm_list,
62+
output_file=output_path,
63+
)
64+
65+
logger.info(f"✓ Report generated: {output_path}")
66+
2367
except Exception as e:
2468
logger.exception(e)
2569
exit(1)

ms2rescore/report/charts.py

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -789,3 +789,217 @@ def rt_distribution_baseline(
789789
)
790790

791791
return fig
792+
793+
794+
def _score_at_fdr_threshold(psm_df, score_column, qvalue_column, fdr_threshold):
    """
    Return the score of the accepted PSM with the highest q-value.

    PSMs with a q-value at or below ``fdr_threshold`` are sorted by descending
    q-value and the score of the first one is returned. ``None`` is returned
    when no PSM passes the threshold or when a required column is missing.
    """
    try:
        accepted = psm_df[psm_df[qvalue_column] <= fdr_threshold]
        return accepted.sort_values(qvalue_column, ascending=False)[score_column].iloc[0]
    except (IndexError, KeyError):
        return None


def score_scatter_plot_df(
    psm_df: pd.DataFrame,
    fdr_threshold: float = 0.01,
) -> go.Figure:
    """
    Plot PSM scores before and after rescoring from a dataframe.

    Parameters
    ----------
    psm_df
        Dataframe with PSM information including score_before, score_after,
        qvalue_before, qvalue_after, and is_decoy columns.
    fdr_threshold
        FDR threshold for drawing threshold lines.

    Returns
    -------
    go.Figure
        Plotly figure with score comparison. If the ``score_before`` or
        ``score_after`` column is missing, an empty figure carrying an
        explanatory annotation is returned instead.

    """
    if "score_before" not in psm_df.columns or "score_after" not in psm_df.columns:
        figure = go.Figure()
        figure.add_annotation(
            text="No before/after score data available for comparison.",
            showarrow=False,
        )
        return figure

    # Work on a copy so the caller's dataframe does not gain the helper column.
    plot_df = psm_df.copy()
    plot_df["PSM type"] = plot_df["is_decoy"].map({True: "decoy", False: "target"})

    # Score thresholds; None when they cannot be determined.
    score_threshold_before = _score_at_fdr_threshold(
        plot_df, "score_before", "qvalue_before", fdr_threshold
    )
    score_threshold_after = _score_at_fdr_threshold(
        plot_df, "score_after", "qvalue_after", fdr_threshold
    )

    # Plot
    fig = px.scatter(
        data_frame=plot_df,
        x="score_before",
        y="score_after",
        color="PSM type",
        marginal_x="histogram",
        marginal_y="histogram",
        opacity=0.1,
        labels={
            "score_before": "PSM score (before rescoring)",
            "score_after": "PSM score (after rescoring)",
        },
    )

    # Draw FDR thresholds. Compare against None explicitly: a threshold score of
    # exactly 0 is valid and must not be skipped by a truthiness test.
    if score_threshold_before is not None:
        fig.add_vline(x=score_threshold_before, line_dash="dash", row=1, col=1)
        fig.add_vline(x=score_threshold_before, line_dash="dash", row=2, col=1)
    if score_threshold_after is not None:
        fig.add_hline(y=score_threshold_after, line_dash="dash", row=1, col=1)
        fig.add_hline(y=score_threshold_after, line_dash="dash", row=1, col=2)

    return fig
869+
870+
871+
def fdr_plot_comparison_df(
    psm_df: pd.DataFrame,
    fdr_threshold: float = 0.01,
) -> go.Figure:
    """
    Plot number of identifications in function of FDR threshold before/after rescoring from dataframe.

    Parameters
    ----------
    psm_df
        Dataframe with PSM information including qvalue_before, qvalue_after, and is_decoy columns.
    fdr_threshold
        FDR threshold at which the vertical marker line is drawn. Defaults to
        0.01, the value that was previously hard-coded.

    Returns
    -------
    go.Figure
        Plotly figure with FDR comparison. If the ``qvalue_before`` or
        ``qvalue_after`` column is missing, an empty figure carrying an
        explanatory annotation is returned instead.

    """
    if "qvalue_before" not in psm_df.columns or "qvalue_after" not in psm_df.columns:
        figure = go.Figure()
        figure.add_annotation(
            text="No before/after q-value data available for comparison.",
            showarrow=False,
        )
        return figure

    # Filter targets only
    targets = psm_df[~psm_df["is_decoy"]].copy()

    # Prepare data in long format: one "q-value" column plus a label column
    # telling whether the value comes from before or after rescoring.
    plot_data = pd.concat(
        [
            targets[["qvalue_before"]]
            .rename(columns={"qvalue_before": "q-value"})
            .assign(**{"before/after": "before rescoring"}),
            targets[["qvalue_after"]]
            .rename(columns={"qvalue_after": "q-value"})
            .assign(**{"before/after": "after rescoring"}),
        ]
    )

    # Plot; ecdfnorm=None keeps raw cumulative counts on the y axis.
    fig = px.ecdf(
        data_frame=plot_data,
        x="q-value",
        color="before/after",
        log_x=True,
        ecdfnorm=None,
        labels={
            "q-value": "FDR threshold",
            "before/after": "",
        },
        color_discrete_map={
            "before rescoring": "#316395",
            "after rescoring": "#319545",
        },
    )
    fig.add_vline(x=fdr_threshold, line_dash="dash", line_color="black")
    fig.update_layout(yaxis_title="Identified PSMs")
    return fig
929+
930+
931+
def identification_overlap_df(
    psm_df: pd.DataFrame,
    fdr_threshold: float = 0.01,
) -> go.Figure:
    """
    Plot stacked bar charts of removed, retained, and gained PSMs and peptides from dataframe.

    Parameters
    ----------
    psm_df
        Dataframe with PSM information including qvalue_before, qvalue_after,
        is_decoy, and peptidoform columns. The peptide-level chart is only
        drawn when the peptidoform column is present.
    fdr_threshold
        FDR threshold for counting identifications.

    Returns
    -------
    go.Figure
        Plotly figure with identification overlap, one subplot row per level.
        If the ``qvalue_before`` or ``qvalue_after`` column is missing, an
        empty figure carrying an explanatory annotation is returned instead.

    """
    if any(column not in psm_df.columns for column in ("qvalue_before", "qvalue_after")):
        placeholder = go.Figure()
        placeholder.add_annotation(
            text="No before/after q-value data available for comparison.",
            showarrow=False,
        )
        return placeholder

    # Target PSMs passing the threshold before and after rescoring.
    target_psms = psm_df[~psm_df["is_decoy"]]
    passing_before = target_psms[target_psms["qvalue_before"] <= fdr_threshold]
    passing_after = target_psms[target_psms["qvalue_after"] <= fdr_threshold]

    # Identified items per level as (before, after) sets; PSMs are keyed by row index.
    identified = {"psms": (set(passing_before.index), set(passing_after.index))}
    if "peptidoform" in psm_df.columns:
        identified["peptides"] = (
            set(passing_before["peptidoform"].unique()),
            set(passing_after["peptidoform"].unique()),
        )

    # Removed counts are negative so they extend left of zero in the relative bars.
    counts = {"removed": {}, "retained": {}, "gained": {}}
    for level, (before, after) in identified.items():
        counts["removed"][level] = -len(before - after)
        counts["retained"][level] = len(after & before)
        counts["gained"][level] = len(after - before)

    category_colors = {
        "removed": "#953331",
        "retained": "#316395",
        "gained": "#319545",
    }
    fig = plotly.subplots.make_subplots(rows=len(identified), cols=1)

    for row_number, level in enumerate(identified, start=1):
        for category, color in category_colors.items():
            bar = go.Bar(
                y=[level],
                x=[counts[category][level]],
                marker={"color": color},
                orientation="h",
                name=category,
                # Only the first row contributes legend entries, avoiding duplicates.
                showlegend=row_number == 1,
            )
            fig.add_trace(bar, row=row_number, col=1)
    fig.update_layout(barmode="relative")

    return fig

0 commit comments

Comments
 (0)