CompOmics
diff --git a/ms2rescore/report/__main__.py b/ms2rescore/report/__main__.py
Lines changed: 47 additions & 3 deletions
diff --git a/ms2rescore/report/charts.py b/ms2rescore/report/charts.py
Lines changed: 214 additions & 0 deletions
@@ -1,6 +1,8 @@
 import logging
+from pathlib import Path
 
 import click
+import psm_utils.io
 from rich.logging import RichHandler
 
 from ms2rescore.report.generate import generate_report
@@ -9,8 +11,19 @@
 
 
 @click.command()
-@click.argument("output_prefix", type=str)
-def main(**kwargs):
+@click.argument("psm_file", type=click.Path(exists=True))
+@click.option(
+    "--output",
+    "-o",
+    type=click.Path(),
+    default=None,
+    help="Output path for the report HTML file. If not provided, will be based on PSM file name.",
+)
+def main(psm_file, output):
+    """Generate MS²Rescore report from a PSM TSV file.
+
+    PSM_FILE: Path to the PSM TSV file (e.g., output.psms.tsv)
+    """
     logging.getLogger("mokapot").setLevel(logging.WARNING)
     logging.basicConfig(
         level=logging.INFO,
@@ -19,7 +32,38 @@ def main(**kwargs):
     )
 
     try:
-        generate_report(kwargs["output_prefix"])
+        psm_file_path = Path(psm_file)
+
+        # Determine output path
+        if output:
+            output_path = Path(output)
+        else:
+            # Try to infer from ms2rescore naming convention
+            if ".ms2rescore.psms.tsv" in psm_file_path.name:
+                output_prefix = str(psm_file_path).replace(".psms.tsv", "")
+            else:
+                # Use the PSM file name without extension
+                output_prefix = str(psm_file_path.with_suffix(""))
+            output_path = Path(output_prefix + ".report.html")
+
+        logger.info(f"Reading PSMs from {psm_file_path}...")
+        psm_list = psm_utils.io.read_file(psm_file_path, filetype="tsv", show_progressbar=True)
+
+        logger.info("Generating report...")
+        # Try to infer output prefix for finding other files
+        if ".ms2rescore.psms.tsv" in psm_file_path.name:
+            output_prefix = str(psm_file_path).replace(".psms.tsv", "")
+        else:
+            output_prefix = str(psm_file_path.with_suffix(""))
+
+        generate_report(
+            output_path_prefix=output_prefix,
+            psm_list=psm_list,
+            output_file=output_path,
+        )
+
+        logger.info(f"✓ Report generated: {output_path}")
+
     except Exception as e:
         logger.exception(e)
         exit(1)
 
@@ -789,3 +789,217 @@ def rt_distribution_baseline(
     )
 
     return fig
+
+
def _score_at_fdr_threshold(psm_df, qvalue_column, score_column, fdr_threshold):
    """
    Return the score of the PSM with the highest q-value at or below the threshold.

    Returns ``None`` when either column is missing, when no PSM passes the
    threshold, or when the selected score is NaN.
    """
    try:
        passing = psm_df[psm_df[qvalue_column] <= fdr_threshold]
        # A stable sort keeps the original row order among tied q-values, so the
        # selected boundary PSM does not depend on the sorting algorithm.
        boundary_score = passing.sort_values(
            qvalue_column, ascending=False, kind="mergesort"
        )[score_column].iloc[0]
    except (IndexError, KeyError):
        # IndexError: no PSM passes the threshold; KeyError: column not present.
        return None
    return None if pd.isna(boundary_score) else boundary_score


def score_scatter_plot_df(
    psm_df: pd.DataFrame,
    fdr_threshold: float = 0.01,
) -> go.Figure:
    """
    Plot PSM scores before and after rescoring from a dataframe.

    Parameters
    ----------
    psm_df
        Dataframe with PSM information including score_before, score_after,
        qvalue_before, qvalue_after, and is_decoy columns.
    fdr_threshold
        FDR threshold for drawing threshold lines.

    Returns
    -------
    go.Figure
        Scatter plot of ``score_after`` against ``score_before`` with marginal
        histograms, colored by target/decoy. If ``score_before`` or
        ``score_after`` is missing, an empty figure carrying an explanatory
        annotation is returned instead.
    """
    if "score_before" not in psm_df.columns or "score_after" not in psm_df.columns:
        figure = go.Figure()
        figure.add_annotation(
            text="No before/after score data available for comparison.",
            showarrow=False,
        )
        return figure

    # Work on a copy so the caller's dataframe does not gain the helper column.
    plot_df = psm_df.copy()
    plot_df["PSM type"] = plot_df["is_decoy"].map({True: "decoy", False: "target"})

    score_threshold_before = _score_at_fdr_threshold(
        plot_df, "qvalue_before", "score_before", fdr_threshold
    )
    score_threshold_after = _score_at_fdr_threshold(
        plot_df, "qvalue_after", "score_after", fdr_threshold
    )

    fig = px.scatter(
        data_frame=plot_df,
        x="score_before",
        y="score_after",
        color="PSM type",
        marginal_x="histogram",
        marginal_y="histogram",
        opacity=0.1,
        labels={
            "score_before": "PSM score (before rescoring)",
            "score_after": "PSM score (after rescoring)",
        },
    )

    # Draw FDR thresholds. Compare against None explicitly: a threshold score of
    # exactly 0.0 is valid and must still be drawn.
    if score_threshold_before is not None:
        fig.add_vline(x=score_threshold_before, line_dash="dash", row=1, col=1)
        fig.add_vline(x=score_threshold_before, line_dash="dash", row=2, col=1)
    if score_threshold_after is not None:
        fig.add_hline(y=score_threshold_after, line_dash="dash", row=1, col=1)
        fig.add_hline(y=score_threshold_after, line_dash="dash", row=1, col=2)

    return fig
+
+
def fdr_plot_comparison_df(
    psm_df: pd.DataFrame,
    fdr_threshold: float = 0.01,
) -> go.Figure:
    """
    Plot number of identifications in function of FDR threshold before/after rescoring.

    Parameters
    ----------
    psm_df
        Dataframe with PSM information including qvalue_before, qvalue_after, and
        is_decoy columns.
    fdr_threshold
        FDR threshold at which the vertical dashed marker line is drawn.

    Returns
    -------
    go.Figure
        Cumulative count of target PSMs as a function of q-value (log-scaled
        x-axis), one curve before and one after rescoring. If ``qvalue_before``
        or ``qvalue_after`` is missing, an empty figure carrying an explanatory
        annotation is returned instead.
    """
    if "qvalue_before" not in psm_df.columns or "qvalue_after" not in psm_df.columns:
        figure = go.Figure()
        figure.add_annotation(
            text="No before/after q-value data available for comparison.",
            showarrow=False,
        )
        return figure

    # Only target PSMs count as identifications. No copy is needed: the frame is
    # only read from, and the column selections below create new objects.
    targets = psm_df[~psm_df["is_decoy"]]

    # Long format: one "q-value" column plus a label column naming the stage.
    plot_data = pd.concat(
        [
            targets[["qvalue_before"]]
            .rename(columns={"qvalue_before": "q-value"})
            .assign(**{"before/after": "before rescoring"}),
            targets[["qvalue_after"]]
            .rename(columns={"qvalue_after": "q-value"})
            .assign(**{"before/after": "after rescoring"}),
        ]
    )

    fig = px.ecdf(
        data_frame=plot_data,
        x="q-value",
        color="before/after",
        log_x=True,
        ecdfnorm=None,  # plot raw counts instead of a normalized fraction
        labels={
            "q-value": "FDR threshold",
            "before/after": "",
        },
        color_discrete_map={
            "before rescoring": "#316395",
            "after rescoring": "#319545",
        },
    )
    fig.add_vline(x=fdr_threshold, line_dash="dash", line_color="black")
    fig.update_layout(yaxis_title="Identified PSMs")
    return fig
+
+
def identification_overlap_df(
    psm_df: pd.DataFrame,
    fdr_threshold: float = 0.01,
) -> go.Figure:
    """
    Plot stacked bar charts of removed, retained, and gained PSMs and peptides.

    Parameters
    ----------
    psm_df
        Dataframe with PSM information including qvalue_before, qvalue_after,
        is_decoy, and peptidoform columns. The peptide-level chart is only drawn
        when the peptidoform column is present.
    fdr_threshold
        FDR threshold for counting identifications.

    Returns
    -------
    go.Figure
        One horizontal bar subplot per level ("psms", and "peptides" when
        available). Removed identifications are plotted as negative values. If
        ``qvalue_before`` or ``qvalue_after`` is missing, an empty figure
        carrying an explanatory annotation is returned instead.
    """
    if "qvalue_before" not in psm_df.columns or "qvalue_after" not in psm_df.columns:
        figure = go.Figure()
        figure.add_annotation(
            text="No before/after q-value data available for comparison.",
            showarrow=False,
        )
        return figure

    targets = psm_df[~psm_df["is_decoy"]]

    # Positional boolean masks rather than sets of index labels: every row is
    # counted once even when the dataframe index contains duplicate labels.
    passed_before = (targets["qvalue_before"] <= fdr_threshold).to_numpy()
    passed_after = (targets["qvalue_after"] <= fdr_threshold).to_numpy()

    # level -> category -> count; "removed" is negated so its bar points left.
    counts = {
        "psms": {
            "removed": -int((passed_before & ~passed_after).sum()),
            "retained": int((passed_before & passed_after).sum()),
            "gained": int((~passed_before & passed_after).sum()),
        }
    }

    if "peptidoform" in psm_df.columns:
        peptides_before = set(targets.loc[passed_before, "peptidoform"])
        peptides_after = set(targets.loc[passed_after, "peptidoform"])
        counts["peptides"] = {
            "removed": -len(peptides_before - peptides_after),
            "retained": len(peptides_before & peptides_after),
            "gained": len(peptides_after - peptides_before),
        }

    # Explicit category -> color mapping; also fixes the trace order per subplot.
    category_colors = {
        "removed": "#953331",
        "retained": "#316395",
        "gained": "#319545",
    }

    fig = plotly.subplots.make_subplots(rows=len(counts), cols=1)
    for row, (level, level_counts) in enumerate(counts.items(), start=1):
        for category, color in category_colors.items():
            fig.add_trace(
                go.Bar(
                    y=[level],
                    x=[level_counts[category]],
                    marker={"color": color},
                    orientation="h",
                    name=category,
                    # Only the first subplot contributes legend entries, so each
                    # category appears in the legend once.
                    showlegend=row == 1,
                ),
                row=row,
                col=1,
            )
    fig.update_layout(barmode="relative")

    return fig