Skip to content

Commit 120975c

Browse files
authored
Merge pull request #162 from singjc/update/export_scored_report
Update/export scored report
2 parents fd45a5f + 7684484 commit 120975c

File tree

4 files changed

+355
-58
lines changed

4 files changed

+355
-58
lines changed

pyprophet/export/export_report.py

Lines changed: 15 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,10 @@
11
import sqlite3
22
import pandas as pd
33

4+
5+
from .._config import ExportIOConfig
46
from ..report import post_scoring_report
7+
from ..io.dispatcher import ReaderDispatcher
58
from ..io.util import get_parquet_column_names
69
from ..io.util import check_sqlite_table
710
from ..report import plot_scores
@@ -130,35 +133,19 @@ def export_scored_report(
130133
The format of the scoring report, either 'osw' or 'parquet'. Default is 'osw'.
131134
"""
132135

133-
cols_infile = get_parquet_column_names(infile)
134-
135-
select_cols = [
136-
"RUN_ID",
137-
"PROTEIN_ID",
138-
"PEPTIDE_ID",
139-
"PRECURSOR_ID",
140-
"PRECURSOR_DECOY",
141-
"FEATURE_MS2_AREA_INTENSITY",
142-
"SCORE_MS2_SCORE",
143-
"SCORE_MS2_PEAK_GROUP_RANK",
144-
"SCORE_MS2_Q_VALUE",
145-
"SCORE_PEPTIDE_GLOBAL_SCORE",
146-
"SCORE_PEPTIDE_GLOBAL_Q_VALUE",
147-
"SCORE_PEPTIDE_EXPERIMENT_WIDE_SCORE",
148-
"SCORE_PEPTIDE_EXPERIMENT_WIDE_Q_VALUE",
149-
"SCORE_PEPTIDE_RUN_SPECIFIC_SCORE",
150-
"SCORE_PEPTIDE_RUN_SPECIFIC_Q_VALUE",
151-
"SCORE_PROTEIN_GLOBAL_SCORE",
152-
"SCORE_PROTEIN_GLOBAL_Q_VALUE",
153-
"SCORE_PROTEIN_EXPERIMENT_WIDE_SCORE",
154-
"SCORE_PROTEIN_EXPERIMENT_WIDE_Q_VALUE",
155-
"SCORE_IPF_QVALUE",
156-
]
136+
config = ExportIOConfig(
137+
infile=infile,
138+
outfile=outfile,
139+
subsample_ratio=1.0, # not used for export/report paths
140+
level="export",
141+
context="export_scored_report",
142+
# no need to set export_format for this utility
143+
)
157144

158-
# Filter select cols based on available columns in the input file
159-
select_cols = [col for col in select_cols if col in cols_infile]
145+
# Get the right reader for the detected file type & context.
146+
reader = ReaderDispatcher.get_reader(config)
160147

161-
# Load the input data
162-
df = pd.read_parquet(infile, columns=select_cols)
148+
# Read once (works for OSW or Parquet via their respective readers).
149+
df = reader.read()
163150

164151
post_scoring_report(df, outfile)

0 commit comments

Comments
 (0)