|
1 | 1 | import sqlite3 |
2 | 2 | import pandas as pd |
3 | 3 |
|
| 4 | + |
| 5 | +from .._config import ExportIOConfig |
4 | 6 | from ..report import post_scoring_report |
| 7 | +from ..io.dispatcher import ReaderDispatcher |
5 | 8 | from ..io.util import get_parquet_column_names |
6 | 9 | from ..io.util import check_sqlite_table |
7 | 10 | from ..report import plot_scores |
@@ -130,35 +133,19 @@ def export_scored_report( |
130 | 133 | The format of the scoring report, either 'osw' or 'parquet'. Default is 'osw'. |
131 | 134 | """ |
132 | 135 |
|
133 | | - cols_infile = get_parquet_column_names(infile) |
134 | | - |
135 | | - select_cols = [ |
136 | | - "RUN_ID", |
137 | | - "PROTEIN_ID", |
138 | | - "PEPTIDE_ID", |
139 | | - "PRECURSOR_ID", |
140 | | - "PRECURSOR_DECOY", |
141 | | - "FEATURE_MS2_AREA_INTENSITY", |
142 | | - "SCORE_MS2_SCORE", |
143 | | - "SCORE_MS2_PEAK_GROUP_RANK", |
144 | | - "SCORE_MS2_Q_VALUE", |
145 | | - "SCORE_PEPTIDE_GLOBAL_SCORE", |
146 | | - "SCORE_PEPTIDE_GLOBAL_Q_VALUE", |
147 | | - "SCORE_PEPTIDE_EXPERIMENT_WIDE_SCORE", |
148 | | - "SCORE_PEPTIDE_EXPERIMENT_WIDE_Q_VALUE", |
149 | | - "SCORE_PEPTIDE_RUN_SPECIFIC_SCORE", |
150 | | - "SCORE_PEPTIDE_RUN_SPECIFIC_Q_VALUE", |
151 | | - "SCORE_PROTEIN_GLOBAL_SCORE", |
152 | | - "SCORE_PROTEIN_GLOBAL_Q_VALUE", |
153 | | - "SCORE_PROTEIN_EXPERIMENT_WIDE_SCORE", |
154 | | - "SCORE_PROTEIN_EXPERIMENT_WIDE_Q_VALUE", |
155 | | - "SCORE_IPF_QVALUE", |
156 | | - ] |
| 136 | + config = ExportIOConfig( |
| 137 | + infile=infile, |
| 138 | + outfile=outfile, |
| 139 | + subsample_ratio=1.0, # not used for export/report paths |
| 140 | + level="export", |
| 141 | + context="export_scored_report", |
| 142 | + # no need to set export_format for this utility |
| 143 | + ) |
157 | 144 |
|
158 | | - # Filter select cols based on available columns in the input file |
159 | | - select_cols = [col for col in select_cols if col in cols_infile] |
| 145 | + # Get the right reader for the detected file type & context. |
| 146 | + reader = ReaderDispatcher.get_reader(config) |
160 | 147 |
|
161 | | - # Load the input data |
162 | | - df = pd.read_parquet(infile, columns=select_cols) |
| 148 | + # Read once (works for OSW or Parquet via their respective readers). |
| 149 | + df = reader.read() |
163 | 150 |
|
164 | 151 | post_scoring_report(df, outfile) |
0 commit comments