|
1 | 1 | from collections.abc import Mapping |
2 | | -from typing import Any |
| 2 | +from typing import Any, Literal |
3 | 3 |
|
4 | 4 | import anndata as ad |
5 | 5 | import pandas as pd |
| 6 | +from alphabase.pg_reader.pg_reader import pg_reader_provider |
6 | 7 | from alphabase.psm_reader.psm_reader import psm_reader_provider |
7 | 8 | from spatialdata.models import TableModel |
8 | 9 |
|
9 | | -from dvpio._utils import experimental_docs, experimental_log |
| 10 | +from dvpio._utils import deprecated_docs, deprecated_log, experimental_docs, experimental_log |
10 | 11 |
|
11 | 12 | from ._anndata import AnnDataFactory |
12 | 13 |
|
| 14 | +SAMPLE_ID_NAME: str = "sample_id" |
13 | 15 |
|
14 | | -def available_reader() -> list[str]: |
15 | | - """Get a list of all available readers, as provided by alphabase""" |
16 | | - return sorted(psm_reader_provider.reader_dict.keys()) |
| 16 | + |
def available_reader(reader_type: Literal["psm_reader", "pg_reader"] = "psm_reader") -> list[str]:
    """List the names of all readers that alphabase provides for one reader family.

    Parameters
    ----------
    reader_type
        Which reader family to list: readers for peptide spectrum matches (`psm_reader`)
        or readers for protein group intensities (`pg_reader`).

    Returns
    -------
    list[str]
        Alphabetically sorted reader names registered with the selected alphabase provider.

    Raises
    ------
    KeyError
        If `reader_type` is neither `psm_reader` nor `pg_reader`.
    """
    # Reject unknown reader families up front
    if reader_type not in ("psm_reader", "pg_reader"):
        raise KeyError(f"Pass either `psm_reader` or `pg_reader`, not {reader_type}")

    provider = psm_reader_provider if reader_type == "psm_reader" else pg_reader_provider
    return sorted(provider.reader_dict.keys())
17 | 32 |
|
18 | 33 |
|
19 | 34 | def _parse_pandas_index(index: pd.Index | pd.MultiIndex, set_index: str | None = None) -> pd.DataFrame: |
@@ -43,6 +58,10 @@ def _parse_pandas_index(index: pd.Index | pd.MultiIndex, set_index: str | None = |
43 | 58 | return df |
44 | 59 |
|
45 | 60 |
|
| 61 | +@deprecated_log( |
| 62 | + "This function is deprecated and will be removed in the next minor release. Use `dvpio.read.omics.read_pg_table` instead." |
| 63 | +) |
| 64 | +@deprecated_docs |
46 | 65 | def parse_df( |
47 | 66 | df: pd.DataFrame, obs_index: str | None = None, var_index: str | None = None, **table_kwargs |
48 | 67 | ) -> ad.AnnData: |
@@ -192,3 +211,110 @@ def read_precursor_table( |
192 | 211 | adata = factory.create_anndata() |
193 | 212 |
|
194 | 213 | return TableModel.parse(adata, **kwargs) |
| 214 | + |
| 215 | + |
def read_pg_table(
    path: str,
    search_engine: str,
    *,
    column_mapping: dict[str, Any] | None = None,
    measurement_regex: str | None = None,
    reader_provider_kwargs: dict | None = None,
    **kwargs: Any,
) -> ad.AnnData:
    """Read protein group table to the :class:`anndata.AnnData` format

    Read (features x observations) protein group matrices from proteomics search engines into
    the :class:`anndata.AnnData` format (observations x features). Per default,
    raw intensities are returned, which can be modified depending on the search engine.

    Supported formats include

        - AlphaDIA (`alphadia`)
        - AlphaPept (`alphapept`, csv+hdf)
        - DIANN (`diann`)
        - MaxQuant (`maxquant`)
        - Spectronaut (`spectronaut`, parquet + tsv)

    see :func:`dvpio.read.omics.available_reader` for a complete list.

    See `alphabase.pg_reader` module for more information

    Parameters
    ----------
    path
        Path to protein group matrix
    search_engine
        Name of engine output, pass the method name of the corresponding reader. You can
        list all available readers with the :func:`dvpio.read.omics.available_reader` helper function
        (use `reader_type="pg_reader"`).
    column_mapping
        A dictionary of mapping alphabase columns (keys) to the corresponding columns in the other
        search engine (values). If `None` will be loaded from the `column_mapping` key of the respective
        search engine in `pg_reader.yaml`. Passed to :meth:`alphabase.pg_reader.pg_reader_provider.get_reader`.
        Takes precedence over a `column_mapping` entry in `reader_provider_kwargs`.
    measurement_regex
        Regular expression that identifies correct measurement type. Only relevant if PG matrix contains multiple
        measurement types. For example, alphapept returns the raw protein intensity per sample in column `A` and the
        LFQ corrected value in `A_LFQ`. If `None` uses all columns. Passed to :meth:`alphabase.pg_reader.pg_reader_provider.get_reader`.
        Takes precedence over a `measurement_regex` entry in `reader_provider_kwargs`.
    reader_provider_kwargs
        Passed to :meth:`alphabase.pg_reader.pg_reader_provider.get_reader`. The passed dictionary is
        not modified.
    kwargs
        Passed to :meth:`spatialdata.models.TableModel.parse`

    Returns
    -------
    :class:`anndata.AnnData`
        AnnData object that can be further processed with scVerse packages.

        - adata.X
            Stores values of the intensity columns in the report of shape observations x features
        - adata.obs
            Stores observations with protein group matrix sample names as `sample_id` column.
        - adata.var
            Stores features and feature metadata.

    Example
    -------

    .. code-block:: python

        from dvpio.read.omics import read_pg_table

        alphadia_path = ...
        adata = read_pg_table(alphadia_path, "alphadia")

        maxquant_path = ...
        # Read LFQ values from MaxQuant report
        adata = read_pg_table(maxquant_path, "maxquant", measurement_regex="lfq")

    Get available regular expressions

    .. code-block:: python

        from alphabase.pg_reader import pg_reader_provider

        alphapept_reader = pg_reader_provider.get_reader("alphapept")
        alphapept_reader.get_preconfigured_regex()
        > {'raw': '^.*(?<!_LFQ)$', 'lfq': '_LFQ$'}

    See Also
    --------
    :mod:`alphabase.pg_reader`
    """
    # Work on a shallow copy so the caller's dictionary is never mutated.
    provider_kwargs = {} if reader_provider_kwargs is None else dict(reader_provider_kwargs)

    # Only forward explicitly set arguments. Omitting `None` values lets the reader
    # fall back to its own defaults instead of having them overridden with `None`.
    if column_mapping is not None:
        provider_kwargs["column_mapping"] = column_mapping
    if measurement_regex is not None:
        provider_kwargs["measurement_regex"] = measurement_regex

    reader = pg_reader_provider.get_reader(search_engine, **provider_kwargs)
    # Features x Observations
    df = reader.import_file(path)

    # Observations x Features: transpose the matrix, the former row index becomes
    # the feature table and the former column labels become the sample table.
    adata = ad.AnnData(
        X=df.values.T,
        var=df.index.to_frame(index=False),
        obs=df.columns.to_frame(index=False, name=SAMPLE_ID_NAME),
    )

    return TableModel.parse(adata, **kwargs)
0 commit comments