Skip to content

Commit 629d00c

Browse files
Merge pull request #123 from MannLabs/pg-reader
Protein Group Reader
2 parents cac98e1 + 140b0a8 commit 629d00c

File tree

6 files changed

+277
-12
lines changed

6 files changed

+277
-12
lines changed

docs/api.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ Automatically extract a controlled vocabulary of image metadata.
4747
available_reader
4848
parse_df
4949
read_precursor_table
50+
read_pg_table
5051
```
5152

5253
## Write

src/dvpio/_utils.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,45 @@ def wrapper(*args, **kwargs):
3131
return func(*args, **kwargs)
3232

3333
return wrapper
34+
35+
36+
def deprecated_docs(func):
37+
"""Decorator to mark a function as deprecated in the docstring."""
38+
func.__doc__ = f"""**Warning: This function is deprecated and will be removed in the next minor release**\n\n
39+
{func.__doc__ or ""}"""
40+
return func
41+
42+
43+
def deprecated_log(message=None):
44+
"""Decorator to mark a function as deprecated with a warning log.
45+
46+
Parameters
47+
----------
48+
message
49+
Optional custom deprecation message. If not provided, uses default format.
50+
"""
51+
52+
def decorator(func):
53+
@functools.wraps(func)
54+
def wrapper(*args, **kwargs):
55+
if message is None:
56+
warning_message = f"Function {func.__name__} is deprecated and will be removed in future versions."
57+
else:
58+
warning_message = message
59+
60+
warnings.warn(
61+
warning_message,
62+
category=DeprecationWarning,
63+
stacklevel=2,
64+
)
65+
return func(*args, **kwargs)
66+
67+
return wrapper
68+
69+
# Handle both @deprecated_log and @deprecated_log() usage
70+
if callable(message):
71+
func = message
72+
message = None
73+
return decorator(func)
74+
75+
return decorator

src/dvpio/read/omics/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
from .report_reader import available_reader, parse_df, read_precursor_table
1+
from .report_reader import available_reader, parse_df, read_pg_table, read_precursor_table
22

3-
__all__ = ["available_reader", "parse_df", "read_precursor_table"]
3+
__all__ = ["available_reader", "parse_df", "read_precursor_table", "read_pg_table"]

src/dvpio/read/omics/report_reader.py

Lines changed: 131 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,34 @@
11
from collections.abc import Mapping
2-
from typing import Any
2+
from typing import Any, Literal
33

44
import anndata as ad
55
import pandas as pd
6+
from alphabase.pg_reader.pg_reader import pg_reader_provider
67
from alphabase.psm_reader.psm_reader import psm_reader_provider
78
from spatialdata.models import TableModel
89

9-
from dvpio._utils import experimental_docs, experimental_log
10+
from dvpio._utils import deprecated_docs, deprecated_log, experimental_docs, experimental_log
1011

1112
from ._anndata import AnnDataFactory
1213

14+
SAMPLE_ID_NAME: str = "sample_id"
1315

14-
def available_reader() -> list[str]:
15-
"""Get a list of all available readers, as provided by alphabase"""
16-
return sorted(psm_reader_provider.reader_dict.keys())
16+
17+
def available_reader(reader_type: Literal["psm_reader", "pg_reader"] = "psm_reader") -> list[str]:
18+
"""Get a list of all available readers, as provided by alphabase
19+
20+
Parameters
21+
----------
22+
reader_type
23+
Whether to return readers for peptide spectrum matches (`psm_reader`) or protein group
24+
intensities (`pg_reader`)
25+
"""
26+
if reader_type == "psm_reader":
27+
return sorted(psm_reader_provider.reader_dict.keys())
28+
elif reader_type == "pg_reader":
29+
return sorted(pg_reader_provider.reader_dict.keys())
30+
else:
31+
raise KeyError(f"Pass either `psm_reader` or `pg_reader`, not {reader_type}")
1732

1833

1934
def _parse_pandas_index(index: pd.Index | pd.MultiIndex, set_index: str | None = None) -> pd.DataFrame:
@@ -43,6 +58,10 @@ def _parse_pandas_index(index: pd.Index | pd.MultiIndex, set_index: str | None =
4358
return df
4459

4560

61+
@deprecated_log(
62+
"This function is deprecated and will be removed in the next minor release. Use `dvpio.read.omics.read_pg_table` instead."
63+
)
64+
@deprecated_docs
4665
def parse_df(
4766
df: pd.DataFrame, obs_index: str | None = None, var_index: str | None = None, **table_kwargs
4867
) -> ad.AnnData:
@@ -192,3 +211,110 @@ def read_precursor_table(
192211
adata = factory.create_anndata()
193212

194213
return TableModel.parse(adata, **kwargs)
214+
215+
216+
def read_pg_table(
217+
path: str,
218+
search_engine: str,
219+
*,
220+
column_mapping: dict[str, Any] | None = None,
221+
measurement_regex: str | None = None,
222+
reader_provider_kwargs: dict | None = None,
223+
**kwargs: Any,
224+
) -> TableModel:
225+
"""Read protein group table to the :class:`anndata.AnnData` format
226+
227+
Read (features x observations) protein group matrices from proteomics search engines into
228+
the :class:`anndata.AnnData` format (observations x features). Per default,
229+
raw intensities are returned, which can be modified depending on the search engine.
230+
231+
Supported formats include
232+
233+
- AlphaDIA (`alphadia`)
234+
- AlphaPept (`alphapept`, csv+hdf)
235+
- DIANN (`diann`)
236+
- MaxQuant (`maxquant`)
237+
- Spectronaut (`spectronaut`, parquet + tsv)
238+
239+
See :func:`dvpio.read.omics.available_reader` (called with `reader_type="pg_reader"`) for a complete list.
240+
241+
See `alphabase.pg_reader` module for more information
242+
243+
Parameters
244+
----------
245+
path
246+
Path to protein group matrix
247+
search_engine
248+
Name of engine output, pass the method name of the corresponding reader. You can
249+
list all available readers with the :func:`dvpio.read.omics.available_reader` helper function
250+
column_mapping
251+
A dictionary mapping alphabase columns (keys) to the corresponding columns in the other
252+
search engine (values). If `None` will be loaded from the `column_mapping` key of the respective
253+
search engine in `pg_reader.yaml`. Passed to :meth:`alphabase.pg_reader.pg_reader_provider.get_reader`.
254+
measurement_regex
255+
Regular expression that identifies correct measurement type. Only relevant if PG matrix contains multiple
256+
measurement types. For example, alphapept returns the raw protein intensity per sample in column `A` and the
257+
LFQ corrected value in `A_LFQ`. If `None` uses all columns. Passed to :meth:`alphabase.pg_reader.pg_reader_provider.get_reader`.
258+
reader_provider_kwargs
259+
Passed to :meth:`alphabase.pg_reader.pg_reader_provider.get_reader`
260+
kwargs
261+
Passed to :meth:`spatialdata.models.TableModel.parse`
262+
263+
Returns
264+
-------
265+
:class:`anndata.AnnData`
266+
AnnData object that can be further processed with scVerse packages.
267+
268+
- adata.X
269+
Stores values of the intensity columns in the report of shape observations x features
270+
- adata.obs
271+
Stores observations with protein group matrix sample names as `sample_id` column.
272+
- adata.var
273+
Stores features and feature metadata.
274+
275+
Example
276+
-------
277+
278+
.. code-block:: python
279+
280+
from dvpio.read.omics import read_pg_table
281+
282+
alphadia_path = ...
283+
adata = read_pg_table(alphadia_path, search_engine="alphadia")
284+
285+
maxquant_path = ...
286+
# Read LFQ values from MaxQuant report
287+
adata = read_pg_table(maxquant_path, search_engine="maxquant", measurement_regex="lfq")
288+
289+
Get available regular expressions
290+
291+
.. code-block:: python
292+
293+
from alphabase.pg_reader import pg_reader_provider
294+
295+
alphapept_reader = pg_reader_provider.get_reader("alphapept")
296+
alphapept_reader.get_preconfigured_regex()
297+
> {'raw': '^.*(?<!_LFQ)$', 'lfq': '_LFQ$'}
298+
299+
See Also
300+
--------
301+
:mod:`alphabase.pg_reader`
302+
"""
303+
# Build reader_provider_kwargs
304+
# This ensures that the default values of the readers are considered (e.g. if `column_mapping="raw"`)
305+
reader_provider_kwargs = {} if reader_provider_kwargs is None else reader_provider_kwargs
306+
if column_mapping is not None:
307+
reader_provider_kwargs["column_mapping"] = column_mapping
308+
if measurement_regex is not None:
309+
reader_provider_kwargs["measurement_regex"] = measurement_regex
310+
311+
reader = pg_reader_provider.get_reader(search_engine, **reader_provider_kwargs)
312+
# Features x Observations
313+
df = reader.import_file(path)
314+
315+
# Observations x Features
316+
adata = ad.AnnData(
317+
X=df.values.T, var=df.index.to_frame(index=False), obs=df.columns.to_frame(index=False, name=SAMPLE_ID_NAME)
318+
)
319+
320+
return TableModel.parse(adata, **kwargs)

tests/read/omics/test_report_reader.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import numpy as np
22
import pandas as pd
33
import pytest
4+
from alphabase.pg_reader.pg_reader import pg_reader_provider
45
from alphabase.psm_reader.psm_reader import psm_reader_provider
56
from spatialdata.models import TableModel
67

7-
from dvpio.read.omics import available_reader, parse_df, read_precursor_table
8+
from dvpio.read.omics import available_reader, parse_df, read_pg_table, read_precursor_table
89
from dvpio.read.omics.report_reader import _parse_pandas_index
910

1011

@@ -57,10 +58,14 @@ def alphadia_pg_report() -> pd.DataFrame:
5758
return df.T
5859

5960

60-
def test_available_reader() -> None:
61-
list_of_available_reader = available_reader()
61+
@pytest.mark.parametrize(("reader_type",), [("psm_reader",), ("pg_reader",)])
62+
def test_available_reader(reader_type: str) -> None:
63+
list_of_available_reader = available_reader(reader_type)
6264

63-
assert len(list_of_available_reader) == len(psm_reader_provider.reader_dict)
65+
if reader_type == "psm_reader":
66+
assert len(list_of_available_reader) == len(psm_reader_provider.reader_dict)
67+
elif reader_type == "pg_reader":
68+
assert len(list_of_available_reader) == len(pg_reader_provider.reader_dict)
6469
assert "alphadia" in list_of_available_reader
6570

6671

@@ -228,3 +233,21 @@ def test_parse_df_real_data(
228233
def test_read_precursor_table(path: str, reader_type: str, func_kwargs: dict, shape: tuple[int]) -> None:
229234
adata = read_precursor_table(path, reader_type=reader_type, **func_kwargs)
230235
assert adata.shape == shape
236+
237+
238+
@pytest.mark.parametrize(
239+
("path", "reader_type", "func_kwargs", "shape", "var_shape"),
240+
[
241+
("./data/omics/alphadia/alphadia.protein-group.tsv", "alphadia", {}, (3, 7497), (7497, 1)),
242+
("./data/omics/alphapept/alphapept.protein-group.csv", "alphapept", {}, (2, 3781), (3781, 5)),
243+
],
244+
)
245+
def test_read_pg_table(
246+
path: str, reader_type: str, func_kwargs: dict, shape: tuple[int], var_shape: tuple[int]
247+
) -> None:
248+
"""Test read pg table with real data"""
249+
250+
adata = read_pg_table(path, search_engine=reader_type, **func_kwargs)
251+
252+
assert adata.shape == shape
253+
assert adata.var.shape == var_shape

tests/test_utils.py

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import pytest
22

3-
from dvpio._utils import experimental_docs, experimental_log, is_parsed
3+
from dvpio._utils import deprecated_docs, deprecated_log, experimental_docs, experimental_log, is_parsed
44

55

66
@pytest.fixture()
@@ -29,3 +29,76 @@ def test_experimental_log(function_factory):
2929

3030
with pytest.warns(UserWarning, match="is experimental and may change"):
3131
sample_func()
32+
33+
34+
def test_deprecated_docs(function_factory):
35+
sample_func = deprecated_docs(function_factory)
36+
37+
assert "Warning: This function is deprecated" in sample_func.__doc__
38+
39+
40+
def test_deprecated_log_default_message(function_factory):
41+
"""Test deprecated_log with default message using @ syntax."""
42+
sample_func = deprecated_log()(function_factory)
43+
44+
with pytest.warns(
45+
DeprecationWarning, match="Function sample_func is deprecated and will be removed in future versions"
46+
):
47+
sample_func()
48+
49+
50+
def test_deprecated_log_custom_message(function_factory):
51+
"""Test deprecated_log with custom message."""
52+
custom_message = "This function is obsolete. Use new_function() instead."
53+
sample_func = deprecated_log(custom_message)(function_factory)
54+
55+
with pytest.warns(DeprecationWarning, match="This function is obsolete. Use new_function\\(\\) instead."):
56+
sample_func()
57+
58+
59+
def test_deprecated_log_preserves_function_metadata(function_factory):
60+
"""Test that deprecated_log preserves function name and docstring."""
61+
sample_func = deprecated_log(function_factory)
62+
63+
assert sample_func.__name__ == function_factory.__name__
64+
assert sample_func.__doc__ == function_factory.__doc__
65+
66+
67+
def test_deprecated_log_preserves_return_value(function_factory):
68+
"""Test that deprecated_log preserves function return value."""
69+
70+
def func_with_return():
71+
return "test_value"
72+
73+
decorated_func = deprecated_log(func_with_return)
74+
75+
with pytest.warns(DeprecationWarning):
76+
result = decorated_func()
77+
78+
assert result == "test_value"
79+
80+
81+
def test_deprecated_log_preserves_arguments():
82+
"""Test that deprecated_log passes through arguments correctly."""
83+
84+
def func_with_args(a, b, c=None):
85+
return (a, b, c)
86+
87+
decorated_func = deprecated_log(func_with_args)
88+
89+
with pytest.warns(DeprecationWarning):
90+
result = decorated_func(1, 2, c=3)
91+
92+
assert result == (1, 2, 3)
93+
94+
95+
def test_deprecated_log_warning_category():
96+
"""Test that deprecated_log uses correct warning category."""
97+
98+
def sample_func():
99+
pass
100+
101+
decorated_func = deprecated_log(sample_func)
102+
103+
with pytest.warns(DeprecationWarning):
104+
decorated_func()

0 commit comments

Comments
 (0)