diff --git a/docs/api.md b/docs/api.md index 9865481..0fc99db 100644 --- a/docs/api.md +++ b/docs/api.md @@ -47,6 +47,7 @@ Automatically extract a controlled vocabulary of image metadata. available_reader parse_df read_precursor_table + read_pg_table ``` ## Write diff --git a/src/dvpio/_utils.py b/src/dvpio/_utils.py index d28475b..757e977 100644 --- a/src/dvpio/_utils.py +++ b/src/dvpio/_utils.py @@ -31,3 +31,45 @@ def wrapper(*args, **kwargs): return func(*args, **kwargs) return wrapper + + +def deprecated_docs(func): + """Decorator to mark a function as deprecated in the docstring.""" + func.__doc__ = f"""**Warning: This function is deprecated and will be removed in the next minor release**\n\n + {func.__doc__ or ""}""" + return func + + +def deprecated_log(message=None): + """Decorator to mark a function as deprecated with a warning log. + + Parameters + ---------- + message + Optional custom deprecation message. If not provided, uses default format. + """ + + def decorator(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + if message is None: + warning_message = f"Function {func.__name__} is deprecated and will be removed in future versions." 
+ else: + warning_message = message + + warnings.warn( + warning_message, + category=DeprecationWarning, + stacklevel=2, + ) + return func(*args, **kwargs) + + return wrapper + + # Handle both @deprecated_log and @deprecated_log() usage + if callable(message): + func = message + message = None + return decorator(func) + + return decorator diff --git a/src/dvpio/read/omics/__init__.py b/src/dvpio/read/omics/__init__.py index 16e0f0c..31a717f 100644 --- a/src/dvpio/read/omics/__init__.py +++ b/src/dvpio/read/omics/__init__.py @@ -1,3 +1,3 @@ -from .report_reader import available_reader, parse_df, read_precursor_table +from .report_reader import available_reader, parse_df, read_pg_table, read_precursor_table -__all__ = ["available_reader", "parse_df", "read_precursor_table"] +__all__ = ["available_reader", "parse_df", "read_precursor_table", "read_pg_table"] diff --git a/src/dvpio/read/omics/report_reader.py b/src/dvpio/read/omics/report_reader.py index 943d454..e45e7df 100644 --- a/src/dvpio/read/omics/report_reader.py +++ b/src/dvpio/read/omics/report_reader.py @@ -1,19 +1,34 @@ from collections.abc import Mapping -from typing import Any +from typing import Any, Literal import anndata as ad import pandas as pd +from alphabase.pg_reader.pg_reader import pg_reader_provider from alphabase.psm_reader.psm_reader import psm_reader_provider from spatialdata.models import TableModel -from dvpio._utils import experimental_docs, experimental_log +from dvpio._utils import deprecated_docs, deprecated_log, experimental_docs, experimental_log from ._anndata import AnnDataFactory +SAMPLE_ID_NAME: str = "sample_id" -def available_reader() -> list[str]: - """Get a list of all available readers, as provided by alphabase""" - return sorted(psm_reader_provider.reader_dict.keys()) + +def available_reader(reader_type: Literal["psm_reader", "pg_reader"] = "psm_reader") -> list[str]: + """Get a list of all available readers, as provided by alphabase + + Parameters + ---------- + 
reader_type + Whether to return readers for peptide spectrum matches (`psm_reader`) or protein group + intensities (`pg_reader`) + """ + if reader_type == "psm_reader": + return sorted(psm_reader_provider.reader_dict.keys()) + elif reader_type == "pg_reader": + return sorted(pg_reader_provider.reader_dict.keys()) + else: + raise KeyError(f"Pass either `psm_reader` or `pg_reader`, not {reader_type}") def _parse_pandas_index(index: pd.Index | pd.MultiIndex, set_index: str | None = None) -> pd.DataFrame: @@ -43,6 +58,10 @@ def _parse_pandas_index(index: pd.Index | pd.MultiIndex, set_index: str | None = return df +@deprecated_log( + "This function is deprecated and will be removed in the next minor release. Use `dvpio.read.omics.read_pg_table` instead." +) +@deprecated_docs def parse_df( df: pd.DataFrame, obs_index: str | None = None, var_index: str | None = None, **table_kwargs ) -> ad.AnnData: @@ -192,3 +211,110 @@ def read_precursor_table( adata = factory.create_anndata() return TableModel.parse(adata, **kwargs) + + +def read_pg_table( + path: str, + search_engine: str, + *, + column_mapping: dict[str, Any] | None = None, + measurement_regex: str | None = None, + reader_provider_kwargs: dict | None = None, + **kwargs: Any, +) -> TableModel: + """Read protein group table to the :class:`anndata.AnnData` format + + Read (features x observations) protein group matrices from proteomics search engines into + the :class:`anndata.AnnData` format (observations x features). Per default, + raw intensities are returned, which can be modified depending on the search engine. + + Supported formats include + + - AlphaDIA (`alphadia`) + - AlphaPept (`alphapept`, csv+hdf) + - DIANN (`diann`) + - MaxQuant (`maxquant`) + - Spectronaut (`spectronaut`, parquet + tsv) + + see :func:`dvpio.read.omics.available_reader` for a complete list. 
+ + See `alphabase.pg_reader` module for more information + + Parameters + ---------- + path + Path to protein group matrix + search_engine + Name of engine output, pass the method name of the corresponding reader. You can + list all available readers with the :func:`dvpio.read.omics.available_reader` helper function + column_mapping + A dictionary of mapping alphabase columns (keys) to the corresponding columns in the other + search engine (values). If `None` will be loaded from the `column_mapping` key of the respective + search engine in `pg_reader.yaml`. Passed to :meth:`alphabase.pg_reader.pg_reader_provider.get_reader`. + measurement_regex + Regular expression that identifies correct measurement type. Only relevant if PG matrix contains multiple + measurement types. For example, alphapept returns the raw protein intensity per sample in column `A` and the + LFQ corrected value in `A_LFQ`. If `None` uses all columns. Passed to :meth:`alphabase.pg_reader.pg_reader_provider.get_reader`. + reader_provider_kwargs + Passed to :meth:`alphabase.pg_reader.pg_reader_provider.get_reader` + kwargs + Passed to :meth:`spatialdata.models.TableModel.parse` + + Returns + ------- + :class:`anndata.AnnData` + AnnData object that can be further processed with scVerse packages. + + - adata.X + Stores values of the intensity columns in the report of shape observations x features + - adata.obs + Stores observations with protein group matrix sample names as `sample_id` column. + - adata.var + Stores features and feature metadata. + + Example + ------- + + .. code-block:: python + + from dvpio.read.omics import read_pg_table + + alphadia_path = ... + adata = read_pg_table(alphadia_path, search_engine="alphadia") + + maxquant_path = ... + # Read LFQ values from MaxQuant report + adata = read_pg_table(maxquant_path, search_engine="maxquant", measurement_regex="lfq") + + Get available regular expressions + + .. 
code-block:: python + + from alphabase.pg_reader import pg_reader_provider + + alphapept_reader = pg_reader_provider.get_reader("alphapept") + alphapept_reader.get_preconfigured_regex() + > {'raw': '^.*(? pd.DataFrame: return df.T -def test_available_reader() -> None: - list_of_available_reader = available_reader() +@pytest.mark.parametrize(("reader_type",), [("psm_reader",), ("pg_reader",)]) +def test_available_reader(reader_type: str) -> None: + list_of_available_reader = available_reader(reader_type) - assert len(list_of_available_reader) == len(psm_reader_provider.reader_dict) + if reader_type == "psm_reader": + assert len(list_of_available_reader) == len(psm_reader_provider.reader_dict) + elif reader_type == "pg_reader": + assert len(list_of_available_reader) == len(pg_reader_provider.reader_dict) assert "alphadia" in list_of_available_reader @@ -228,3 +233,21 @@ def test_parse_df_real_data( def test_read_precursor_table(path: str, reader_type: str, func_kwargs: dict, shape: tuple[int]) -> None: adata = read_precursor_table(path, reader_type=reader_type, **func_kwargs) assert adata.shape == shape + + +@pytest.mark.parametrize( + ("path", "reader_type", "func_kwargs", "shape", "var_shape"), + [ + ("./data/omics/alphadia/alphadia.protein-group.tsv", "alphadia", {}, (3, 7497), (7497, 1)), + ("./data/omics/alphapept/alphapept.protein-group.csv", "alphapept", {}, (2, 3781), (3781, 5)), + ], +) +def test_read_pg_table( + path: str, reader_type: str, func_kwargs: dict, shape: tuple[int], var_shape: tuple[int] +) -> None: + """Test read pg table with real data""" + + adata = read_pg_table(path, search_engine=reader_type, **func_kwargs) + + assert adata.shape == shape + assert adata.var.shape == var_shape diff --git a/tests/test_utils.py b/tests/test_utils.py index 54073fa..8a93893 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,6 +1,6 @@ import pytest -from dvpio._utils import experimental_docs, experimental_log, is_parsed +from dvpio._utils import 
deprecated_docs, deprecated_log, experimental_docs, experimental_log, is_parsed @pytest.fixture() @@ -29,3 +29,76 @@ def test_experimental_log(function_factory): with pytest.warns(UserWarning, match="is experimental and may change"): sample_func() + + +def test_deprecated_docs(function_factory): + sample_func = deprecated_docs(function_factory) + + assert "Warning: This function is deprecated" in sample_func.__doc__ + + +def test_deprecated_log_default_message(function_factory): + """Test deprecated_log with default message using @ syntax.""" + sample_func = deprecated_log()(function_factory) + + with pytest.warns( + DeprecationWarning, match="Function sample_func is deprecated and will be removed in future versions" + ): + sample_func() + + +def test_deprecated_log_custom_message(function_factory): + """Test deprecated_log with custom message.""" + custom_message = "This function is obsolete. Use new_function() instead." + sample_func = deprecated_log(custom_message)(function_factory) + + with pytest.warns(DeprecationWarning, match="This function is obsolete. 
Use new_function\\(\\) instead."): + sample_func() + + +def test_deprecated_log_preserves_function_metadata(function_factory): + """Test that deprecated_log preserves function name and docstring.""" + sample_func = deprecated_log(function_factory) + + assert sample_func.__name__ == function_factory.__name__ + assert sample_func.__doc__ == function_factory.__doc__ + + +def test_deprecated_log_preserves_return_value(function_factory): + """Test that deprecated_log preserves function return value.""" + + def func_with_return(): + return "test_value" + + decorated_func = deprecated_log(func_with_return) + + with pytest.warns(DeprecationWarning): + result = decorated_func() + + assert result == "test_value" + + +def test_deprecated_log_preserves_arguments(): + """Test that deprecated_log passes through arguments correctly.""" + + def func_with_args(a, b, c=None): + return (a, b, c) + + decorated_func = deprecated_log(func_with_args) + + with pytest.warns(DeprecationWarning): + result = decorated_func(1, 2, c=3) + + assert result == (1, 2, 3) + + +def test_deprecated_log_warning_category(): + """Test that deprecated_log uses correct warning category.""" + + def sample_func(): + pass + + decorated_func = deprecated_log(sample_func) + + with pytest.warns(DeprecationWarning): + decorated_func()