Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ Automatically extract a controlled vocabulary of image metadata.
available_reader
parse_df
read_precursor_table
read_pg_table
```

## Write
Expand Down
42 changes: 42 additions & 0 deletions src/dvpio/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,45 @@ def wrapper(*args, **kwargs):
return func(*args, **kwargs)

return wrapper


def deprecated_docs(func):
    """Prepend a deprecation notice to the docstring of ``func``.

    The function object is modified in place and returned unchanged otherwise;
    no wrapper is created. A missing docstring is treated as an empty string.
    """
    notice = "**Warning: This function is deprecated and will be removed in the next minor release**\n\n\n"
    existing_doc = func.__doc__ if func.__doc__ else ""
    func.__doc__ = notice + existing_doc
    return func


def deprecated_log(message=None):
    """Decorator that emits a ``DeprecationWarning`` whenever the function is called.

    Works both bare (``@deprecated_log``) and called (``@deprecated_log()`` or
    ``@deprecated_log("custom text")``).

    Parameters
    ----------
    message
        Optional custom deprecation message. If not provided, a default message
        containing the name of the decorated function is used. With bare usage
        the decorated function itself arrives in this slot.

    Returns
    -------
    The wrapped function for bare usage, otherwise a decorator to apply to the function.
    """
    # Bare usage hands the function over in the ``message`` slot; tell the two cases apart once.
    bare_target = message if callable(message) else None
    custom_text = None if bare_target is not None else message

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            text = (
                custom_text
                if custom_text is not None
                else f"Function {func.__name__} is deprecated and will be removed in future versions."
            )
            # stacklevel=2 attributes the warning to the caller of the deprecated function
            warnings.warn(text, category=DeprecationWarning, stacklevel=2)
            return func(*args, **kwargs)

        return wrapper

    return decorator if bare_target is None else decorator(bare_target)
4 changes: 2 additions & 2 deletions src/dvpio/read/omics/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .report_reader import available_reader, parse_df, read_precursor_table
from .report_reader import available_reader, parse_df, read_pg_table, read_precursor_table

__all__ = ["available_reader", "parse_df", "read_precursor_table"]
__all__ = ["available_reader", "parse_df", "read_precursor_table", "read_pg_table"]
136 changes: 131 additions & 5 deletions src/dvpio/read/omics/report_reader.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,34 @@
from collections.abc import Mapping
from typing import Any
from typing import Any, Literal

import anndata as ad
import pandas as pd
from alphabase.pg_reader.pg_reader import pg_reader_provider
from alphabase.psm_reader.psm_reader import psm_reader_provider
from spatialdata.models import TableModel

from dvpio._utils import experimental_docs, experimental_log
from dvpio._utils import deprecated_docs, deprecated_log, experimental_docs, experimental_log

from ._anndata import AnnDataFactory

SAMPLE_ID_NAME: str = "sample_id"

def available_reader(reader_type: Literal["psm_reader", "pg_reader"] = "psm_reader") -> list[str]:
    """Get a list of all available readers, as provided by alphabase

    Parameters
    ----------
    reader_type
        Whether to return readers for peptide spectrum matches (`psm_reader`) or protein group
        intensities (`pg_reader`). Defaults to `psm_reader`, so calling the function without
        arguments lists the PSM readers.

    Returns
    -------
    list[str]
        Sorted names of the readers registered with the selected provider.

    Raises
    ------
    KeyError
        If `reader_type` is neither `psm_reader` nor `pg_reader`.
    """
    if reader_type == "psm_reader":
        return sorted(psm_reader_provider.reader_dict.keys())
    if reader_type == "pg_reader":
        return sorted(pg_reader_provider.reader_dict.keys())
    raise KeyError(f"Pass either `psm_reader` or `pg_reader`, not {reader_type}")


def _parse_pandas_index(index: pd.Index | pd.MultiIndex, set_index: str | None = None) -> pd.DataFrame:
Expand Down Expand Up @@ -43,6 +58,10 @@ def _parse_pandas_index(index: pd.Index | pd.MultiIndex, set_index: str | None =
return df


@deprecated_log(
"This function is deprecated and will be removed in the next minor release. Use `dvpio.read.omics.read_pg_table` instead."
)
@deprecated_docs
def parse_df(
df: pd.DataFrame, obs_index: str | None = None, var_index: str | None = None, **table_kwargs
) -> ad.AnnData:
Expand Down Expand Up @@ -192,3 +211,110 @@ def read_precursor_table(
adata = factory.create_anndata()

return TableModel.parse(adata, **kwargs)


def read_pg_table(
    path: str,
    search_engine: str,
    *,
    column_mapping: dict[str, Any] | None = None,
    measurement_regex: str | None = None,
    reader_provider_kwargs: dict | None = None,
    **kwargs: Any,
) -> ad.AnnData:
    """Read protein group table to the :class:`anndata.AnnData` format

    Read (features x observations) protein group matrices from proteomics search engines into
    the :class:`anndata.AnnData` format (observations x features). Per default,
    raw intensities are returned, which can be modified depending on the search engine.

    Supported formats include

    - AlphaDIA (`alphadia`)
    - AlphaPept (`alphapept`, csv+hdf)
    - DIANN (`diann`)
    - MaxQuant (`maxquant`)
    - Spectronaut (`spectronaut`, parquet + tsv)

    see :func:`dvpio.read.omics.available_reader` (with `reader_type="pg_reader"`) for a complete list.

    See `alphabase.pg_reader` module for more information

    Parameters
    ----------
    path
        Path to protein group matrix
    search_engine
        Name of engine output, pass the method name of the corresponding reader. You can
        list all available readers with the :func:`dvpio.read.omics.available_reader` helper function
    column_mapping
        A dictionary of mapping alphabase columns (keys) to the corresponding columns in the other
        search engine (values). If `None` will be loaded from the `column_mapping` key of the respective
        search engine in `pg_reader.yaml`. Passed to :meth:`alphabase.pg_reader.pg_reader_provider.get_reader`.
    measurement_regex
        Regular expression that identifies correct measurement type. Only relevant if PG matrix contains multiple
        measurement types. For example, alphapept returns the raw protein intensity per sample in column `A` and the
        LFQ corrected value in `A_LFQ`. If `None` uses all columns. Passed to :meth:`alphabase.pg_reader.pg_reader_provider.get_reader`.
    reader_provider_kwargs
        Passed to :meth:`alphabase.pg_reader.pg_reader_provider.get_reader`. The dictionary is copied and
        never modified; explicitly passed `column_mapping` / `measurement_regex` take precedence over
        entries of the same name.
    kwargs
        Passed to :meth:`spatialdata.models.TableModel.parse`

    Returns
    -------
    :class:`anndata.AnnData`
        AnnData object that can be further processed with scVerse packages.

        - adata.X
            Stores values of the intensity columns in the report of shape observations x features
        - adata.obs
            Stores observations with protein group matrix sample names as `sample_id` column.
        - adata.var
            Stores features and feature metadata.

    Example
    -------

    .. code-block:: python

        from dvpio.read.omics import read_pg_table

        alphadia_path = ...
        adata = read_pg_table(alphadia_path, search_engine="alphadia")

        maxquant_path = ...
        # Read LFQ values from MaxQuant report
        adata = read_pg_table(maxquant_path, search_engine="maxquant", measurement_regex="lfq")

    Get available regular expressions

    .. code-block:: python

        from alphabase.pg_reader import pg_reader_provider

        alphapept_reader = pg_reader_provider.get_reader("alphapept")
        alphapept_reader.get_preconfigured_regex()
        > {'raw': '^.*(?<!_LFQ)$', 'lfq': '_LFQ$'}

    See Also
    --------
    :mod:`alphabase.pg_reader`
    """
    # Work on a copy so the dictionary handed in by the caller is never mutated.
    provider_options = dict(reader_provider_kwargs or {})

    # Forward only explicitly set options; otherwise the reader's own defaults stay in effect.
    if column_mapping is not None:
        provider_options["column_mapping"] = column_mapping
    if measurement_regex is not None:
        provider_options["measurement_regex"] = measurement_regex

    reader = pg_reader_provider.get_reader(search_engine, **provider_options)
    # Features x Observations
    df = reader.import_file(path)

    # Observations x Features
    adata = ad.AnnData(
        X=df.values.T,
        var=df.index.to_frame(index=False),
        obs=df.columns.to_frame(index=False, name=SAMPLE_ID_NAME),
    )

    return TableModel.parse(adata, **kwargs)
31 changes: 27 additions & 4 deletions tests/read/omics/test_report_reader.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import numpy as np
import pandas as pd
import pytest
from alphabase.pg_reader.pg_reader import pg_reader_provider
from alphabase.psm_reader.psm_reader import psm_reader_provider
from spatialdata.models import TableModel

from dvpio.read.omics import available_reader, parse_df, read_precursor_table
from dvpio.read.omics import available_reader, parse_df, read_pg_table, read_precursor_table
from dvpio.read.omics.report_reader import _parse_pandas_index


Expand Down Expand Up @@ -57,10 +58,14 @@ def alphadia_pg_report() -> pd.DataFrame:
return df.T


@pytest.mark.parametrize(("reader_type",), [("psm_reader",), ("pg_reader",)])
def test_available_reader(reader_type: str) -> None:
    """``available_reader`` lists as many readers as the matching alphabase provider registers."""
    # Pair each reader type with the provider whose registry the result is compared against.
    providers = {"psm_reader": psm_reader_provider, "pg_reader": pg_reader_provider}

    list_of_available_reader = available_reader(reader_type)

    assert len(list_of_available_reader) == len(providers[reader_type].reader_dict)
    assert "alphadia" in list_of_available_reader


Expand Down Expand Up @@ -228,3 +233,21 @@ def test_parse_df_real_data(
def test_read_precursor_table(path: str, reader_type: str, func_kwargs: dict, shape: tuple[int]) -> None:
adata = read_precursor_table(path, reader_type=reader_type, **func_kwargs)
assert adata.shape == shape


@pytest.mark.parametrize(
    ("path", "reader_type", "func_kwargs", "shape", "var_shape"),
    [
        ("./data/omics/alphadia/alphadia.protein-group.tsv", "alphadia", {}, (3, 7497), (7497, 1)),
        ("./data/omics/alphapept/alphapept.protein-group.csv", "alphapept", {}, (2, 3781), (3781, 5)),
    ],
)
def test_read_pg_table(
    path: str, reader_type: str, func_kwargs: dict, shape: tuple[int], var_shape: tuple[int]
) -> None:
    """Read real protein group reports and check the table and ``var`` shapes."""
    table = read_pg_table(path, search_engine=reader_type, **func_kwargs)

    # Compare both shapes at once: (observations x features) and the feature metadata frame
    observed_shapes = (table.shape, table.var.shape)
    assert observed_shapes == (shape, var_shape)
75 changes: 74 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from dvpio._utils import experimental_docs, experimental_log, is_parsed
from dvpio._utils import deprecated_docs, deprecated_log, experimental_docs, experimental_log, is_parsed


@pytest.fixture()
Expand Down Expand Up @@ -29,3 +29,76 @@ def test_experimental_log(function_factory):

with pytest.warns(UserWarning, match="is experimental and may change"):
sample_func()


def test_deprecated_docs(function_factory):
    """``deprecated_docs`` adds the deprecation notice to the docstring."""
    decorated = deprecated_docs(function_factory)
    expected_notice = "Warning: This function is deprecated"

    assert expected_notice in decorated.__doc__


def test_deprecated_log_default_message(function_factory):
    """Calling ``deprecated_log()`` without a message warns with the default text."""
    expected_pattern = "Function sample_func is deprecated and will be removed in future versions"
    decorated = deprecated_log()(function_factory)

    with pytest.warns(DeprecationWarning, match=expected_pattern):
        decorated()


def test_deprecated_log_custom_message(function_factory):
    """A message passed to ``deprecated_log`` is used as the warning text."""
    decorate = deprecated_log("This function is obsolete. Use new_function() instead.")
    decorated = decorate(function_factory)
    # Parentheses are escaped because ``match`` is interpreted as a regular expression
    expected_pattern = "This function is obsolete. Use new_function\\(\\) instead."

    with pytest.warns(DeprecationWarning, match=expected_pattern):
        decorated()


def test_deprecated_log_preserves_function_metadata(function_factory):
    """Bare ``deprecated_log`` keeps the name and docstring of the wrapped function."""
    decorated = deprecated_log(function_factory)

    for attribute in ("__name__", "__doc__"):
        assert getattr(decorated, attribute) == getattr(function_factory, attribute)


def test_deprecated_log_preserves_return_value():
    """Test that deprecated_log preserves function return value.

    The test defines its own function, so no fixture is requested.
    """

    def func_with_return():
        return "test_value"

    decorated_func = deprecated_log(func_with_return)

    # The call must still warn; only the returned value is checked afterwards
    with pytest.warns(DeprecationWarning):
        result = decorated_func()

    assert result == "test_value"


def test_deprecated_log_preserves_arguments():
    """Positional and keyword arguments reach the wrapped function unchanged."""

    def echo(a, b, c=None):
        return (a, b, c)

    wrapped = deprecated_log(echo)

    with pytest.warns(DeprecationWarning):
        outcome = wrapped(1, 2, c=3)

    assert outcome == (1, 2, 3)


def test_deprecated_log_warning_category():
    """The warning emitted by ``deprecated_log`` is a ``DeprecationWarning``."""

    def noop():
        return None

    wrapped = deprecated_log(noop)

    with pytest.warns(DeprecationWarning):
        wrapped()
Loading