Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 55 additions & 13 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,74 @@
on:
push:
branches:
- master
- master # for legacy repos
- main
pull_request:
branches:
- master # for legacy repos
- main
workflow_dispatch: # Allow manually triggering the workflow
schedule:
# Run roughly every 15 days at 00:00 UTC
# (useful to check if updates on dependencies break the package)
- cron: "0 0 1,16 * *"

permissions:
contents: read

concurrency:
group: >-
${{ github.workflow }}-${{ github.ref_type }}-
${{ github.event.pull_request.number || github.sha }}
cancel-in-progress: true

jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ '3.9', '3.10', '3.11', '3.12', '3.13' ]

name: Python ${{ matrix.python-version }}
python: ["3.10", "3.11", "3.12", "3.13", "3.14"]
platform:
- ubuntu-latest
- macos-latest
# - windows-latest
runs-on: ${{ matrix.platform }}
name: Python ${{ matrix.python }}, ${{ matrix.platform }}
steps:
- uses: actions/checkout@v4
with:
submodules: true

- name: Setup Python
uses: actions/setup-python@v5
- uses: actions/setup-python@v5
id: setup-python
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
python-version: ${{ matrix.python }}

- name: Get latest CMake
uses: lukka/get-cmake@latest

- name: Test with tox
- name: Install dependencies
run: |
pip install tox
python -m pip install --upgrade pip
pip install tox coverage

- name: Run tests
run: >-
pipx run --python '${{ steps.setup-python.outputs.python-path }}'
tox
-- -rFEx --durations 10 --color yes --cov --cov-branch --cov-report=xml # pytest args

- name: Check for codecov token availability
  id: codecov-check
  shell: bash
  env:
    # Hand the secret to the script through the environment instead of
    # interpolating it into the script text: an empty secret no longer
    # collapses the test into a malformed `[ != '' ]`, and a non-empty one
    # is never parsed as shell source.
    CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
  run: |
    if [ -n "${CODECOV_TOKEN}" ]; then
      echo "codecov=true" >> "$GITHUB_OUTPUT"
    else
      echo "codecov=false" >> "$GITHUB_OUTPUT"
    fi

- name: Upload coverage reports to Codecov with GitHub Action
uses: codecov/codecov-action@v5

Check warning

Code scanning / CodeQL

Unpinned tag for a non-immutable Action in workflow Medium

Unpinned 3rd party Action 'Test the library' step
Uses Step
uses 'codecov/codecov-action' with ref 'v5', not a pinned commit hash
if: ${{ steps.codecov-check.outputs.codecov == 'true' }}
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
slug: ${{ github.repository }}
flags: ${{ matrix.platform }} - py${{ matrix.python }}
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## Version 0.8.0

- Implement parsers for compressed list objects.

## Version 0.7.0 - 0.7.3

- All dependencies are now listed under optional, except for numpy and biocutils.
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ version_scheme = "no-guess-dev"
[tool.ruff]
line-length = 120
src = ["src"]
exclude = ["tests"]
# exclude = ["tests"]
extend-ignore = ["F821"]

[tool.ruff.pydocstyle]
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ optional =
summarizedexperiment>=0.4.1
singlecellexperiment>=0.4.1
multiassayexperiment
compressed_lists>=0.3.0

# Add here test requirements (semicolon/line-separated)
testing =
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,11 @@ def run(self):

def build_cmake(self, ext):
build_temp = pathlib.Path(self.build_temp)
build_temp.mkdir(parents=True, exist_ok=True)
build_lib = pathlib.Path(self.build_lib)
outpath = os.path.join(build_lib.absolute(), ext.name)

build_temp = os.path.join(build_temp, "build")
if not os.path.exists(build_temp):
cmd = [
"cmake",
Expand Down
8 changes: 8 additions & 0 deletions src/rds2py/generics.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,14 @@
"ExperimentList": "rds2py.read_dict.read_dict",
# delayed matrices
"H5SparseMatrix": "rds2py.read_delayed_matrix.read_hdf5_sparse",
# compressed lists
"CompressedIntegerList": "rds2py.read_compressed_list.read_compressed_integer_list",
"PartitioningByEnd": "rds2py.read_compressed_list.read_partitioning_by_end",
"CompressedCharacterList": "rds2py.read_compressed_list.read_compressed_string_list",
"CompressedLogicalList": "rds2py.read_compressed_list.read_compressed_boolean_list",
"CompressedNumericList": "rds2py.read_compressed_list.read_compressed_float_list",
"CompressedSplitDataFrameList": "rds2py.read_compressed_list.read_compressed_frame_list",
"CompressedSplitDFrameList": "rds2py.read_compressed_list.read_compressed_frame_list",
}


Expand Down
206 changes: 206 additions & 0 deletions src/rds2py/read_compressed_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
"""Functions and classes for parsing Compressed List data structures."""

import numpy as np

from .generics import _dispatcher
from .rdsutils import get_class

__author__ = "jkanche"
__copyright__ = "jkanche"
__license__ = "MIT"


def read_partitioning_by_end(robject: dict, **kwargs):
    """Read a ``PartitioningByEnd`` object.

    Args:
        robject:
            Dictionary containing the parsed ``PartitioningByEnd`` object.

        **kwargs:
            Additional arguments, forwarded to the dispatcher when reading
            the ``end`` attribute.

    Returns:
        A ``Partitioning`` from the 'compressed_lists' package, built from
        the parsed ``end`` attribute.

    Raises:
        RuntimeError:
            If ``robject`` is not a ``PartitioningByEnd`` object.
    """
    _cls = get_class(robject)

    if _cls not in ("PartitioningByEnd",):
        raise RuntimeError(f"`robject` does not contain a `PartitioningByEnd` object, contains `{_cls}`.")

    ends = _dispatcher(robject["attributes"]["end"], **kwargs)

    # Imported here rather than at module level so that importing this module
    # does not itself require 'compressed_lists'.
    from compressed_lists import Partitioning

    return Partitioning(ends=np.asarray(ends))


def _get_compressed_common_attrs(robject, **kwargs):
    """Extract the attributes shared by every compressed list reader.

    Args:
        robject:
            Dictionary containing the parsed compressed list.

        **kwargs:
            Additional arguments, forwarded to the dispatcher.

    Returns:
        A tuple ``(unlist_data, element_metadata, metadata, partition)``.
        Each entry other than ``unlist_data`` is ``None`` when the matching
        attribute is absent from ``robject``.

    Raises:
        ValueError:
            If ``robject`` has no ``unlistData`` attribute.
    """
    attrs = robject["attributes"]
    if "unlistData" not in attrs:
        raise ValueError("Object does not contain unlistData, is it really a `CompressedList`?")

    def _read_optional(key):
        # Dispatch the attribute only when it is present.
        return _dispatcher(attrs[key], **kwargs) if key in attrs else None

    # Attributes are dispatched in the same order as they are returned.
    unlist_data = _dispatcher(attrs["unlistData"], **kwargs)
    element_metadata = _read_optional("elementMetadata")
    metadata = _read_optional("metadata")
    partition = _read_optional("partitioning")

    return unlist_data, element_metadata, metadata, partition


def read_compressed_integer_list(robject: dict, **kwargs):
    """Read an R compressed integer list.

    Args:
        robject:
            Dictionary containing the parsed compressed list.

        **kwargs:
            Additional arguments, forwarded to the dispatcher when reading
            the object's attributes.

    Returns:
        A ``CompressedIntegerList`` from the 'compressed_lists' package.

    Raises:
        RuntimeError:
            If ``robject`` is not a ``CompressedIntegerList`` object.

        ValueError:
            If ``robject`` has no ``unlistData`` attribute.
    """
    _cls = get_class(robject)

    if _cls not in ("CompressedIntegerList",):
        raise RuntimeError(f"`robject` does not contain a compressed integer list object, contains `{_cls}`.")

    unlist_data, element_metadata, metadata, partition = _get_compressed_common_attrs(robject=robject, **kwargs)

    # Imported here rather than at module level so that importing this module
    # does not itself require 'compressed_lists'.
    from compressed_lists import CompressedIntegerList

    return CompressedIntegerList(
        unlist_data=unlist_data,
        partitioning=partition,
        element_metadata=element_metadata,
        metadata=metadata,
    )


def read_compressed_string_list(robject: dict, **kwargs):
    """Read an R compressed string/character list.

    Args:
        robject:
            Dictionary containing the parsed compressed list.

        **kwargs:
            Additional arguments, forwarded to the dispatcher when reading
            the object's attributes.

    Returns:
        A ``CompressedCharacterList`` from the 'compressed_lists' package.

    Raises:
        RuntimeError:
            If ``robject`` is not a ``CompressedCharacterList`` object.

        ValueError:
            If ``robject`` has no ``unlistData`` attribute.
    """
    _cls = get_class(robject)

    if _cls not in ("CompressedCharacterList",):
        raise RuntimeError(f"`robject` does not contain a compressed string list object, contains `{_cls}`.")

    unlist_data, element_metadata, metadata, partition = _get_compressed_common_attrs(robject=robject, **kwargs)

    # Imported here rather than at module level so that importing this module
    # does not itself require 'compressed_lists'.
    from compressed_lists import CompressedCharacterList

    return CompressedCharacterList(
        unlist_data=unlist_data,
        partitioning=partition,
        element_metadata=element_metadata,
        metadata=metadata,
    )


def read_compressed_character_list(robject: dict, **kwargs):
    """Read an R compressed character list.

    Alias of :py:func:`read_compressed_string_list`; all arguments are
    passed through unchanged.

    Args:
        robject:
            Dictionary containing the parsed compressed string list.

        **kwargs:
            Additional arguments.

    Returns:
        Whatever :py:func:`read_compressed_string_list` returns for
        ``robject``.
    """
    parsed = read_compressed_string_list(robject, **kwargs)
    return parsed


def read_compressed_boolean_list(robject: dict, **kwargs):
    """Read an R compressed boolean (logical) list.

    Args:
        robject:
            Dictionary containing the parsed compressed list.

        **kwargs:
            Additional arguments, forwarded to the dispatcher when reading
            the object's attributes.

    Returns:
        A ``CompressedBooleanList`` from the 'compressed_lists' package.

    Raises:
        RuntimeError:
            If ``robject`` is not a ``CompressedLogicalList`` object.

        ValueError:
            If ``robject`` has no ``unlistData`` attribute.
    """
    _cls = get_class(robject)

    if _cls not in ("CompressedLogicalList",):
        raise RuntimeError(f"`robject` does not contain a compressed boolean list object, contains `{_cls}`.")

    unlist_data, element_metadata, metadata, partition = _get_compressed_common_attrs(robject=robject, **kwargs)

    # Imported here rather than at module level so that importing this module
    # does not itself require 'compressed_lists'.
    from compressed_lists import CompressedBooleanList

    return CompressedBooleanList(
        unlist_data=unlist_data,
        partitioning=partition,
        element_metadata=element_metadata,
        metadata=metadata,
    )


def read_compressed_float_list(robject: dict, **kwargs):
    """Read an R compressed float (numeric) list.

    Args:
        robject:
            Dictionary containing the parsed compressed list.

        **kwargs:
            Additional arguments, forwarded to the dispatcher when reading
            the object's attributes.

    Returns:
        A ``CompressedFloatList`` from the 'compressed_lists' package.

    Raises:
        RuntimeError:
            If ``robject`` is not a ``CompressedNumericList`` object.

        ValueError:
            If ``robject`` has no ``unlistData`` attribute.
    """
    _cls = get_class(robject)

    if _cls not in ("CompressedNumericList",):
        raise RuntimeError(f"`robject` does not contain a compressed float list object, contains `{_cls}`.")

    unlist_data, element_metadata, metadata, partition = _get_compressed_common_attrs(robject=robject, **kwargs)

    # Imported here rather than at module level so that importing this module
    # does not itself require 'compressed_lists'.
    from compressed_lists import CompressedFloatList

    return CompressedFloatList(
        unlist_data=unlist_data,
        partitioning=partition,
        element_metadata=element_metadata,
        metadata=metadata,
    )


def read_compressed_frame_list(robject: dict, **kwargs):
    """Read an R compressed split dataframe list.

    Args:
        robject:
            Dictionary containing the parsed compressed list.

        **kwargs:
            Additional arguments, forwarded to the dispatcher when reading
            the object's attributes.

    Returns:
        A ``CompressedSplitBiocFrameList`` from the 'compressed_lists'
        package.

    Raises:
        RuntimeError:
            If ``robject`` is neither a ``CompressedSplitDataFrameList`` nor
            a ``CompressedSplitDFrameList`` object.

        ValueError:
            If ``robject`` has no ``unlistData`` attribute.
    """
    _cls = get_class(robject)

    # Both R class names are accepted and parsed the same way.
    if _cls not in ("CompressedSplitDataFrameList", "CompressedSplitDFrameList"):
        raise RuntimeError(f"`robject` does not contain a compressed dataframe list object, contains `{_cls}`.")

    unlist_data, element_metadata, metadata, partition = _get_compressed_common_attrs(robject=robject, **kwargs)

    # Imported here rather than at module level so that importing this module
    # does not itself require 'compressed_lists'.
    from compressed_lists import CompressedSplitBiocFrameList

    return CompressedSplitBiocFrameList(
        unlist_data=unlist_data,
        partitioning=partition,
        element_metadata=element_metadata,
        metadata=metadata,
    )
Binary file added tests/data/compressedlist_char.rds
Binary file not shown.
Binary file added tests/data/compressedlist_int.rds
Binary file not shown.
Binary file added tests/data/compressedlist_logical.rds
Binary file not shown.
Binary file added tests/data/compressedlist_numeric.rds
Binary file not shown.
Binary file added tests/data/compressedlist_splitdframe.rds
Binary file not shown.
23 changes: 23 additions & 0 deletions tests/data/generate_files.R
Original file line number Diff line number Diff line change
Expand Up @@ -193,3 +193,26 @@ h5ls(h5ad_file)

M <- H5SparseMatrix(h5ad_file, "/obsp/connectivities")
saveRDS(M, "h5sparse.rds")

## Compressed lists
# Fixtures for the compressed list parsers: each object below is built with
# compress=TRUE and saved to its own .rds file.

library(IRanges)
# Integer list with three elements; the second one is empty.
x <- IntegerList(11:12, integer(0), 3:-2, compress=TRUE)
saveRDS(x, "compressedlist_int.rds")

# Character list with two elements of five letters each.
clist <- CharacterList(LETTERS[1:5], LETTERS[20:24], compress=TRUE)
saveRDS(clist, "compressedlist_char.rds")

# Numeric list with five length-one elements.
x <- NumericList(11.1, 12.2, 13.3, 14.4, 15.5, compress=TRUE)
saveRDS(x, "compressedlist_numeric.rds")


# Logical list with three length-one elements.
x <- LogicalList(TRUE, FALSE, TRUE, compress=TRUE)
saveRDS(x, "compressedlist_logical.rds")

# Split data frame list: the airquality rows grouped by their Ozone value.
data("airquality")
aq <- DataFrame(airquality)
aqsplit <-
SplitDataFrameList(as.list(split(aq, aq[["Ozone"]])),
compress = TRUE)
saveRDS(aqsplit, "compressedlist_splitdframe.rds")
2 changes: 0 additions & 2 deletions tests/test_atomics.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import pytest

from rds2py import read_rds

from biocutils import BooleanList, FloatList, IntegerList, StringList
Expand Down
Loading