Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/biocutils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,5 @@

from .biocobject import BiocObject
from .table import table

from .order import order, sort
214 changes: 214 additions & 0 deletions src/biocutils/order.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
from typing import Any, Union, Sequence, Optional
from functools import singledispatch

import numpy

from .subset import subset
from .Factor import Factor


@singledispatch
def order(
x: Any,
force_last: Union[set, Sequence] = [None, numpy.ma.masked, numpy.nan],
decreasing: bool = False,
dtype: Optional[numpy.dtype] = None,
) -> numpy.ndarray:
"""
Obtain an ordering of entries of ``x``.
This ordering should be stable.

Args:
x:
Values to be ordered.
All values should be comparable aside from those listed in ``force_last``.

force_last:
Values that are incomparable and will be placed last, i.e., at the end of the ordering.
No attempt is made to order values within ``force_last``.

decreasing:
Whether to order by decreasing value.
Default is to order by increasing value.

dtype:
Integer type of the output array.
If ``None``, defaults to the smallest type that can hold the length of ``x``.

Returns:
Integer NumPy array containing the ordering required to permute ``x`` into a sorted state,
i.e., given an output array ``o``, ``subset(x, o)`` will be sorted.

Examples:
>>> import biocutils
>>>
>>> x = [
... 15,
... 1,
... 22,
... 3,
... 14,
... ]
>>> o = biocutils.order(
... x
... )
>>> print(o)
>>> biocutils.subset(
... x, o
... )
>>> o = biocutils.order(
... x,
... decreasing=True,
... )
>>> print(o)
>>> biocutils.subset(
... x, o
... )
>>>
>>> x = [
... "C",
... "B",
... None,
... "D",
... "D",
... None,
... "A",
... ]
>>> o = biocutils.order(
... x
... )
>>> print(o)
>>> biocutils.subset(
... x, o
... )
>>> o = biocutils.order(
... x,
... force_last=set(
... [None, "A"]
... ),
... )
>>> print(o)
>>> biocutils.subset(
... x, o
... )
>>>
>>> # Factor ordering respects the ordering in the levels.
>>> x = biocutils.Factor.from_sequence(
... [
... "C",
... "B",
... "D",
... "A",
... "C",
... "A",
... "D",
... ],
... [
... "D",
... "C",
... "B",
... "A",
... ],
... )
>>> o = biocutils.order(
... x
... )
>>> print(o)
>>> print(
... biocutils.subset(
... x, o
... )
... )
"""

collected = []
forced = []
if len(force_last) > 0:
for i, y in enumerate(x):
if y not in force_last:
collected.append(i)
else:
forced.append(i)
else:
collected = list(range(len(x)))

def key(i):
return x[i]

collected.sort(key=key, reverse=decreasing)

if dtype is None:
dtype = numpy.min_scalar_type(len(x) - 1)
output = numpy.ndarray(len(x), dtype=dtype)
output[: len(collected)] = collected
if len(forced) > 0:
output[len(collected) :] = forced

return output


@order.register
def _order_Factor(
x: Factor,
force_last: Union[set, Sequence] = set([None]),
decreasing: bool = False,
dtype: Optional[numpy.dtype] = None,
) -> numpy.ndarray:
new_force_last = set()
for i, lev in enumerate(x.get_levels()):
if lev in force_last:
new_force_last.add(i)
if None in force_last:
new_force_last.add(-1)

# For consistency with R, we order by codes.
return order.registry[object](x.get_codes(), force_last=new_force_last, decreasing=decreasing, dtype=dtype)


@singledispatch
def sort(x: Any, force_last: Union[set, Sequence] = [None, numpy.ma.masked], decreasing: bool = False) -> Any:
"""
Sort an arbitrary iterable sequence.

Args:
x:
Values to be sorted.
All values should be comparable aside from those listed in ``force_last``.

force_last:
Values that are incomparable and will be placed last, i.e., at the end of the ordering.
No attempt is made to order values within ``force_last``.

decreasing:
Whether to sort by decreasing value.
Default is to sort by increasing value.

Returns:
Sorted contents of ``x``.
This is usually of the same class as ``x``.

Examples:
>>> import biocutils
>>> biocutils.sort(
... range(
... 20, 10, -1
... )
... )
>>> biocutils.sort(
... [
... "A",
... "B",
... None,
... "C",
... "D",
... ],
... decreasing=True,
... )
>>> import numpy
>>> biocutils.sort(
... numpy.random.rand(
... 10
... )
... )
"""
return subset(x, order(x, force_last=force_last, decreasing=decreasing))
58 changes: 58 additions & 0 deletions tests/test_order.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import biocutils
import numpy


def test_order_simple():
o = biocutils.order(["D", "B", "C", "A"])
assert list(o) == [3, 1, 2, 0]
assert o.dtype == numpy.dtype("uint8")

o = biocutils.order(["D", "B", "C", "A"], dtype=numpy.dtype("uint32"))
assert list(o) == [3, 1, 2, 0]
assert o.dtype == numpy.dtype("uint32")

# Handles ties stably.
o = biocutils.order(["D", "B", "D", "C", "A", "D"])
assert list(o) == [4, 1, 3, 0, 2, 5]

# Reverses correctly with stable ties.
o = biocutils.order(["D", "B", "D", "C", "A", "D"], decreasing=True)
assert list(o) == [0, 2, 5, 3, 1, 4]

# Ignores incomparable values.
o = biocutils.order(["D", "B", None, "C", "A"])
assert list(o) == [4, 1, 3, 0, 2]

o = biocutils.order(["D", "B", "C", "A"], force_last=[]) # for coverage purposes.
assert list(o) == [3, 1, 2, 0]


def test_order_Factor():
f = biocutils.Factor.from_sequence(["D", "B", "C", "A"])
o = biocutils.order(f)
assert list(o) == [3, 1, 2, 0]
o = biocutils.order(f, decreasing=True)
assert list(o) == [0, 2, 1, 3]

o = biocutils.order(f, force_last=[]) # for coverage purposes.
assert list(o) == [3, 1, 2, 0]

# Respects the level ordering.
o = biocutils.order(biocutils.Factor.from_sequence(["D", "B", "C", "A"], ["D", "C", "B", "A"]))
assert list(o) == [0, 2, 1, 3]

# Respects various incomparable values.
f = biocutils.Factor.from_sequence(["D", "B", None, "C", "A"])
o = biocutils.order(f)
assert list(o) == [4, 1, 3, 0, 2]
o = biocutils.order(f, force_last=[None, "A"])
assert list(o) == [1, 3, 0, 2, 4]


def test_order_sort():
assert biocutils.sort(["A", "B", None, "C", "D"], decreasing=True) == ["D", "C", "B", "A", None]

x = numpy.random.rand(20)
s = biocutils.sort(x)
assert s.dtype == x.dtype
assert (s == sorted(x)).all()