BiocPy · LTLA · Jan 8, 2026 · Jan 8, 2026 · Jan 8, 2026 · Jan 8, 2026
diff --git a/src/biocutils/__init__.py b/src/biocutils/__init__.py
@@ -63,3 +63,5 @@
 
 from .biocobject import BiocObject
 from .table import table
+
+from .duplicated import duplicated, unique
diff --git a/src/biocutils/duplicated.py b/src/biocutils/duplicated.py
@@ -0,0 +1,201 @@
+from typing import Any, Union, Sequence
+from functools import singledispatch
+
+import numpy
+
+from .Factor import Factor
+from .subset import subset
+
+
+@singledispatch
+def duplicated(x: Any, incomparables: Union[set, Sequence] = set(), from_last: bool = False) -> numpy.ndarray:
+    """
+    Find duplicated elements of ``x``.
+
+    An element is a duplicate if an equal element was already encountered during the scan,
+    which starts from the beginning of ``x`` by default, or from the end if ``from_last = True``.
+
+    This is a generic function created with :py:func:`functools.singledispatch`,
+    so specialized methods can be registered for other classes of ``x``.
+    The default method documented here works on any sized iterable of hashable values.
+
+    Args:
+        x:
+            Object to be searched for duplicates.
+            This is usually a sequence that can be iterated over.
+            Its length must be available via ``len()``.
+            Values are stored in a :py:class:`set` during the scan, so they must be hashable.
+            If ``from_last = True``, it must also support indexing by integer position.
+
+        incomparables:
+            Values of ``x`` that cannot be compared.
+            Any value of ``x`` in ``incomparables`` will never be a duplicate.
+            Any object that supports the ``in`` operator can be used here,
+            e.g., by implementing a ``__contains__`` method.
+            The default set is only read and never modified.
+
+        from_last:
+            Whether to report the last occurrence as a non-duplicate.
+            By default, the first occurrence is the non-duplicate.
+
+    Returns:
+        Boolean NumPy array of length equal to that of ``x``.
+        Each entry is ``False`` for the first occurrence of a value in ``x``,
+        and ``True`` for every subsequent occurrence of that value.
+        If ``from_last = True``, the entry is ``False`` for the last occurrence instead,
+        and ``True`` for every earlier occurrence.
+        Entries for values in ``incomparables`` are always ``False``.
+
+    Raises:
+        TypeError:
+            If a comparable value of ``x`` is not hashable, as it cannot be looked up in the set of observed values.
+
+    Examples:
+        >>> import biocutils
+
+        >>> # By default, the first occurrence of each value is not a duplicate.
+        >>> biocutils.duplicated([1, 2, 1, 2, 3, 2])
+        array([False, False,  True,  True, False,  True])
+
+        >>> # Scanning from the end treats the last occurrence as the non-duplicate.
+        >>> biocutils.duplicated([1, 2, 1, 2, 3, 2], from_last=True)
+        array([ True,  True, False,  True, False, False])
+
+        >>> # None is compared like any other value by default.
+        >>> biocutils.duplicated([1, 2, None, None, 3, 2])
+        array([False, False, False,  True, False,  True])
+
+        >>> # Values listed in incomparables are never reported as duplicates.
+        >>> biocutils.duplicated([1, 2, None, None, 3, 2], incomparables=set([None]))
+        array([False, False, False, False, False,  True])
+
+        >>> # Any hashable values can be compared, e.g., strings.
+        >>> biocutils.duplicated(["a", "b", "a"])
+        array([False, False,  True])
+    """
+
+    # Values encountered so far in the scan; these must be hashable to be stored in a set.
+    seen = set()
+
+    # Zero-filled rather than uninitialized, so only the duplicates need to be flagged.
+    output = numpy.zeros(len(x), dtype=numpy.bool_)
+
+    # Flag position i if its value y was already observed; otherwise record y as observed.
+    def process(i, y):
+        if y in incomparables:
+            return  # incomparable values are never duplicates and are never recorded.
+        if y in seen:
+            output[i] = True
+        else:
+            seen.add(y)
+
+    if not from_last:
+        for i, y in enumerate(x):
+            process(i, y)
+    else:
+        # Walk backwards so that the last occurrence is the one treated as the non-duplicate.
+        for i in range(len(x) - 1, -1, -1):
+            process(i, x[i])
+
+    return output
+
+
+@duplicated.register
+def _duplicated_Factor(
+    x: Factor, incomparables: Union[set, Sequence] = set(), from_last: bool = False
+) -> numpy.ndarray:
+    """
+    Find duplicated elements of a ``Factor`` by tracking its integer codes instead of hashing each value.
+
+    Args:
+        x: Factor to be searched for duplicates.
+        incomparables: Levels (or ``None``, for missing values) that are never reported as duplicates.
+        from_last: Whether to treat the last occurrence of each value as the non-duplicate.
+
+    Returns:
+        Boolean NumPy array with one entry per element of ``x``, set to ``True`` at each duplicate.
+    """
+    # State of each level, indexed by code: None if incomparable, otherwise whether it was observed.
+    state = [None if level in incomparables else False for level in x.get_levels()]
+
+    # Extra trailing slot for missing values: a code of -1 indexes the end of the list.
+    state.append(None if None in incomparables else False)
+
+    codes = x.get_codes()
+    if from_last:
+        scan = ((pos, codes[pos]) for pos in range(len(x) - 1, -1, -1))
+    else:
+        scan = enumerate(codes)
+
+    flags = numpy.zeros(len(x), dtype=numpy.bool_)
+    for pos, code in scan:
+        status = state[code]
+        if status is None:
+            continue
+        if status:
+            flags[pos] = True
+        else:
+            state[code] = True
+
+    return flags
+
+
+def unique(x: Any, incomparables: Union[set, Sequence] = set(), from_last: bool = False) -> Any:
+    """
+    Get all unique values of ``x``.
+
+    Duplicated entries are identified with :py:func:`duplicated` and dropped with ``subset``.
+    As :py:func:`duplicated` is a generic function, any class of ``x`` with a registered method is supported,
+    provided that ``subset`` can extract its elements by integer position.
+
+    Args:
+        x:
+            Object in which to find unique entries.
+            This is usually a sequence that can be iterated over.
+
+        incomparables:
+            Values of ``x`` that cannot be compared.
+            Any value of ``x`` in ``incomparables`` will never be a duplicate,
+            so every occurrence of such a value is retained.
+            Any object that supports the ``in`` operator can be used here,
+            e.g., by implementing a ``__contains__`` method.
+
+        from_last:
+            Whether to retain the last occurrence of each value in ``x``.
+            By default, the first occurrence is retained.
+
+    Returns:
+        An object containing unique values of ``x``.
+        This is usually of the same class as ``x``.
+        Retained entries are requested from ``subset`` in order of increasing position in ``x``.
+
+    Examples:
+        >>> import biocutils
+
+        >>> # By default, the first occurrence of each value is retained.
+        >>> biocutils.unique([1, 2, 1, 2, 3, 2])
+        [1, 2, 3]
+
+        >>> # Retaining the last occurrence changes which positions survive.
+        >>> biocutils.unique([1, 2, 1, 2, 3, 2], from_last=True)
+        [1, 3, 2]
+
+        >>> # None is treated like any other value by default.
+        >>> biocutils.unique([1, 2, None, None, 3, 2])
+        [1, 2, None, 3]
+
+        >>> # Incomparable values are never duplicates, so all of them are kept.
+        >>> biocutils.unique([1, 2, None, None, 3, 2], incomparables=set([None]))
+        [1, 2, None, None, 3]
+
+        >>> # Any hashable values can be used, e.g., strings.
+        >>> biocutils.unique(["a", "b", "a"])
+        ['a', 'b']
+    """
+    # Flag every repeated entry; the method used depends on the class of ``x``.
+    flagged = duplicated(x, incomparables=incomparables, from_last=from_last)
+
+    # Positions of the entries that were not flagged, in increasing order.
+    retained = numpy.where(numpy.logical_not(flagged))[0]
+
+    return subset(x, retained)
diff --git a/tests/test_duplicated.py b/tests/test_duplicated.py
@@ -0,0 +1,23 @@
+import biocutils
+
+
+def test_duplicated_basic():
+    assert biocutils.duplicated([1, 2, 1, 2, 3, 2]).tolist() == [False, False, True, True, False, True]  # forward scan
+    assert biocutils.duplicated([1, 2, 1, 2, 3, 2], from_last=True).tolist() == [True, True, False, True, False, False]  # backward scan
+    assert biocutils.duplicated([1, 2, None, None, 3, 2, 3]).tolist() == [False, False, False, True, False, True, True]  # None is comparable by default
+    assert biocutils.duplicated([1, 2, None, None, 3, 2, 3], incomparables={None}).tolist() == [False, False, False, False, False, True, True]  # None is never a duplicate
+
+
+def test_duplicated_Factor():
+    assert biocutils.duplicated(biocutils.Factor.from_sequence([1, 2, 1, 2, 3, 2])).tolist() == [False, False, True, True, False, True]  # forward scan
+    assert biocutils.duplicated(biocutils.Factor.from_sequence([1, 2, 1, 2, 3, 2]), from_last=True).tolist() == [True, True, False, True, False, False]  # backward scan
+    assert biocutils.duplicated(biocutils.Factor.from_sequence([1, 2, None, None, 3, 2, 3])).tolist() == [False, False, False, True, False, True, True]  # missing values are comparable by default
+    assert biocutils.duplicated(biocutils.Factor.from_sequence([1, 2, None, None, 3, 2, 3]), incomparables={None}).tolist() == [False, False, False, False, False, True, True]  # missing values are never duplicates
+    assert biocutils.duplicated(biocutils.Factor.from_sequence([1, 2, None, None, 3, 2, 3]), incomparables={"2"}).tolist() == [False, False, False, True, False, False, True]  # incomparable level
+
+
+def test_unique():
+    assert biocutils.unique([1, 2, 1, 2, 3, 2]) == [1, 2, 3]  # first occurrences are kept
+    assert biocutils.unique([1, 2, 1, 2, 3, 2], from_last=True) == [1, 3, 2]  # last occurrences are kept
+    assert biocutils.unique([1, 2, None, None, 3, 2]) == [1, 2, None, 3]  # None is deduplicated by default
+    assert biocutils.unique([1, 2, None, None, 3, 2], incomparables={None}) == [1, 2, None, None, 3]  # every incomparable None is kept
Original file line number	Diff line number	Diff line change
Expand Up		@@ -63,3 +63,5 @@

		from .biocobject import BiocObject
		from .table import table

		from .duplicated import duplicated, unique