Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 39 additions & 10 deletions src/arviz_base/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import re
import warnings
from collections.abc import Sequence

import numpy as np

Expand Down Expand Up @@ -90,8 +91,21 @@ def _subset_list(subset, whole_list, filter_items=None, warn=True, check_if_pres
and ``filter_items``.
"""
if subset is not None:
if isinstance(subset, str):
if subset in whole_list:
subset = [subset]
elif isinstance(subset, str):
subset = [subset]
elif isinstance(subset, Sequence) and not isinstance(subset, str | bytes):
subset = list(subset)
Comment on lines +98 to +99
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure what this is doing

else:
subset = [subset]

def _string_parts(obj):
if isinstance(obj, str):
return [obj]
if isinstance(obj, Sequence) and not isinstance(obj, str | bytes):
return [x for x in obj if isinstance(x, str)]
return []

whole_list_tilde = [item for item in whole_list if _check_tilde_start(item)]
if whole_list_tilde and warn:
Expand All @@ -112,11 +126,17 @@ def _subset_list(subset, whole_list, filter_items=None, warn=True, check_if_pres
for pattern in excluded_items[:]:
excluded_items.remove(pattern)
if filter_items == "like":
real_items = [real_item for real_item in whole_list if pattern in real_item]
real_items = [
real_item
for real_item in whole_list
if any(pattern in s for s in _string_parts(real_item))
]
Comment on lines 129 to 133
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure this is what we want. IIUC, with this behaviour, if I use var_names="~theta", filter_vars="like" and I have as variable names ("theta", "original"), ("theta", "transformed"), and ("tau", "original") I end up plotting/keeping all the variables.

I think for "like" it would make more sense to exclude the first two. For "regex" I am much less sure whether we want to try to do something complicated, or keep things simple and ignore filter_vars completely in case of non-string elements.

Important note: This is a collaborative project and it is quite probable that it will take a while until we all agree on a behaviour around this. I may have ideas, but me saying "I think this or that should happen" doesn't automatically mean this should be the behaviour of the library. It can be frustrating, but you'll probably need some extra patience for this PR.

Copy link
Author

@Chirag3841 Chirag3841 Feb 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the clarification. I agree it’s better to align with xarray behavior and get consensus before finalizing anything. I’m happy to iterate based on feedback and adjust the implementation/tests as needed. Please let me know what target behavior you’d prefer and I can update the PR accordingly.

else:
# i.e. filter_items == "regex"
real_items = [
real_item for real_item in whole_list if re.search(pattern, real_item)
real_item
for real_item in whole_list
if any(re.search(pattern, s) for s in _string_parts(real_item))
]
if not real_items:
not_found.append(pattern)
Expand All @@ -129,14 +149,23 @@ def _subset_list(subset, whole_list, filter_items=None, warn=True, check_if_pres
subset = [item for item in whole_list if item not in excluded_items]

elif filter_items == "like":
subset = [item for item in whole_list for name in subset if name in item]
subset = [
item
for item in whole_list
for name in subset
if isinstance(name, str) and any(name in s for s in _string_parts(item))
]
elif filter_items == "regex":
subset = [item for item in whole_list for name in subset if re.search(name, item)]

existing_items = np.isin(subset, whole_list)
if check_if_present and not np.all(existing_items):
raise KeyError(f"{np.array(subset)[~existing_items]} are not present")

subset = [
item
for item in whole_list
for name in subset
if isinstance(name, str) and any(re.search(name, s) for s in _string_parts(item))
]
existing_items = [item in whole_list for item in subset]
if check_if_present and not all(existing_items):
missing = [item for item, ok in zip(subset, existing_items) if not ok]
raise KeyError(f"{missing} are not present")
return subset


Expand Down
13 changes: 5 additions & 8 deletions src/arviz_base/utils.pyi
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
# File generated with docstub

import re
import warnings
from collections.abc import Hashable, Sequence
from typing import Literal

import numpy as np
from _typeshed import Incomplete
from numpy.typing import ArrayLike
from xarray import DataArray, Dataset
Expand All @@ -18,12 +15,12 @@ def _var_names(
check_if_present: bool = ...,
) -> list | None: ...
def _subset_list(
subset: str,
whole_list: list,
subset: Hashable | Sequence[Hashable] | None,
whole_list: Sequence[Hashable],
filter_items: Literal[None, "like", "regex"] | None = ...,
warn=...,
check_if_present=...,
) -> list | None: ...
warn: bool = ...,
check_if_present: bool = ...,
) -> list[Hashable] | None: ...
def _get_coords(
data: DataArray, coords: dict[Hashable, ArrayLike]
) -> Dataset | DataArray: ...
Expand Down
39 changes: 39 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# pylint: disable=redefined-outer-name
from collections.abc import Hashable

import numpy as np
import pytest

Expand Down Expand Up @@ -146,3 +148,40 @@ def test_subset_list_negation_not_found():
names = ["mu", "theta"]
with pytest.warns(UserWarning, match=".+not.+found.+"):
assert _subset_list("~tau", names) == names


def test_subset_list_tuple_name_scalar():
    """A tuple passed as ``subset`` that is itself a name is returned as one item."""
    tuple_name = ("tuple", "name")
    selected = _subset_list(tuple_name, [tuple_name, "a"])
    assert selected == [tuple_name]


def test_subset_list_tuple_name_list():
    """A list mixing a tuple name and a string name selects both, in order."""
    available = [("tuple", "name"), "str_name"]
    requested = [("tuple", "name"), "str_name"]
    selected = _subset_list(requested, available)
    assert selected == [("tuple", "name"), "str_name"]


def test_subset_list_tuple_container():
    """A tuple used as the container of names behaves like a list of names."""
    available = [("tuple", "name"), "str_name"]
    requested = (("tuple", "name"), "str_name")
    selected = _subset_list(requested, available)
    assert selected == [("tuple", "name"), "str_name"]


def test_subset_list_like_ignores_tuple_patterns():
    """With ``filter_items="like"`` only the string pattern contributes matches."""
    available = [("tuple", "name"), "alpha", "beta"]
    patterns = [("tuple", "name"), "alp"]
    selected = _subset_list(patterns, available, filter_items="like")
    assert selected == ["alpha"]


def test_subset_list_regex_ignores_tuple_patterns():
    """With ``filter_items="regex"`` only the string pattern contributes matches."""
    available = [("tuple", "name"), "alpha", "beta"]
    patterns = [("tuple", "name"), "alp.*"]
    selected = _subset_list(patterns, available, filter_items="regex")
    assert selected == ["alpha"]


def test_subset_list_frozenset_name_scalar():
    """A hashable non-sequence name (a frozenset) is selected as a single item."""
    frozen_name = frozenset({"a", "b"})
    available: list[Hashable] = [frozen_name, "x"]
    selected = _subset_list(frozen_name, available)
    assert selected == [frozen_name]