Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions Orange/data/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,78 @@ def test_get_unique_names_with_domain(self):
self.assertEqual(get_unique_names(domain, "foo"), "foo (1)")
self.assertEqual(get_unique_names(domain, "baz"), "baz (4)")

def test_get_unique_names_not_equal(self):
    """Check `get_unique_names` called with ``equal_numbers=False``.

    With ``equal_numbers=False`` every proposed name is numbered on its
    own: a name that does not clash is returned unchanged, while a name
    that clashes gets the next free index for that name only (so
    ``["bar", "baz"]`` becomes ``["bar (1)", "baz (4)"]`` rather than
    sharing one index).

    The same expectations must hold whether the used names are given as
    a plain list of strings or as a `Domain`, so every case is run
    against both.
    """
    names = ["foo", "bar", "baz", "baz (3)"]
    a, b, c, d = map(ContinuousVariable, names)
    # Spread the variables over attributes, class and metas so that all
    # three parts of the domain contribute used names.
    domain = Domain([a, b], c, [d])

    # (proposed names, expected unique names)
    cases = [
        (["qux"], ["qux"]),
        (["foo"], ["foo (1)"]),
        (["baz"], ["baz (4)"]),
        (["baz (3)"], ["baz (3) (1)"]),
        (["qux", "quux"], ["qux", "quux"]),
        (["bar", "baz"], ["bar (1)", "baz (4)"]),
        (["qux", "baz"], ["qux", "baz (4)"]),
        (["qux", "bar"], ["qux", "bar (1)"]),
        (["foo", "bar", "baz"], ["foo (1)", "bar (1)", "baz (4)"]),
    ]

    for used in (names, domain):
        for proposed, expected in cases:
            # subTest keeps going after a failure and reports which
            # source/proposal combination broke.
            with self.subTest(used=type(used).__name__, proposed=proposed):
                self.assertEqual(
                    get_unique_names(used, proposed, equal_numbers=False),
                    expected
                )

def test_get_unique_names_from_duplicates(self):
self.assertEqual(
get_unique_names_duplicates(["foo", "bar", "baz"]),
Expand Down
29 changes: 17 additions & 12 deletions Orange/data/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import bottleneck as bn
from scipy import sparse as sp

RE_FIND_INDEX = r"(^{} \()(\d{{1,}})(\)$)"
RE_FIND_INDEX = r"(^{})( \((\d{{1,}})\))?$"


def one_hot(
Expand Down Expand Up @@ -167,11 +167,11 @@ def get_indices(names, name):
:param name: str
:return: list of indices
"""
return [int(a.group(2)) for x in filter(None, names)
return [int(a.group(3) or 0) for x in filter(None, names)
for a in re.finditer(RE_FIND_INDEX.format(re.escape(name)), x)]


def get_unique_names(names, proposed):
def get_unique_names(names, proposed, equal_numbers=True):
"""
Returns unique names for variables

Expand All @@ -189,13 +189,15 @@ def get_unique_names(names, proposed):
list.

The method is used in widgets like MDS, which adds two variables (`x` and
`y`). It is desired that they have the same index. If `x`, `x (1)` and
`x (2)` and `y` (but no other `y`'s already exist in the domain, MDS
should append `x (3)` and `y (3)`, not `x (3)` and y (1)`.
`y`). It is desired that they have the same index. When
`equal_numbers=True`, if `x`, `x (1)` and `x (2)` and `y` (but no other
`y`'s) already exist in the domain, MDS should append `x (3)` and `y (3)`,
not `x (3)` and `y (1)`.

Args:
names (Domain or list of str): used names
proposed (str or list of str): proposed name
equal_numbers (bool): Add same number to all proposed names

Return:
str or list of str
Expand All @@ -206,13 +208,16 @@ def get_unique_names(names, proposed):
names = [var.name for var in chain(names.variables, names.metas)]
if isinstance(proposed, str):
return get_unique_names(names, [proposed])[0]
indicess = [indices
for indices in (get_indices(names, name) for name in proposed)
if indices]
if not (set(proposed) & set(names) or indicess):
indices = {name: get_indices(names, name) for name in proposed}
indices = {name: max(ind) + 1 for name, ind in indices.items() if ind}
if not (set(proposed) & set(names) or indices):
return proposed
max_index = max(map(max, indicess), default=0) + 1
return [f"{name} ({max_index})" for name in proposed]
if equal_numbers:
max_index = max(indices.values())
return [f"{name} ({max_index})" for name in proposed]
else:
return [f"{name} ({indices[name]})" if name in indices else name
for name in proposed]


def get_unique_names_duplicates(proposed: list, return_duplicated=False) -> list:
Expand Down