Skip to content

Commit e48a32f

Browse files
committed
util.get_unique_names_duplicates: Fix duplication when indexed name already exists
1 parent 9eac7d1 commit e48a32f

File tree

2 files changed

+45
-20
lines changed

2 files changed

+45
-20
lines changed

Orange/data/tests/test_util.py

Lines changed: 30 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -59,10 +59,39 @@ def test_get_unique_names_from_duplicates(self):
5959
["x (2)", "x (3)", "x (1)"])
6060
self.assertEqual(
6161
get_unique_names_duplicates(["x (2)", "x", "x", "x (2)", "x (3)"]),
62-
["x (2) (1)", "x (1)", "x (4)", "x (2) (2)", "x (3)"])
62+
["x (2) (1)", "x (4)", "x (5)", "x (2) (2)", "x (3)"])
63+
self.assertEqual(
64+
get_unique_names_duplicates(["iris", "iris", "iris (1)"]),
65+
["iris (2)", "iris (3)", "iris (1)"])
66+
67+
self.assertEqual(
68+
get_unique_names_duplicates(["foo", "bar", "baz"], return_duplicated=True),
69+
(["foo", "bar", "baz"], []))
70+
self.assertEqual(
71+
get_unique_names_duplicates(["foo", "bar", "baz", "bar"], return_duplicated=True),
72+
(["foo", "bar (1)", "baz", "bar (2)"], ["bar"]))
73+
self.assertEqual(
74+
get_unique_names_duplicates(["x", "x", "x (1)"], return_duplicated=True),
75+
(["x (2)", "x (3)", "x (1)"], ["x"]))
76+
self.assertEqual(
77+
get_unique_names_duplicates(["x (2)", "x", "x", "x (2)", "x (3)"], return_duplicated=True),
78+
(["x (2) (1)", "x (4)", "x (5)", "x (2) (2)", "x (3)"], ["x (2)", "x"]))
6379
self.assertEqual(
6480
get_unique_names_duplicates(["x", "", "", None, None, "x"]),
6581
["x (1)", "", "", None, None, "x (2)"])
82+
self.assertEqual(
83+
get_unique_names_duplicates(["iris", "iris", "iris (1)", "iris (2)"], return_duplicated=True),
84+
(["iris (3)", "iris (4)", "iris (1)", "iris (2)"], ["iris"]))
85+
86+
self.assertEqual(
87+
get_unique_names_duplicates(["iris (1) (1)", "iris (1)", "iris (1)"]),
88+
["iris (1) (1)", "iris (1) (2)", "iris (1) (3)"]
89+
)
90+
91+
self.assertEqual(
92+
get_unique_names_duplicates(["iris (1) (1)", "iris (1)", "iris (1)", "iris", "iris"]),
93+
["iris (1) (1)", "iris (1) (2)", "iris (1) (3)", "iris (2)", "iris (3)"]
94+
)
6695

6796
def test_get_unique_names_domain(self):
6897
(attrs, classes, metas), renamed = \

Orange/data/util.py

Lines changed: 15 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@
22
Data-manipulation utilities.
33
"""
44
import re
5-
from collections import Counter, defaultdict
6-
from itertools import chain
5+
from collections import Counter
6+
from itertools import chain, count
77
from typing import Callable
88

99
import numpy as np
@@ -155,8 +155,8 @@ def get_indices(names, name):
155155
:param name: str
156156
:return: list of indices
157157
"""
158-
return [int(a.group(2)) for x in names
159-
for a in re.finditer(RE_FIND_INDEX.format(name), x)]
158+
return [int(a.group(2)) for x in filter(None, names)
159+
for a in re.finditer(RE_FIND_INDEX.format(re.escape(name)), x)]
160160

161161

162162
def get_unique_names(names, proposed):
@@ -203,26 +203,22 @@ def get_unique_names(names, proposed):
203203
return [f"{name} ({max_index})" for name in proposed]
204204

205205

206-
def get_unique_names_duplicates(proposed: list) -> list:
206+
def get_unique_names_duplicates(proposed: list, return_duplicated=False) -> list:
207207
"""
208208
Returns list of unique names. If a name is duplicated, the
209-
function appends the smallest available index in parentheses.
209+
function appends the next available index in parentheses.
210210
211211
For example, a proposed list of names `x`, `x` and `x (2)`
212-
results in `x (1)`, `x (3)`, `x (2)`.
212+
results in `x (3)`, `x (4)`, `x (2)`.
213213
"""
214-
counter = Counter(proposed)
215-
index = defaultdict(int)
216-
names = []
217-
for name in proposed:
218-
if name and counter[name] > 1:
219-
unique_name = name
220-
while unique_name in counter:
221-
index[name] += 1
222-
unique_name = f"{name} ({index[name]})"
223-
name = unique_name
224-
names.append(name)
225-
return names
214+
indices = {name: count(max(get_indices(proposed, name), default=0) + 1)
215+
for name, cnt in Counter(proposed).items()
216+
if name and cnt > 1}
217+
new_names = [f"{name} ({next(indices[name])})" if name in indices else name
218+
for name in proposed]
219+
if return_duplicated:
220+
return new_names, list(indices)
221+
return new_names
226222

227223

228224
def get_unique_names_domain(attributes, class_vars=(), metas=()):

0 commit comments

Comments
 (0)