Skip to content

Commit fc7d1f4

Browse files
authored
Merge pull request #3486 from janezd/improve-get-unique-names
util.get_unique_names: Add more flexible arguments
2 parents f5b7873 + 9910f89 commit fc7d1f4

File tree

9 files changed

+105
-65
lines changed

9 files changed

+105
-65
lines changed

Orange/data/tests/test_util.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import unittest
2+
3+
from Orange.data import Domain, ContinuousVariable
4+
from Orange.data.util import get_unique_names
5+
6+
7+
class TestGetUniqueNames(unittest.TestCase):
8+
def test_get_unique_names(self):
9+
names = ["foo", "bar", "baz", "baz (3)"]
10+
self.assertEqual(get_unique_names(names, ["qux"]), ["qux"])
11+
self.assertEqual(get_unique_names(names, ["foo"]), ["foo (1)"])
12+
self.assertEqual(get_unique_names(names, ["baz"]), ["baz (4)"])
13+
self.assertEqual(get_unique_names(names, ["baz (3)"]), ["baz (3) (1)"])
14+
self.assertEqual(
15+
get_unique_names(names, ["qux", "quux"]), ["qux", "quux"])
16+
self.assertEqual(
17+
get_unique_names(names, ["bar", "baz"]), ["bar (4)", "baz (4)"])
18+
self.assertEqual(
19+
get_unique_names(names, ["qux", "baz"]), ["qux (4)", "baz (4)"])
20+
self.assertEqual(
21+
get_unique_names(names, ["qux", "bar"]), ["qux (1)", "bar (1)"])
22+
23+
self.assertEqual(get_unique_names(names, "qux"), "qux")
24+
self.assertEqual(get_unique_names(names, "foo"), "foo (1)")
25+
self.assertEqual(get_unique_names(names, "baz"), "baz (4)")
26+
27+
self.assertEqual(get_unique_names(tuple(names), "baz"), "baz (4)")
28+
29+
def test_get_unique_names_with_domain(self):
30+
a, b, c, d = map(ContinuousVariable, ["foo", "bar", "baz", "baz (3)"])
31+
domain = Domain([a, b], c, [d])
32+
self.assertEqual(get_unique_names(domain, ["qux"]), ["qux"])
33+
self.assertEqual(get_unique_names(domain, ["foo"]), ["foo (1)"])
34+
self.assertEqual(get_unique_names(domain, ["baz"]), ["baz (4)"])
35+
self.assertEqual(get_unique_names(domain, ["baz (3)"]), ["baz (3) (1)"])
36+
self.assertEqual(
37+
get_unique_names(domain, ["qux", "quux"]), ["qux", "quux"])
38+
self.assertEqual(
39+
get_unique_names(domain, ["bar", "baz"]), ["bar (4)", "baz (4)"])
40+
self.assertEqual(
41+
get_unique_names(domain, ["qux", "baz"]), ["qux (4)", "baz (4)"])
42+
self.assertEqual(
43+
get_unique_names(domain, ["qux", "bar"]), ["qux (1)", "bar (1)"])
44+
45+
self.assertEqual(get_unique_names(domain, "qux"), "qux")
46+
self.assertEqual(get_unique_names(domain, "foo"), "foo (1)")
47+
self.assertEqual(get_unique_names(domain, "baz"), "baz (4)")
48+
49+
50+
if __name__ == "__main__":
51+
unittest.main()

Orange/data/util.py

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
Data-manipulation utilities.
33
"""
44
import re
5+
from itertools import chain
6+
57
import numpy as np
68
import bottleneck as bn
79
from scipy import sparse as sp
@@ -139,15 +141,42 @@ def get_indices(names, name):
139141

140142
def get_unique_names(names, proposed):
141143
"""
142-
Returns unique names of variables. Variables which are duplicate get appended by
143-
unique index which is the same in all proposed variable names in a list.
144-
:param names: list of strings
145-
:param proposed: list of strings
146-
:return: list of strings
144+
Returns unique names for variables
145+
146+
Proposed is a list of names (or a string with a single name). If any name
147+
already appears in `names`, the function appends an index in parentheses,
148+
which is one higher than the highest index found for these names. Also, if
149+
`names` contains any of the names with index in parentheses, this counts
150+
as an occurrence of the name. For instance, if `names` does not contain
151+
`x` but it contains `x (3)`, `get_unique_names` will replace `x` with
152+
`x (4)`.
153+
154+
If argument `names` is a domain, the method observes all variables and metas.
155+
156+
Function returns a string if `proposed` is a string, and a list if it's a
157+
list.
158+
159+
The method is used in widgets like MDS, which adds two variables (`x` and
160+
`y`). It is desired that they have the same index. If `x`, `x (1)` and
161+
`x (2)` and `y` (but no other `y`'s) already exist in the domain, MDS
162+
should append `x (3)` and `y (3)`, not `x (3)` and `y (1)`.
163+
164+
Args:
165+
names (Domain or list of str): used names
166+
proposed (str or list of str): proposed name
167+
168+
Return:
169+
str or list of str
147170
"""
148-
if len([name for name in proposed if name in names]):
149-
max_index = max([max(get_indices(names, name),
150-
default=1) for name in proposed], default=1)
151-
for i, name in enumerate(proposed):
152-
proposed[i] = "{} ({})".format(name, max_index + 1)
153-
return proposed
171+
from Orange.data import Domain # prevent cyclic import
172+
if isinstance(names, Domain):
173+
names = [var.name for var in chain(names.variables, names.metas)]
174+
if isinstance(proposed, str):
175+
return get_unique_names(names, [proposed])[0]
176+
indicess = [indices
177+
for indices in (get_indices(names, name) for name in proposed)
178+
if indices]
179+
if not (set(proposed) & set(names) or indicess):
180+
return proposed
181+
max_index = max(map(max, indicess), default=0) + 1
182+
return [f"{name} ({max_index})" for name in proposed]

Orange/projection/base.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,7 @@ def proj_variable(i, name):
143143
def _get_var_names(self, n):
144144
postfixes = ["x", "y"] if n == 2 else [str(i) for i in range(1, n + 1)]
145145
names = [f"{self.var_prefix}-{postfix}" for postfix in postfixes]
146-
domain = self.orig_domain.variables + self.orig_domain.metas
147-
return get_unique_names([v.name for v in domain], names)
146+
return get_unique_names(self.orig_domain, names)
148147

149148

150149
class LinearProjector(Projector):

Orange/projection/pca.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,7 @@ class PCAModel(DomainProjection, metaclass=WrapperMeta):
7373

7474
def _get_var_names(self, n):
7575
names = [f"{self.var_prefix}{postfix}" for postfix in range(1, n + 1)]
76-
domain = self.orig_domain.variables + self.orig_domain.metas
77-
return get_unique_names([v.name for v in domain], names)
76+
return get_unique_names(self.orig_domain, names)
7877

7978

8079
class IncrementalPCA(SklProjector):

Orange/widgets/unsupervised/owkmeans.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@
1010
from Orange.clustering import KMeans
1111
from Orange.clustering.kmeans import KMeansModel, SILHOUETTE_MAX_SAMPLES
1212
from Orange.data import Table, Domain, DiscreteVariable, ContinuousVariable
13+
from Orange.data.util import get_unique_names
1314
from Orange.widgets import widget, gui
1415
from Orange.widgets.settings import Setting
15-
from Orange.widgets.utils.annotated_data import get_next_name, \
16+
from Orange.widgets.utils.annotated_data import \
1617
ANNOTATED_DATA_SIGNAL_NAME, add_columns
1718
from Orange.widgets.utils.concurrent import ThreadExecutor, FutureSetWatcher
1819
from Orange.widgets.utils.sql import check_sql_input
@@ -463,11 +464,12 @@ def send_data(self):
463464

464465
domain = self.data.domain
465466
cluster_var = DiscreteVariable(
466-
get_next_name(domain, "Cluster"),
467+
get_unique_names(domain, "Cluster"),
467468
values=["C%d" % (x + 1) for x in range(km.k)]
468469
)
469470
clust_ids = km(self.data)
470-
silhouette_var = ContinuousVariable(get_next_name(domain, "Silhouette"))
471+
silhouette_var = ContinuousVariable(
472+
get_unique_names(domain, "Silhouette"))
471473
if km.silhouette_samples is not None:
472474
self.Warning.no_silhouettes.clear()
473475
scores = np.arctan(km.silhouette_samples) / np.pi + 0.5

Orange/widgets/unsupervised/owlouvainclustering.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,12 @@
1616

1717
from Orange.clustering.louvain import table_to_knn_graph, Louvain
1818
from Orange.data import Table, DiscreteVariable
19+
from Orange.data.util import get_unique_names
1920
from Orange.projection import PCA
2021
from Orange.widgets import widget, gui, report
2122
from Orange.widgets.settings import DomainContextHandler, ContextSetting, \
2223
Setting
23-
from Orange.widgets.utils.annotated_data import get_next_name, add_columns, \
24+
from Orange.widgets.utils.annotated_data import add_columns, \
2425
ANNOTATED_DATA_SIGNAL_NAME
2526
from Orange.widgets.utils.concurrent import FutureWatcher
2627
from Orange.widgets.utils.signals import Input, Output
@@ -358,7 +359,7 @@ def _send_data(self):
358359
new_partition = list(map(index_map.get, self.partition))
359360

360361
cluster_var = DiscreteVariable(
361-
get_next_name(domain, 'Cluster'),
362+
get_unique_names(domain, 'Cluster'),
362363
values=['C%d' % (i + 1) for i, _ in enumerate(np.unique(new_partition))]
363364
)
364365

Orange/widgets/utils/annotated_data.py

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
from itertools import chain
2-
31
import numpy as np
42
from Orange.data import Domain, DiscreteVariable
5-
from Orange.data.util import get_indices
3+
from Orange.data.util import get_unique_names
64

75
ANNOTATED_DATA_SIGNAL_NAME = "Data"
86
ANNOTATED_DATA_FEATURE_NAME = "Selected"
@@ -32,28 +30,8 @@ def add_columns(domain, attributes=(), class_vars=(), metas=()):
3230
return Domain(attributes, class_vars, metas)
3331

3432

35-
def get_next_name(names, name):
36-
"""
37-
Returns next 'possible' attribute name. The name should not be duplicated
38-
and is generated using name parameter, appended by smallest possible index.
39-
40-
:param names: list
41-
:param name: str
42-
:return: str
43-
"""
44-
if isinstance(names, Domain):
45-
names = [
46-
var.name
47-
for var in chain(names.attributes, names.class_vars, names.metas)
48-
]
49-
indexes = get_indices(names, name)
50-
if name not in names and not indexes:
51-
return name
52-
return "{} ({})".format(name, max(indexes, default=0) + 1)
53-
54-
5533
def _table_with_annotation_column(data, values, column_data, var_name):
56-
var = DiscreteVariable(get_next_name(data.domain, var_name), values)
34+
var = DiscreteVariable(get_unique_names(data.domain, var_name), values)
5735
class_vars, metas = data.domain.class_vars, data.domain.metas
5836
if not data.domain.class_vars:
5937
class_vars += (var, )

Orange/widgets/utils/tests/test_annotated_data.py

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,11 @@
55

66
from Orange.data import Table, Variable
77
from Orange.data.filter import SameValue
8-
from Orange.data.util import get_unique_names
98
from Orange.widgets.utils.annotated_data import (
10-
create_annotated_table, get_next_name,
11-
create_groups_table, ANNOTATED_DATA_FEATURE_NAME
9+
create_annotated_table, create_groups_table, ANNOTATED_DATA_FEATURE_NAME
1210
)
1311

1412

15-
class TestGetNextName(unittest.TestCase):
16-
def test_get_var_name(self):
17-
self.assertEqual(get_next_name(["a"], "XX"), "XX")
18-
self.assertEqual(get_next_name(["a", "XX"], "XX"), "XX (1)")
19-
self.assertEqual(get_next_name(["a", "XX (4)"], "XX"), "XX (5)")
20-
self.assertEqual(get_next_name(["a", "XX", "XX (4)"], "XX"), "XX (5)")
21-
22-
2313
class TestAnnotatedData(unittest.TestCase):
2414
def setUp(self):
2515
Variable._clear_all_caches() # pylint: disable=protected-access
@@ -113,12 +103,6 @@ def test_cascade_annotated_tables_with_missing_annotated_feature(self):
113103
self.assertEqual(data.domain.metas[1].name,
114104
"{} ({})".format(ANNOTATED_DATA_FEATURE_NAME, 4))
115105

116-
def test_get_unique_names(self):
117-
names = ["charlie", "bravo", "charlie (2)", "charlie (3)", "bravo (2)", "charlie (4)",
118-
"bravo (3)"]
119-
self.assertEqual(get_unique_names(names, ["bravo", "charlie"]),
120-
["bravo (5)", "charlie (5)"])
121-
122106
def test_create_groups_table_include_unselected(self):
123107
group_indices = random.sample(range(0, len(self.zoo)), 20)
124108
selection = np.zeros(len(self.zoo), dtype=np.uint8)

Orange/widgets/visualize/utils/widget.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -555,11 +555,8 @@ def _get_projection_data(self):
555555
return data
556556

557557
def _get_projection_variables(self):
558-
domain = self.data.domain
559558
names = get_unique_names(
560-
[v.name for v in domain.variables + domain.metas],
561-
self.embedding_variables_names
562-
)
559+
self.data.domain, self.embedding_variables_names)
563560
return ContinuousVariable(names[0]), ContinuousVariable(names[1])
564561

565562
@staticmethod

0 commit comments

Comments
 (0)