Skip to content

Commit 42fc2cd

Browse files
authored
Merge pull request #4760 from AndrejaKovacic/domain_checks_visualize
[FIX] Unique domain checks
2 parents aa58911 + cdcf39d commit 42fc2cd

19 files changed

+224
-53
lines changed

Orange/evaluation/testing.py

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22
# pylint: disable=arguments-differ
33
from warnings import warn
44
from collections import namedtuple
5+
from itertools import chain
56
from time import time
67

78
import numpy as np
89

910
import sklearn.model_selection as skl
1011

1112
from Orange.data import Table, Domain, ContinuousVariable, DiscreteVariable
13+
from Orange.data.util import get_unique_names
1214

1315
__all__ = ["Results", "CrossValidation", "LeaveOneOut", "TestOnTrainingData",
1416
"ShuffleSplit", "TestOnTestData", "sample", "CrossValidationFeature"]
@@ -259,42 +261,44 @@ def get_augmented_data(self, model_names,
259261
assert self.predicted.shape[0] == len(model_names)
260262

261263
data = self.data[self.row_indices]
262-
class_var = data.domain.class_var
264+
domain = data.domain
265+
class_var = domain.class_var
263266
classification = class_var and class_var.is_discrete
264267

265268
new_meta_attr = []
266269
new_meta_vals = np.empty((len(data), 0))
270+
names = [var.name for var in chain(domain.attributes,
271+
domain.metas,
272+
[class_var])]
267273

268274
if classification:
269275
# predictions
270276
if include_predictions:
271-
new_meta_attr += (
272-
DiscreteVariable(name=name, values=class_var.values)
273-
for name in model_names)
277+
uniq_new, names = self.create_unique_vars(names, model_names, class_var.values)
278+
new_meta_attr += uniq_new
274279
new_meta_vals = np.hstack((new_meta_vals, self.predicted.T))
275280

276281
# probabilities
277282
if include_probabilities:
278-
for name in model_names:
279-
new_meta_attr += (
280-
ContinuousVariable(name=f"{name} ({value})")
281-
for value in class_var.values)
283+
proposed = [f"{name} ({value})" for name in model_names for value in class_var.values]
284+
285+
uniq_new, names = self.create_unique_vars(names, proposed)
286+
new_meta_attr += uniq_new
282287

283288
for i in self.probabilities:
284289
new_meta_vals = np.hstack((new_meta_vals, i))
285290

286291
elif include_predictions:
287292
# regression
288-
new_meta_attr += (ContinuousVariable(name=name)
289-
for name in model_names)
293+
uniq_new, names = self.create_unique_vars(names, model_names)
294+
new_meta_attr += uniq_new
290295
new_meta_vals = np.hstack((new_meta_vals, self.predicted.T))
291296

292297
# add fold info
293298
if self.folds is not None:
294-
new_meta_attr.append(
295-
DiscreteVariable(
296-
name="Fold",
297-
values=[str(i + 1) for i in range(len(self.folds))]))
299+
values = [str(i + 1) for i in range(len(self.folds))]
300+
uniq_new, names = self.create_unique_vars(names, ["Fold"], values)
301+
new_meta_attr += uniq_new
298302
fold = np.empty((len(data), 1))
299303
for i, s in enumerate(self.folds):
300304
fold[s, 0] = i
@@ -311,6 +315,17 @@ def get_augmented_data(self, model_names,
311315
predictions.name = data.name
312316
return predictions
313317

318+
def create_unique_vars(self, names, proposed_names, values=()):
    """Create one variable per proposed name, renaming on name clashes.

    Each proposed name is passed through ``get_unique_names`` together
    with the names that are already taken, so the resulting variables
    clash neither with ``names`` nor with one another.

    Args:
        names: names that are already in use.
        proposed_names: desired names for the new variables.
        values: when non-empty, every new variable is a
            ``DiscreteVariable`` with these values; otherwise a
            ``ContinuousVariable`` is created.

    Returns:
        A ``(unique_vars, names)`` tuple: the list of new variables and
        a list of taken names extended with the names assigned here.
    """
    # Work on a copy so the caller's list is not modified behind its
    # back; callers receive the extended list through the return value.
    taken = list(names)
    unique_vars = []
    for proposed in proposed_names:
        uniq = get_unique_names(taken, proposed)
        if values:
            var = DiscreteVariable(uniq, values)
        else:
            var = ContinuousVariable(uniq)
        unique_vars.append(var)
        # Later proposals must also avoid the name just handed out.
        taken.append(uniq)
    return unique_vars, taken
328+
314329
def split_by_model(self):
315330
"""
316331
Split evaluation results by models.

Orange/projection/manifold.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import logging
22
import warnings
33
from collections import Iterable
4+
from itertools import chain
45

56
import numpy as np
67
import scipy.sparse as sp
@@ -10,6 +11,7 @@
1011

1112
import Orange
1213
from Orange.data import Table, Domain, ContinuousVariable
14+
from Orange.data.util import get_unique_names
1315
from Orange.distance import Distance, DistanceModel, Euclidean
1416
from Orange.projection import SklProjector, Projector, Projection
1517
from Orange.projection.base import TransformDomain, ComputeValueProjector
@@ -510,7 +512,10 @@ def convert_embedding_to_model(self, data, embedding):
510512
# need the full embedding attributes and is cast into a regular array
511513
n = self.n_components
512514
postfixes = ["x", "y"] if n == 2 else list(range(1, n + 1))
513-
tsne_cols = [ContinuousVariable(f"t-SNE-{p}") for p in postfixes]
515+
names = [var.name for var in chain(data.domain.class_vars, data.domain.metas) if var]
516+
proposed = [(f"t-SNE-{p}") for p in postfixes]
517+
uniq_names = get_unique_names(names, proposed)
518+
tsne_cols = [ContinuousVariable(name) for name in uniq_names]
514519
embedding_domain = Domain(tsne_cols, data.domain.class_vars, data.domain.metas)
515520
embedding_table = Table(embedding_domain, embedding.view(np.ndarray), data.Y, data.metas)
516521

Orange/widgets/data/owfeatureconstructor.py

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,12 @@
2626
QSizePolicy, QAbstractItemView, QComboBox, QFormLayout, QLineEdit,
2727
QHBoxLayout, QVBoxLayout, QStackedWidget, QStyledItemDelegate,
2828
QPushButton, QMenu, QListView, QFrame, QLabel)
29-
from AnyQt.QtGui import QKeySequence
29+
from AnyQt.QtGui import QKeySequence, QColor
3030
from AnyQt.QtCore import Qt, pyqtSignal as Signal, pyqtProperty as Property
3131
from orangewidget.utils.combobox import ComboBoxSearch
3232

3333
import Orange
34+
from Orange.data.util import get_unique_names
3435
from Orange.widgets import gui
3536
from Orange.widgets.settings import ContextSetting, DomainContextHandler
3637
from Orange.widgets.utils import itemmodels, vartype
@@ -55,6 +56,7 @@
5556

5657
StringDescriptor = namedtuple("StringDescriptor", ["name", "expression"])
5758

59+
#warningIcon = gui.createAttributePixmap('!', QColor((202, 0, 32)))
5860

5961
def make_variable(descriptor, compute_value):
6062
if isinstance(descriptor, ContinuousDescriptor):
@@ -390,6 +392,10 @@ class Error(OWWidget.Error):
390392
more_values_needed = Msg("Categorical feature {} needs more values.")
391393
invalid_expressions = Msg("Invalid expressions: {}.")
392394

395+
class Warning(OWWidget.Warning):
    # Raised from _on_modified when the edited feature's name had to be
    # replaced by a unique one.
    renamed_var = Msg(
        "Recently added variable has been renamed, to avoid duplicates.\n")
398+
393399
def __init__(self):
394400
super().__init__()
395401
self.data = None
@@ -427,16 +433,8 @@ def unique_name(fmt, reserved):
427433
candidates = (fmt.format(i) for i in count(1))
428434
return next(c for c in candidates if c not in reserved)
429435

430-
def reserved_names():
431-
varnames = []
432-
if self.data is not None:
433-
varnames = [var.name for var in
434-
self.data.domain.variables + self.data.domain.metas]
435-
varnames += [desc.name for desc in self.featuremodel]
436-
return set(varnames)
437-
438436
def generate_newname(fmt):
439-
return unique_name(fmt, reserved_names())
437+
return unique_name(fmt, self.reserved_names())
440438

441439
menu = QMenu(self.addbutton)
442440
cont = menu.addAction("Numeric")
@@ -531,8 +529,18 @@ def _on_selectedVariableChanged(self, selected, *_):
531529

532530
def _on_modified(self):
    """Store the current editor's descriptor in the model under a unique name.

    Does nothing when no feature is selected (``currentIndex < 0``).
    Otherwise the descriptor produced by the active editor is checked
    against the names returned by ``reserved_names`` for every entry but
    the current one; on a clash the descriptor is rebuilt with the name
    returned by ``get_unique_names`` and ``Warning.renamed_var`` is shown.
    """
    if self.currentIndex >= 0:
        self.Warning.clear()
        # Build the descriptor once and reuse it for both the name check
        # and the model update.
        feature = self.editorstack.currentWidget().editorData()
        reserved = self.reserved_names(self.currentIndex)
        unique = get_unique_names(reserved, feature.name)

        if feature.name != unique:
            self.Warning.renamed_var()
            # Same descriptor type and remaining fields, only the name
            # (first field) is replaced.
            feature = type(feature)(unique, *feature[1:])

        self.featuremodel[self.currentIndex] = feature
        self.descriptors = list(self.featuremodel)
537545

538546
def setDescriptors(self, descriptors):
@@ -542,6 +550,15 @@ def setDescriptors(self, descriptors):
542550
self.descriptors = descriptors
543551
self.featuremodel[:] = list(self.descriptors)
544552

553+
def reserved_names(self, idx_=None):
    """Return the set of names that are already in use.

    The set holds the names of all variables and metas of ``self.data``
    (when data is present) plus the names of the descriptors in
    ``self.featuremodel``. The descriptor at position ``idx_`` is left
    out; with the default ``None`` no descriptor is excluded.
    """
    taken = set()
    if self.data is not None:
        domain_vars = self.data.domain.variables + self.data.domain.metas
        taken.update(var.name for var in domain_vars)
    for position, descriptor in enumerate(self.featuremodel):
        if position != idx_:
            taken.add(descriptor.name)
    return taken
561+
545562
@Inputs.data
546563
@check_sql_input
547564
def setData(self, data=None):

Orange/widgets/data/tests/test_owfeatureconstructor.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,17 @@ def test_error_invalid_expression(self):
329329
self.widget.apply()
330330
self.assertTrue(self.widget.Error.invalid_expressions.is_shown())
331331

332+
def test_renaming_duplicate_vars(self):
    """Output variable names stay unique after adding a feature named "iris"."""
    self.widget.setData(Table("iris"))
    descriptor = ContinuousDescriptor("iris", "0", 3)
    self.widget.addFeature(descriptor)
    self.widget.apply()
    output = self.get_output(self.widget.Outputs.data)
    distinct_names = {var.name for var in output.domain.variables}
    # As many distinct names as variables means no name occurs twice.
    self.assertEqual(len(distinct_names), len(output.domain.variables))
342+
332343
def test_discrete_no_values(self):
333344
"""
334345
Should not fail when there are no values set.

Orange/widgets/evaluate/owconfusionmatrix.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Confusion matrix widget"""
22

33
from math import isnan, isinf
4+
from itertools import chain
45
import unicodedata
56

67
from AnyQt.QtWidgets import QTableView, QHeaderView, QStyledItemDelegate, \
@@ -11,6 +12,7 @@
1112
import sklearn.metrics as skl_metrics
1213

1314
import Orange
15+
from Orange.data.util import get_unique_names
1416
import Orange.evaluation
1517
from Orange.widgets import widget, gui
1618
from Orange.widgets.settings import \
@@ -371,13 +373,16 @@ def _prepare_data(self):
371373
extra = []
372374
class_var = self.data.domain.class_var
373375
metas = self.data.domain.metas
376+
attrs = self.data.domain.attributes
377+
names = [var.name for var in chain(metas, [class_var], attrs)]
374378

375379
if self.append_predictions:
376380
extra.append(predicted.reshape(-1, 1))
381+
proposed = "{}({})".format(class_var.name, learner_name)
382+
name = get_unique_names(names, proposed)
377383
var = Orange.data.DiscreteVariable(
378-
"{}({})".format(class_var.name, learner_name),
379-
class_var.values
380-
)
384+
name,
385+
class_var.values)
381386
metas = metas + (var,)
382387

383388
if self.append_probabilities and \

Orange/widgets/evaluate/owpredictions.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from collections import namedtuple
22
from functools import partial
33
from operator import itemgetter
4+
from itertools import chain
45

56
import numpy
67
from AnyQt.QtWidgets import (
@@ -18,6 +19,7 @@
1819
from Orange.base import Model
1920
from Orange.data import ContinuousVariable, DiscreteVariable, Value, Domain
2021
from Orange.data.table import DomainTransformationError
22+
from Orange.data.util import get_unique_names
2123
from Orange.widgets import gui, settings
2224
from Orange.widgets.evaluate.utils import (
2325
ScoreTable, usable_scorers, learner_name, scorer_caller)
@@ -542,7 +544,17 @@ def _commit_predictions(self):
542544
self._add_regression_out_columns(slot, newmetas, newcolumns)
543545

544546
attrs = list(self.data.domain.attributes)
545-
metas = list(self.data.domain.metas) + newmetas
547+
metas = list(self.data.domain.metas)
548+
names = [var.name for var in chain(attrs, self.data.domain.class_vars, metas) if var]
549+
uniq_newmetas = []
550+
for new_ in newmetas:
551+
uniq = get_unique_names(names, new_.name)
552+
if uniq != new_.name:
553+
new_ = new_.copy(name=uniq)
554+
uniq_newmetas.append(new_)
555+
names.append(uniq)
556+
557+
metas += uniq_newmetas
546558
domain = Orange.data.Domain(attrs, self.class_var, metas=metas)
547559
predictions = self.data.transform(domain)
548560
if newcolumns:

Orange/widgets/evaluate/tests/test_owconfusionmatrix.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from Orange.widgets.evaluate.owconfusionmatrix import OWConfusionMatrix
1010
from Orange.widgets.tests.base import WidgetTest, WidgetOutputsTestMixin
1111
from Orange.widgets.utils.state_summary import format_summary_details
12+
from Orange.widgets.tests.utils import possible_duplicate_table
1213

1314

1415
class TestOWConfusionMatrix(WidgetTest, WidgetOutputsTestMixin):
@@ -135,3 +136,12 @@ def test_summary(self):
135136
self.send_signal(self.widget.Inputs.evaluation_results, None)
136137
self.assertEqual(info._StateInfo__output_summary.brief, "")
137138
self.assertEqual(info._StateInfo__output_summary.details, no_output)
139+
140+
def test_unique_output_domain(self):
    """The first output meta is named 'iris(Learner #1) (1)'."""
    learners = [NaiveBayesLearner()]
    cv_options = {"k": 3, "store_data": True}
    # presumably a table that already holds a variable with this name;
    # verify against possible_duplicate_table
    table = possible_duplicate_table('iris(Learner #1)')
    results = CrossValidation(table, learners, **cv_options)
    self.send_signal(self.widget.Inputs.evaluation_results, results)
    annotated = self.get_output(self.widget.Outputs.annotated_data)
    first_meta = annotated.domain.metas[0]
    self.assertEqual(first_meta.name, 'iris(Learner #1) (1)')

Orange/widgets/evaluate/tests/test_owpredictions.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717
from Orange.data import Table, Domain, DiscreteVariable
1818
from Orange.modelling import ConstantLearner, TreeLearner
1919
from Orange.evaluation import Results
20-
from Orange.widgets.tests.utils import excepthook_catch
20+
from Orange.widgets.tests.utils import excepthook_catch, \
21+
possible_duplicate_table
2122
from Orange.widgets.utils.colorpalette import ColorPaletteGenerator
2223

2324

@@ -414,6 +415,15 @@ def test_colors_continuous(self):
414415

415416
self.widget.send_report() # just a quick check that it doesn't crash
416417

418+
def test_unique_output_domain(self):
    """The first meta of the predictions output is named 'constant (1)'."""
    # presumably a table that already holds a variable named 'constant';
    # verify against possible_duplicate_table
    table = possible_duplicate_table('constant')
    model = ConstantLearner()(table)
    self.send_signal(self.widget.Inputs.data, table)
    self.send_signal(self.widget.Inputs.predictors, model)

    predictions = self.get_output(self.widget.Outputs.predictions)
    first_meta = predictions.domain.metas[0]
    self.assertEqual(first_meta.name, 'constant (1)')
426+
417427

418428
if __name__ == "__main__":
419429
import unittest

Orange/widgets/evaluate/tests/test_owtestandscore.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
from AnyQt.QtTest import QTest
1010
import baycomp
1111

12-
from Orange.classification import MajorityLearner, LogisticRegressionLearner
12+
from Orange.classification import MajorityLearner, LogisticRegressionLearner, \
13+
RandomForestLearner
1314
from Orange.classification.majority import ConstantModel
1415
from Orange.data import Table, Domain, DiscreteVariable, ContinuousVariable
1516
from Orange.evaluation import Results, TestOnTestData, scoring
@@ -24,7 +25,7 @@
2425
from Orange.widgets.settings import (
2526
ClassValuesContextHandler, PerfectDomainContextHandler)
2627
from Orange.widgets.tests.base import WidgetTest
27-
from Orange.widgets.tests.utils import simulate
28+
from Orange.widgets.tests.utils import simulate, possible_duplicate_table
2829
from Orange.widgets.utils.state_summary import (format_summary_details,
2930
format_multiple_summaries)
3031
from Orange.tests import test_filename
@@ -678,6 +679,14 @@ def test_summary(self):
678679
self.assertEqual(info._StateInfo__output_summary.brief, "")
679680
self.assertEqual(info._StateInfo__output_summary.details, no_output)
680681

682+
def test_unique_output_domain(self):
    """The first meta of the predictions output is named 'random forest (1)'."""
    # presumably a table that already holds a variable named
    # 'random forest'; verify against possible_duplicate_table
    table = possible_duplicate_table('random forest')
    self.send_signal(self.widget.Inputs.train_data, table)
    learner = RandomForestLearner()
    self.send_signal(self.widget.Inputs.learner, learner, 0)
    predictions = self.get_output(self.widget.Outputs.predictions)
    first_meta = predictions.domain.metas[0]
    self.assertEqual(first_meta.name, 'random forest (1)')
688+
689+
681690
class TestHelpers(unittest.TestCase):
682691
def test_results_one_vs_rest(self):
683692
data = Table(test_filename("datasets/lenses.tab"))

0 commit comments

Comments
 (0)