99
1010import Orange
1111from Orange .data import StringVariable , ContinuousVariable , Variable
12- from Orange .data .util import hstack
12+ from Orange .data .util import hstack , get_unique_names_duplicates
1313from Orange .widgets import widget , gui
1414from Orange .widgets .settings import Setting
1515from Orange .widgets .utils .itemmodels import DomainModel
@@ -217,13 +217,14 @@ class Outputs:
217217 resizing_enabled = False
218218
219219 class Warning (widget .OWWidget .Warning ):
220- duplicate_names = Msg ("Duplicate variable names in output." )
220+ renamed_vars = Msg ("Some variables have been renamed "
221+ "to avoid duplicates.\n {}" )
221222
222223 class Error (widget .OWWidget .Error ):
223224 matching_numeric_with_nonnum = Msg (
224- "Numeric and non-numeric columns ({} and {}) can't be matched." )
225- matching_index_with_sth = Msg ("Row index cannot by matched with {}." )
226- matching_id_with_sth = Msg ("Instance cannot by matched with {}." )
225+ "Numeric and non-numeric columns ({} and {}) cannot be matched." )
226+ matching_index_with_sth = Msg ("Row index cannot be matched with {}." )
227+ matching_id_with_sth = Msg ("Instance cannot be matched with {}." )
227228 nonunique_left = Msg (
228229 "Some combinations of values on the left appear in multiple rows.\n "
229230 "For this type of merging, every possible combination of values "
@@ -379,19 +380,9 @@ def dataInfoText(data):
379380 f"{ len (data .domain ) + len (data .domain .metas )} variables"
380381
def commit(self):
    """Recompute the merged table and send it to the data output.

    All widget messages are cleared first. The merge is attempted only
    when both ``self.data`` and ``self.extra_data`` are truthy; otherwise
    ``None`` is sent.
    """
    self.clear_messages()
    if self.data and self.extra_data:
        merged = self.merge()
    else:
        # At least one of the two inputs is missing or empty.
        merged = None
    self.Outputs.data.send(merged)
395386
396387 def send_report (self ):
397388 # pylint: disable=invalid-sequence-index
@@ -544,6 +535,7 @@ def _join_table_by_indices(self, reduced_extra, lefti, righti, rightu):
544535 domain = Orange .data .Domain (
545536 * (getattr (self .data .domain , x ) + getattr (reduced_extra .domain , x )
546537 for x in ("attributes" , "class_vars" , "metas" )))
538+ domain = self ._domain_rename_duplicates (domain )
547539 X = self ._join_array_by_indices (self .data .X , reduced_extra .X , lefti , righti )
548540 Y = self ._join_array_by_indices (
549541 np .c_ [self .data .Y ], np .c_ [reduced_extra .Y ], lefti , righti )
@@ -566,6 +558,29 @@ def _join_table_by_indices(self, reduced_extra, lefti, righti, rightu):
566558
567559 return table
568560
def _domain_rename_duplicates(self, domain):
    """Return a domain in which no two variables share a name.

    The names of all variables in ``domain`` (``domain.variables``
    followed by ``domain.metas``) are passed to
    ``get_unique_names_duplicates``. Every variable whose proposed name
    differs from the name returned for it is replaced by a copy carrying
    the new name (names are appended a number). Each variable stays in
    its original role (attribute, class variable or meta).

    If anything was renamed, ``self.Warning.renamed_vars`` is raised with
    the affected original names, comma-separated, in order of first
    appearance.

    Args:
        domain: the domain to check; it is not modified.

    Returns:
        A new ``Orange.data.Domain`` built from the (possibly renamed)
        attributes, class variables and metas.
    """
    attrs, cvars, metas = [], [], []
    # One target list per variable, aligned with `variables` below.
    # NOTE(review): relies on domain.variables being attributes followed
    # by class_vars -- confirm against Orange.data.Domain.
    targets = (
        [attrs] * len(domain.attributes)
        + [cvars] * len(domain.class_vars)
        + [metas] * len(domain.metas)
    )

    variables = domain.variables + domain.metas
    proposed_names = [var.name for var in variables]
    unique_names = get_unique_names_duplicates(proposed_names)

    # A dict is used as an ordered set: iterating a plain set of strings
    # would make the order of names in the warning vary between runs.
    renamed = {}
    for p_name, u_name, var, target in zip(proposed_names, unique_names,
                                           variables, targets):
        if p_name != u_name:
            renamed[p_name] = None
            var = var.copy(name=u_name)
        target.append(var)

    if renamed:
        self.Warning.renamed_vars(", ".join(renamed))
    return Orange.data.Domain(attrs, cvars, metas)
583+
569584 @staticmethod
570585 def _join_array_by_indices (left , right , lefti , righti , string_cols = None ):
571586 """Join (horizontally) two arrays, taking pairs of rows given in indices
0 commit comments