@@ -220,8 +220,6 @@ def __init__(self, widget):
220220 self .place_delegate = PlaceDelegate (self , VarTableModel .places )
221221 self .setItemDelegateForColumn (Column .place , self .place_delegate )
222222
223- self .renamed_variables = []
224-
# Report whether x counts as a missing value: True when str(x) is "nan" or the empty string.
225223 @staticmethod
226224 def _is_missing (x ):
227225 return str (x ) in ("nan" , "" )
@@ -253,21 +251,27 @@ def _merge(cols, force_dense=False):
253251 sparse_cols = [c if sp .issparse (c ) else sp .csc_matrix (c ) for c in cols ]
254252 return sp .hstack (sparse_cols ).tocsr ()
255253
256- def get_domain (self , domain , data ):
257- """Create domain (and dataset) from changes made in the widget.
258-
259- Parameters
260- ----------
261- domain : old domain
262- data : source data
254+ def get_domain (self , domain , data , deduplicate = False ):
255+ """
256+ Create domain (and dataset) from changes made in the widget.
263257
264- Returns
265- -------
266- (new_domain, [attribute_columns, class_var_columns, meta_columns])
261+ Args:
262+ domain (Domain): original domain
263+ data (Table): original data
264+ deduplicate (bool): if True, variable names are deduplicated and
265+ the result contains an additional list with names of renamed
266+ variables
267+
268+ Returns:
269+ (new_domain, [attribute_columns, class_var_columns, meta_columns])
270+ or
271+ (new_domain, [attribute_columns, class_var_columns, meta_columns], renamed)
267272 """
268273 # Allow type-checking with type() instead of isinstance() for exact comparison
269274 # pylint: disable=unidiomatic-typecheck
270- self .renamed_variables = []
271275 variables = self .model ().variables
272276 places = [[], [], []] # attributes, class_vars, metas
273277 cols = [[], [], []] # Xcols, Ycols, Mcols
@@ -286,15 +290,17 @@ def numbers_are_round(var, col_data):
286290 chain (((at , Place .feature ) for at in domain .attributes ),
287291 ((cl , Place .class_var ) for cl in domain .class_vars ),
288292 ((mt , Place .meta ) for mt in domain .metas )))):
289- return domain , [data .X , data .Y , data .metas ]
290-
291- unique_names = get_unique_names_duplicates ([var [0 ] for var in variables ])
292- for var , u in zip (variables , unique_names ):
293- if var [0 ] != u :
294- self .renamed_variables .append (var [0 ])
295- var [0 ] = u
296- self .model ().set_variables (variables )
293+ if deduplicate :
294+ return domain , [data .X , data .Y , data .metas ], []
295+ else :
296+ return domain , [data .X , data .Y , data .metas ]
297297
298+ relevant_names = [var [0 ] for var in variables if var [2 ] != Place .skip ]
299+ if deduplicate :
300+ renamed_iter = iter (get_unique_names_duplicates (relevant_names ))
301+ else :
302+ renamed_iter = iter (relevant_names )
303+ renamed = []
298304 for (name , tpe , place , _ , may_be_numeric ), (orig_var , orig_plc ) in \
299305 zip (variables ,
300306 chain ([(at , Place .feature ) for at in domain .attributes ],
@@ -303,24 +309,28 @@ def numbers_are_round(var, col_data):
303309 if place == Place .skip :
304310 continue
305311
312+ new_name = next (renamed_iter )
313+ if new_name != name :
314+ renamed .append (name )
315+
306316 col_data = self ._get_column (data , orig_var , orig_plc )
307317 is_sparse = sp .issparse (col_data )
308318
309- if name == orig_var .name and tpe == type (orig_var ):
319+ if new_name == orig_var .name and tpe == type (orig_var ):
310320 var = orig_var
311321 elif tpe == type (orig_var ):
312- var = orig_var .copy (name = name )
322+ var = orig_var .copy (name = new_name )
313323 elif tpe == DiscreteVariable :
314324 values = list (str (i ) for i in unique (col_data ) if not self ._is_missing (i ))
315325 round_numbers = numbers_are_round (orig_var , col_data )
316326 col_data = [np .nan if self ._is_missing (x ) else values .index (str (x ))
317327 for x in self ._iter_vals (col_data )]
318328 if round_numbers :
319329 values = [str (int (float (v ))) for v in values ]
320- var = tpe (name , values )
330+ var = tpe (new_name , values )
321331 col_data = self ._to_column (col_data , is_sparse )
322332 elif tpe == StringVariable :
323- var = tpe .make (name )
333+ var = tpe .make (new_name )
324334 if type (orig_var ) in [DiscreteVariable , TimeVariable ]:
325335 col_data = [orig_var .repr_val (x ) if not np .isnan (x ) else ""
326336 for x in self ._iter_vals (col_data )]
@@ -334,13 +344,13 @@ def numbers_are_round(var, col_data):
334344 # in metas which are transformed to dense below
335345 col_data = self ._to_column (col_data , False , dtype = object )
336346 elif tpe == ContinuousVariable and type (orig_var ) == DiscreteVariable :
337- var = tpe .make (name )
347+ var = tpe .make (new_name )
338348 if may_be_numeric :
339349 col_data = [np .nan if self ._is_missing (x ) else float (orig_var .values [int (x )])
340350 for x in self ._iter_vals (col_data )]
341351 col_data = self ._to_column (col_data , is_sparse )
342352 else :
343- var = tpe (name )
353+ var = tpe (new_name )
344354 places [place ].append (var )
345355 cols [place ].append (col_data )
346356
@@ -350,7 +360,10 @@ def numbers_are_round(var, col_data):
350360 Y = self ._merge (cols [Place .class_var ], force_dense = True )
351361 m = self ._merge (cols [Place .meta ], force_dense = True )
352362 domain = Domain (* places )
353- return domain , [X , Y , m ]
363+ if deduplicate :
364+ return domain , [X , Y , m ], renamed
365+ else :
366+ return domain , [X , Y , m ]
354367
355368 def _get_column (self , data , source_var , source_place ):
356369 """ Extract column from data and preserve sparsity. """
0 commit comments