Skip to content

Commit 4642f0e

Browse files
committed
File / Domain Editor: Construct domains with unique names
1 parent 35231b7 commit 4642f0e

File tree

2 files changed

+49
-34
lines changed

2 files changed

+49
-34
lines changed

Orange/widgets/data/owfile.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -124,16 +124,16 @@ class Warning(widget.OWWidget.Warning):
124124
file_too_big = Msg("The file is too large to load automatically."
125125
" Press Reload to load.")
126126
load_warning = Msg("Read warning:\n{}")
127-
performance_warning = widget.Msg(
127+
performance_warning = Msg(
128128
"Categorical variables with >100 values may decrease performance.")
129129
renamed_vars = Msg("Some variables have been renamed "
130130
"to avoid duplicates.\n{}")
131131

132132
class Error(widget.OWWidget.Error):
133-
file_not_found = widget.Msg("File not found.")
134-
missing_reader = widget.Msg("Missing reader.")
135-
sheet_error = widget.Msg("Error listing available sheets.")
136-
unknown = widget.Msg("Read error:\n{}")
133+
file_not_found = Msg("File not found.")
134+
missing_reader = Msg("Missing reader.")
135+
sheet_error = Msg("Error listing available sheets.")
136+
unknown = Msg("Read error:\n{}")
137137

138138
class NoFileSelected:
139139
pass
@@ -484,7 +484,9 @@ def apply_domain_edit(self):
484484
if self.data is None:
485485
table = None
486486
else:
487-
domain, cols = self.domain_editor.get_domain(self.data.domain, self.data)
487+
domain, cols, renamed = \
488+
self.domain_editor.get_domain(self.data.domain, self.data,
489+
deduplicate=True)
488490
if not (domain.variables or domain.metas):
489491
table = None
490492
elif domain is self.data.domain:
@@ -496,8 +498,8 @@ def apply_domain_edit(self):
496498
table.ids = np.array(self.data.ids)
497499
table.attributes = getattr(self.data, 'attributes', {})
498500
self._inspect_discrete_variables(domain)
499-
if self.domain_editor.renamed_variables:
500-
self.Warning.renamed_vars(', '.join(self.domain_editor.renamed_variables))
501+
if renamed:
502+
self.Warning.renamed_vars(', '.join(renamed))
501503

502504
self.Outputs.data.send(table)
503505
self.apply_button.setEnabled(False)

Orange/widgets/utils/domaineditor.py

Lines changed: 39 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,6 @@ def __init__(self, widget):
220220
self.place_delegate = PlaceDelegate(self, VarTableModel.places)
221221
self.setItemDelegateForColumn(Column.place, self.place_delegate)
222222

223-
self.renamed_variables = []
224-
225223
@staticmethod
226224
def _is_missing(x):
227225
return str(x) in ("nan", "")
@@ -253,21 +251,27 @@ def _merge(cols, force_dense=False):
253251
sparse_cols = [c if sp.issparse(c) else sp.csc_matrix(c) for c in cols]
254252
return sp.hstack(sparse_cols).tocsr()
255253

256-
def get_domain(self, domain, data):
257-
"""Create domain (and dataset) from changes made in the widget.
258-
259-
Parameters
260-
----------
261-
domain : old domain
262-
data : source data
254+
def get_domain(self, domain, data, deduplicate=False):
255+
"""
256+
Create domain (and dataset) from changes made in the widget.
263257
264258
Returns
265259
-------
266-
(new_domain, [attribute_columns, class_var_columns, meta_columns])
260+
261+
Args:
262+
domain (Domain): original domain
263+
data (Table): original data
264+
deduplicate (bool): if True, variable names are deduplicated and
265+
the result contains an additional list with names of renamed
266+
variables
267+
268+
Returns:
269+
(new_domain, [attribute_columns, class_var_columns, meta_columns])
270+
or
271+
(new_domain, [attribute_columns, class_var_columns, meta_columns], renamed)
267272
"""
268273
# Allow type-checking with type() instead of isinstance() for exact comparison
269274
# pylint: disable=unidiomatic-typecheck
270-
self.renamed_variables = []
271275
variables = self.model().variables
272276
places = [[], [], []] # attributes, class_vars, metas
273277
cols = [[], [], []] # Xcols, Ycols, Mcols
@@ -286,15 +290,17 @@ def numbers_are_round(var, col_data):
286290
chain(((at, Place.feature) for at in domain.attributes),
287291
((cl, Place.class_var) for cl in domain.class_vars),
288292
((mt, Place.meta) for mt in domain.metas)))):
289-
return domain, [data.X, data.Y, data.metas]
290-
291-
unique_names = get_unique_names_duplicates([var[0] for var in variables])
292-
for var, u in zip(variables, unique_names):
293-
if var[0] != u:
294-
self.renamed_variables.append(var[0])
295-
var[0] = u
296-
self.model().set_variables(variables)
293+
if deduplicate:
294+
return domain, [data.X, data.Y, data.metas], []
295+
else:
296+
return domain, [data.X, data.Y, data.metas]
297297

298+
relevant_names = [var[0] for var in variables if var[2] != Place.skip]
299+
if deduplicate:
300+
renamed_iter = iter(get_unique_names_duplicates(relevant_names))
301+
else:
302+
renamed_iter = iter(relevant_names)
303+
renamed = []
298304
for (name, tpe, place, _, may_be_numeric), (orig_var, orig_plc) in \
299305
zip(variables,
300306
chain([(at, Place.feature) for at in domain.attributes],
@@ -303,24 +309,28 @@ def numbers_are_round(var, col_data):
303309
if place == Place.skip:
304310
continue
305311

312+
new_name = next(renamed_iter)
313+
if new_name != name:
314+
renamed.append(name)
315+
306316
col_data = self._get_column(data, orig_var, orig_plc)
307317
is_sparse = sp.issparse(col_data)
308318

309-
if name == orig_var.name and tpe == type(orig_var):
319+
if new_name == orig_var.name and tpe == type(orig_var):
310320
var = orig_var
311321
elif tpe == type(orig_var):
312-
var = orig_var.copy(name=name)
322+
var = orig_var.copy(name=new_name)
313323
elif tpe == DiscreteVariable:
314324
values = list(str(i) for i in unique(col_data) if not self._is_missing(i))
315325
round_numbers = numbers_are_round(orig_var, col_data)
316326
col_data = [np.nan if self._is_missing(x) else values.index(str(x))
317327
for x in self._iter_vals(col_data)]
318328
if round_numbers:
319329
values = [str(int(float(v))) for v in values]
320-
var = tpe(name, values)
330+
var = tpe(new_name, values)
321331
col_data = self._to_column(col_data, is_sparse)
322332
elif tpe == StringVariable:
323-
var = tpe.make(name)
333+
var = tpe.make(new_name)
324334
if type(orig_var) in [DiscreteVariable, TimeVariable]:
325335
col_data = [orig_var.repr_val(x) if not np.isnan(x) else ""
326336
for x in self._iter_vals(col_data)]
@@ -334,13 +344,13 @@ def numbers_are_round(var, col_data):
334344
# in metas which are transformed to dense below
335345
col_data = self._to_column(col_data, False, dtype=object)
336346
elif tpe == ContinuousVariable and type(orig_var) == DiscreteVariable:
337-
var = tpe.make(name)
347+
var = tpe.make(new_name)
338348
if may_be_numeric:
339349
col_data = [np.nan if self._is_missing(x) else float(orig_var.values[int(x)])
340350
for x in self._iter_vals(col_data)]
341351
col_data = self._to_column(col_data, is_sparse)
342352
else:
343-
var = tpe(name)
353+
var = tpe(new_name)
344354
places[place].append(var)
345355
cols[place].append(col_data)
346356

@@ -350,7 +360,10 @@ def numbers_are_round(var, col_data):
350360
Y = self._merge(cols[Place.class_var], force_dense=True)
351361
m = self._merge(cols[Place.meta], force_dense=True)
352362
domain = Domain(*places)
353-
return domain, [X, Y, m]
363+
if deduplicate:
364+
return domain, [X, Y, m], renamed
365+
else:
366+
return domain, [X, Y, m]
354367

355368
def _get_column(self, data, source_var, source_place):
356369
""" Extract column from data and preserve sparsity. """

0 commit comments

Comments
 (0)