Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 33 additions & 34 deletions Orange/widgets/visualize/owvenndiagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from AnyQt.QtCore import pyqtSignal as Signal

import Orange.data
import Orange.statistics.util as util
from Orange.widgets import widget, gui, settings
from Orange.widgets.utils import itemmodels, colorpalette
from Orange.widgets.utils.annotated_data import (create_annotated_table,
Expand Down Expand Up @@ -174,7 +175,7 @@ def handleNewSignals(self):
has_identifiers = all(source_attributes(input.table.domain)
for input in self.data.values())
has_any_identifiers = any(source_attributes(input.table.domain)
for input in self.data.values())
for input in self.data.values())
self.useequalityButton.setEnabled(samedomain)
self.useidentifiersButton.setEnabled(
has_any_identifiers or len(self.data) == 0)
Expand Down Expand Up @@ -494,7 +495,7 @@ def _on_inputAttrActivated(self, attr_index):
index = i
break

assert (index is not None)
assert index is not None

key, _ = inputs[index]

Expand Down Expand Up @@ -713,13 +714,9 @@ def table_concat(tables):
variables_seen.update(table.domain.metas)

domain = Orange.data.Domain(attributes, class_vars, metas)
new_table = Orange.data.Table(domain)

for table in tables:
new_table.extend(Orange.data.Table.from_table(domain, table))
new_table.attributes.update(table.attributes)

return new_table
tables = [tab.transform(domain) for tab in tables]
return tables[0].concatenate(tables, axis=0)


def copy_descriptor(descriptor, newname=None):
Expand Down Expand Up @@ -788,10 +785,12 @@ def inst_key(inst, vars):
# each instance in this list belongs to one group (but not all
# groups need to be present).
inst_by_id = defaultdict(list)
id_by_inst = defaultdict(list) # inverse mapping

for i in range(len(table)):
inst_id = instance_ids[i]
inst_by_id[inst_id].append(i)
id_by_inst[i] = inst_id

newfeatures = []
newclass_vars = []
Expand Down Expand Up @@ -829,21 +828,27 @@ def expanded(feat):

domain = Orange.data.Domain(newfeatures, newclass_vars, newmetas)
prototype_indices = [inst_by_id[inst_id][0] for inst_id in ids]
newtable = Orange.data.Table.from_table(domain, table)[prototype_indices]
newtable = table[prototype_indices].transform(domain)
in_expanded = set(f for efd in expanded_features.values() for f in efd.values())

# Fill-in nan values
for var in domain.variables + domain.metas:
if var in idvarlist or var in in_expanded:
continue
col, _ = newtable.get_column_view(var)
nan_indices = (i for i in col.nonzero()[0]
if isinstance(col[i], str) or numpy.isnan(col[i]))
for i in nan_indices:
for ind in inst_by_id[ids[i]]:
if not numpy.isnan(table[ind, var]):
newtable[i, var] = table[ind, var]
break

# Fill-in expanded features if any
for i, inst_id in enumerate(ids):
indices = inst_by_id[inst_id]
instance = newtable[i]

for var in domain.variables + domain.metas:
if var in idvarlist or var in in_expanded:
continue
if numpy.isnan(instance[var]):
for ind in indices:
if not numpy.isnan(table[ind, var]):
newtable[i, var] = table[ind, var]

for index in indices:
source_inst = table[index]
group = instance_groups[index]
Expand Down Expand Up @@ -891,14 +896,13 @@ def varying_between(table, idvar):
for indices in idmap.values():
subset = table[indices]
for var in list(candidate_set):
values, _ = subset.get_column_view(var)
column, _ = subset.get_column_view(var)
values = util.unique(column)

if var.is_string:
uniq = set(values)
else:
uniq = unique_non_nan(values)
if not var.is_string:
values = unique_non_nan(values)

if len(uniq) > 1:
if len(values) > 1:
varying.add(var)
candidate_set.remove(var)

Expand Down Expand Up @@ -932,16 +936,14 @@ def string_attributes(domain):
"""
Return all string attributes from the domain.
"""
return [attr for attr in domain.variables + domain.metas
if attr.is_string]
return [attr for attr in domain.variables + domain.metas if attr.is_string]


def discrete_attributes(domain):
"""
Return all discrete attributes from the domain.
"""
return [attr for attr in domain.variables + domain.metas
if attr.is_discrete]
return [attr for attr in domain.variables + domain.metas if attr.is_discrete]


def source_attributes(domain):
Expand Down Expand Up @@ -1432,8 +1434,7 @@ def subset_anchors(shapes):
unit_point(270, r=0.35), # C
unit_point(210, r=0.27), # AC
unit_point(330, r=0.27), # BC
unit_point(0, r=0), # ABC
]
unit_point(0, r=0),] # ABC
elif n == 4:
anchors = [
(0.400, 0.110), # A
Expand Down Expand Up @@ -1616,7 +1617,6 @@ def append_column(data, where, variable, column):
attr = domain.attributes
class_vars = domain.class_vars
metas = domain.metas

if where == "X":
attr = attr + (variable,)
X = numpy.hstack((X, column))
Expand All @@ -1629,10 +1629,9 @@ def append_column(data, where, variable, column):
else:
raise ValueError
domain = Orange.data.Domain(attr, class_vars, metas)
table = Orange.data.Table.from_numpy(domain, X, Y, M, W if W.size else None)
table.ids = data.ids
table.attributes = data.attributes
return table
new_data = data.transform(domain)
new_data[:, variable] = column
return new_data


def drop_columns(data, columns):
Expand Down
35 changes: 33 additions & 2 deletions Orange/widgets/visualize/tests/test_owvenndiagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import unittest
import numpy as np
import scipy.sparse as sp
from collections import defaultdict

from Orange.data import (Table, Domain, StringVariable,
Expand Down Expand Up @@ -75,8 +76,8 @@ def test_venn_diagram(self):
source_var, item_id_var)
temp_m = np.array([[cv[0, i], sources[i], table.metas[0 + i, 0]],
[cv[1, i], sources[i], table.metas[1 + i, 0]],
[cv[2, i], sources[i], table.metas[2 + i, 0]]
], dtype=object)
[cv[2, i], sources[i], table.metas[2 + i, 0]]],
dtype=object)
temp_table = self.add_metas(temp_table, temp_d, temp_m)
tables.append(temp_table)

Expand Down Expand Up @@ -179,6 +180,36 @@ def test_no_data(self):


class GroupTableIndicesTest(unittest.TestCase):

def test_varying_between_combined(self):
    """
    Check `varying_between` on a table grouped by its first meta column.

    The same expected list of varying variables must be returned when
    X is a dense array and when it is a scipy CSR matrix.
    """
    nan = np.nan
    X = np.array([
        [0, 0, 0, 0, 0, 1],
        [0, 0, 1, 1, 0, 1],
        [0, 0, 0, 2, nan, nan],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 0, 2, 0, 0],
        [0, 1, 0, 0, nan, 0],
    ])

    # First column is the group identifier ("A" / "B"); everything is
    # stored as strings because of dtype=str.
    M = np.array([
        ["A", 0, 0, 0, 0, 0, 1],
        ["A", 0, 0, 1, 1, 0, 1],
        ["A", 0, 0, 0, 2, nan, nan],
        ["B", 0, 1, 0, 0, 0, 0],
        ["B", 0, 1, 0, 2, 0, 0],
        ["B", 0, 1, 0, 0, nan, 0],
    ], dtype=str)

    features = [ContinuousVariable(name="F%d" % j)
                for j in range(X.shape[1])]
    meta_vars = [StringVariable(name="M%d" % j)
                 for j in range(M.shape[1])]
    domain = Domain(attributes=features, metas=meta_vars)

    expected = features[2:4] + meta_vars[3:7]

    # Dense first, then sparse; the matrix is converted lazily so the
    # sparse table is only built after the dense assertion has passed.
    for as_matrix in (lambda arr: arr, sp.csr_matrix):
        data = Table.from_numpy(X=as_matrix(X), domain=domain, metas=M)
        self.assertEqual(
            varying_between(data, idvar=data.domain.metas[0]),
            expected)


def test_group_table_indices(self):
table = Table(test_filename("test9.tab"))
dd = defaultdict(list)
Expand Down