Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
610 changes: 268 additions & 342 deletions Orange/widgets/visualize/owvenndiagram.py

Large diffs are not rendered by default.

108 changes: 107 additions & 1 deletion Orange/widgets/visualize/tests/test_owvenndiagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
drop_columns,
OWVennDiagram,
group_table_indices,
copy_descriptor)
copy_descriptor,
arrays_equal)
from Orange.tests import test_filename


Expand Down Expand Up @@ -124,6 +125,8 @@ def _select_data(self):
return list(range(len(self.signal_data)))

def test_multiple_input(self):
"""Over rows"""
self.widget.rowwise = True
self.send_signal(self.signal_name, self.data[:100], 1)
self.send_signal(self.signal_name, self.data[50:], 2)

Expand Down Expand Up @@ -161,6 +164,49 @@ def test_multiple_input(self):
self.assertIsNone(self.get_output(self.widget.Outputs.selected_data))
self.assertIsNone(self.get_output(self.widget.Outputs.annotated_data))

def test_multiple_input_over_cols(self):
    """Over columns"""
    self.widget.rowwise = False
    flag_key = 'Selected'
    source_domain = self.data.domain
    # The second input keeps only the first attribute of the original
    # data, together with the same class variables and metas.
    single_attr_domain = Domain([source_domain.attributes[0]],
                                source_domain.class_vars,
                                source_domain.metas)
    input2 = self.data.transform(single_attr_domain)
    self.send_signal(self.signal_name, self.data, (1, 'Data', None))
    self.send_signal(self.signal_name, input2, (2, 'Data', None))

    # Nothing is selected yet: no selected output, and every attribute
    # of the annotated output carries a falsy (but present) flag.
    selected = self.get_output(self.widget.Outputs.selected_data)
    annotated = self.get_output(self.widget.Outputs.annotated_data)
    self.assertIsNone(selected)
    self.assertEqual(len(annotated), len(self.data))
    self.assertEqual(annotated.domain, self.data.domain)
    for atr in annotated.domain.attributes:
        flag = atr.attributes[flag_key]
        self.assertIsNotNone(flag)
        self.assertFalse(flag)

    # select data instances
    self.widget.vennwidget.vennareas()[3].setSelected(True)
    for part in ('X', 'Y', 'metas'):
        output = self.get_output(self.widget.Outputs.selected_data)
        np.testing.assert_array_equal(getattr(output, part),
                                      getattr(input2, part))

    # domain matches but the values do not
    input2.X = input2.X - 1
    self.send_signal(self.signal_name, input2, (2, 'Data', None))
    self.widget.vennwidget.vennareas()[3].setSelected(True)
    expected_names = ('sepal length-iris1', 'sepal length-iris2')
    for position, expected_name in enumerate(expected_names):
        output = self.get_output(self.widget.Outputs.selected_data)
        self.assertEqual(output.domain.attributes[position].name,
                         expected_name)

    # Only the first two attributes of the annotated output are flagged.
    annotated_attrs = self.get_output(
        self.widget.Outputs.annotated_data).domain.attributes
    expected_flags = (True, True, False, False, False)
    for position, expect_selected in enumerate(expected_flags):
        check = self.assertTrue if expect_selected else self.assertFalse
        check(annotated_attrs[position].attributes[flag_key])

def test_no_data(self):
"""Check that the widget doesn't crash on empty data"""
self.send_signal(self.signal_name, self.data[:0], 1)
Expand Down Expand Up @@ -188,6 +234,51 @@ def test_unconditional_commit_on_new_signal(self):
self.send_signal(self.signal_name, self.data[:100], 1)
commit.assert_called()

def test_input_compatibility(self):
    """Check when the instances-mismatch error is shown"""
    def mismatch_shown():
        return self.widget.Error.instances_mismatch.is_shown()

    # Row-wise: a second input with fewer attributes raises no error.
    self.widget.rowwise = True
    self.send_signal(self.signal_name, self.data, 1)
    source_domain = self.data.domain
    reduced = self.data.transform(Domain([source_domain.attributes[0]],
                                         source_domain.class_vars,
                                         source_domain.metas))
    self.send_signal(self.signal_name, reduced, 2)
    self.assertFalse(mismatch_shown())

    # Column-wise: a second input with only the first 100 rows
    # triggers the error.
    self.widget.rowwise = False
    self.send_signal(self.signal_name, self.data[:100, :], 2)
    self.assertTrue(mismatch_shown())

    # Removing the offending input clears the error again.
    self.send_signal(self.signal_name, None, 2)
    self.assertFalse(mismatch_shown())

def test_rows_identifiers(self):
    """Match rows by the 'name' feature and check both outputs"""
    zoo = Table('zoo')
    self.widget.rowwise = True
    self.send_signal(self.signal_name, zoo, (1, 'Data', None))
    self.widget.selected_feature = 'name'
    self.send_signal(self.signal_name, zoo[:5], (2, 'Data', None))

    # No output until an area of the diagram is selected.
    self.assertIsNone(self.get_output(self.widget.Outputs.selected_data))
    self.widget.vennwidget.vennareas()[3].setSelected(True)

    chosen = self.get_output(self.widget.Outputs.selected_data)
    self.assertEqual(len(chosen), 5)
    for part in ('attributes', 'class_vars'):
        self.assertEqual(getattr(chosen.domain, part),
                         getattr(zoo.domain, part))

    annotated = self.get_output(self.widget.Outputs.annotated_data)
    self.assertEqual(len(annotated), 100)

def test_too_many_inputs(self):
    """Check the too-many-inputs error after six inputs"""
    # Connect the same table on input ids 1 through 6.
    for input_id in range(1, 7):
        self.send_signal(self.signal_name, self.data, input_id)
    self.assertTrue(self.widget.Error.too_many_inputs.is_shown())

    # Disconnecting the sixth input clears the error.
    self.send_signal(self.signal_name, None, 6)
    self.assertFalse(self.widget.Error.too_many_inputs.is_shown())


class GroupTableIndicesTest(unittest.TestCase):

Expand Down Expand Up @@ -278,6 +369,21 @@ class SomeVariable(Variable):
copied = copy_descriptor(var, "cux")
self.assertEqual(copied.name, "cux")

def test_array_equals(self):
    """Check arrays_equal with None, NaN, numeric and string inputs"""
    first = np.array([1, 2], dtype=np.float64)
    second = np.array([1, np.nan], dtype=np.float64)

    # Two missing arrays are equal; one missing array never is.
    self.assertTrue(arrays_equal(None, None, None))
    self.assertFalse(arrays_equal(None, first, None))
    self.assertFalse(arrays_equal(first, None, None))

    # A number against NaN in the same position is a mismatch.
    self.assertFalse(arrays_equal(first, second, ContinuousVariable))

    # NaN in the same position of both arrays is expected to compare equal.
    first[1] = np.nan
    self.assertTrue(arrays_equal(first, second, ContinuousVariable))
    left_text, right_text = first.astype(str), first.astype(str)
    self.assertTrue(arrays_equal(left_text, right_text, StringVariable))

    # Different values must differ, both as numbers and as strings.
    first[1] = 2
    second[1] = 3
    self.assertFalse(arrays_equal(first, second, ContinuousVariable))
    left_text, right_text = first.astype(str), second.astype(str)
    self.assertFalse(arrays_equal(left_text, right_text, StringVariable))


# Run the unittest runner when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Binary file not shown.
15 changes: 7 additions & 8 deletions doc/visual-programming/source/widgets/visualize/venndiagram.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,18 @@ Plots a [Venn diagram](http://en.wikipedia.org/wiki/Venn_diagram) for two or mor
**Outputs**

- Selected Data: instances selected from the plot
- Data: entire data with a column indicating whether an instance was selected or not

The **Venn Diagram** widget displays logical relations between datasets. This projection shows two or more datasets represented by circles of different colors. The intersections are subsets that belong to more than one dataset. To further analyze or visualize the subset, click on the intersection.
The **Venn Diagram** widget displays logical relations between datasets by showing the number of common data instances (rows) or the number of shared features (columns). Selecting a part of the visualization outputs the corresponding instances or features.

![](images/venn-workflow.png)

![](images/venn-identifiers-stamped.png)
![](images/VennDiagram-stamped.png)

1. Information on the input data.
2. Select the identifiers by which to compare the data.
3. Tick *Output duplicates* to keep duplicate instances in the output; leave it unticked to remove them.
4. If *Auto commit* is on, changes are automatically communicated to other widgets. Alternatively, click *Commit*.
5. *Save Image* saves the created image to your computer in a .svg or .png format.
6. Produce a report.
1. Select whether to count common features or instances.
2. Select whether to include duplicates or to output only unique rows (applicable only when matching by instances). If *Auto commit* is on, changes are automatically communicated to other widgets.

Rows can be matched by their identity, e.g. rows from different datasets match if they came from the same row in a file. Instead of using identities, we can choose a string variable to match the rows by. A warning is shown if the datasets have no common string variable.

Examples
--------
Expand Down