Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions Orange/widgets/data/icons/SelectByDataIndex.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
106 changes: 106 additions & 0 deletions Orange/widgets/data/owselectbydataindex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import numpy as np

from AnyQt.QtWidgets import QApplication

from Orange.data import Table
from Orange.widgets import widget, gui
from Orange.widgets.utils import itemmodels
from Orange.widgets.utils.sql import check_sql_input
from Orange.widgets.widget import Input, Output


class OWSelectByDataIndex(widget.OWWidget):
    """Output the rows of "Data" whose ids also occur in "Data Subset".

    The widget takes two tables. Rows are matched purely on the tables'
    ``ids`` arrays, so the subset may have a different domain from the
    reference data.
    """

    name = "Select by Data Index"
    description = "Match instances by index from data subset."
    icon = "icons/SelectByDataIndex.svg"
    priority = 1112

    class Inputs:
        # Reference table from which the output rows are taken.
        data = Input("Data", Table)
        # Table whose row ids decide which reference rows are selected.
        data_subset = Input("Data Subset", Table)

    class Outputs:
        # Rows of the reference table whose ids appear in the subset.
        data = Output("Data", Table)

    want_main_area = False
    resizing_enabled = False

    class Warning(widget.OWWidget.Warning):
        instances_not_matching = widget.Msg(
            "Input tables do not share any instances.")

    def __init__(self):
        super().__init__()

        self.data = None
        self.data_subset = None

        # NOTE(review): these models are created but not used anywhere in
        # this class; kept because code outside this file may read them.
        self.model = itemmodels.VariableListModel()
        self.model_unique_with_id = itemmodels.VariableListModel()
        self.extra_model_unique = itemmodels.VariableListModel()
        self.extra_model_unique_with_id = itemmodels.VariableListModel()

        # Two side-by-side labels summarising each input.
        box = gui.hBox(self.controlArea, box=None)
        self.infoBoxData = gui.label(
            box, self, self.data_info_text(None), box="Data")
        self.infoBoxExtraData = gui.label(
            box, self, self.data_info_text(None), box="Data Subset")

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Store the reference table and refresh its info label."""
        self.data = data
        self.infoBoxData.setText(self.data_info_text(data))

    @Inputs.data_subset
    @check_sql_input
    def set_data_subset(self, data):
        """Store the subset table and refresh its info label."""
        self.data_subset = data
        self.infoBoxExtraData.setText(self.data_info_text(data))

    def handleNewSignals(self):
        """Recompute the output; meant to run after the input setters."""
        self._invalidate()

    def data_info_text(self, data):
        """Return a three-line summary of ``data``, or "No data." for None.

        The variable count is ``len(data.domain)`` plus the number of metas.
        """
        if data is None:
            return "No data."
        return "{}\n{} instances\n{} variables".format(
            data.name, len(data), len(data.domain) + len(data.domain.metas))

    def commit(self):
        """Send the rows of ``self.data`` whose ids occur in the subset.

        Sends ``None`` when ``self.data`` is falsy (e.g. ``None``). When a
        (truthy) subset is present but shares no id with the data, the
        ``instances_not_matching`` warning is shown and the output contains
        no rows. Without a subset, no row is selected either.
        """
        self.Warning.instances_not_matching.clear()

        if not self.data:
            self.Outputs.data.send(None)
            return

        subset_ids = self.data_subset.ids if self.data_subset else []
        # One membership mask serves both purposes: it selects the rows and
        # tells us whether the two tables overlap at all.
        subset_indices = np.isin(self.data.ids, subset_ids)
        if self.data_subset and not subset_indices.any():
            self.Warning.instances_not_matching()
        self.Outputs.data.send(self.data[subset_indices])

    def _invalidate(self):
        """Trigger a recomputation of the output."""
        self.commit()

    def send_report(self):
        """Report one-line summaries of both inputs."""
        # The info text is multi-line for the GUI; flatten it for the report.
        d_text = self.data_info_text(self.data).replace("\n", ", ")
        ds_text = self.data_info_text(self.data_subset).replace("\n", ", ")
        self.report_items("", [("Data", d_text), ("Data Subset", ds_text)])


def main():
    """Show the widget standalone, fed with iris and its first 20 rows."""
    application = QApplication([])
    select_widget = OWSelectByDataIndex()

    iris = Table("iris.tab")
    select_widget.set_data(iris)
    select_widget.set_data_subset(iris[:20])
    # Signals are set by hand here, so the recomputation must be triggered
    # explicitly as well.
    select_widget.handleNewSignals()

    select_widget.show()
    application.exec_()


if __name__ == "__main__":
    main()
26 changes: 26 additions & 0 deletions Orange/widgets/data/tests/test_owselectbydataindex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from Orange.data import Table, Domain
from Orange.widgets.data.owselectbydataindex import OWSelectByDataIndex
from Orange.widgets.tests.base import WidgetTest


class TestOWSelectSubset(WidgetTest):
    """Signal-level tests for the Select by Data Index widget."""

    def setUp(self):
        self.widget = self.create_widget(OWSelectByDataIndex)

    def test_subset(self):
        """A subset with a different domain still selects the same rows."""
        iris = Table("iris")
        expected_rows = iris[20:40]
        # destroy domain: matching must not depend on the subset's columns
        domainless_subset = expected_rows.transform(Domain([]))

        inputs = self.widget.Inputs
        self.send_signal(inputs.data, iris)
        self.send_signal(inputs.data_subset, domainless_subset)

        selected = self.get_output(self.widget.Outputs.data)
        self.assertEqual(list(expected_rows), list(selected))

    def test_subset_nosubset(self):
        """Unrelated tables raise the warning and yield an empty output."""
        iris = Table("iris")
        unrelated = Table("titanic")

        inputs = self.widget.Inputs
        self.send_signal(inputs.data, iris)
        self.send_signal(inputs.data_subset, unrelated)

        selected = self.get_output(self.widget.Outputs.data)
        self.assertTrue(self.widget.Warning.instances_not_matching.is_shown())
        self.assertEqual([], list(selected))
1 change: 1 addition & 0 deletions doc/visual-programming/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ Data
widgets/data/createclass
widgets/data/randomize
widgets/data/concatenate
widgets/data/select-by-data-index
widgets/data/paintdata
widgets/data/pythonscript
widgets/data/featureconstructor
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
Select by Data Index
====================

Match instances by index from a data subset.

Inputs
Data
reference data set
Data Subset
subset to match

Outputs
Data
subset from reference data set that matches indices from subset data


**Select by Data Index** matches data instances by their indices. Each row in a data set has an index; given a subset, the widget matches the subset's indices against the indices of the reference data and retrieves the corresponding rows. Most often it is used to retrieve the original data from transformed data (say, from PCA space).

.. figure:: images/Select-by-data-index-stamped.png
:scale: 50%

1. Information on the reference data set. This data is used as index reference.
2. Information on the data subset. The indices of this data set are used to find matching data in the reference data set. By default, the matching data is sent to the output.

Example
-------

A typical use of **Select by Data Index** is to retrieve the original data after a transformation. We will load *iris.tab* data in the **File** widget. Then we will transform this data with **PCA**. We can project the transformed data in a **Scatter Plot**, where we can only see PCA components and not the original features.

Now we will select an interesting subset (we could also select the entire data set). If we observe it in a **Data Table**, we can see that the data is transformed. If we would like to see this data with the original features, we will have to retrieve them with **Select by Data Index**.

Connect the original data and the subset from **Scatter Plot** to **Select by Data Index**. The widget will match the indices of the subset with the indices of the reference (original) data and output the matching reference data. A final inspection in another **Data Table** confirms the data on the output is from the original data space.

.. figure:: images/Select-by-Data-Index-Example1.png