diff --git a/doc/index.rst b/doc/index.rst index 2f338a09..1c65306b 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -12,5 +12,4 @@ Widgets widgets/scoregenes widgets/scorecells widgets/filter - widgets/tsne widgets/louvain diff --git a/doc/widgets/icons/tSNE.png b/doc/widgets/icons/tSNE.png deleted file mode 100644 index 0f6631f4..00000000 Binary files a/doc/widgets/icons/tSNE.png and /dev/null differ diff --git a/doc/widgets/images/tSNE-Example.png b/doc/widgets/images/tSNE-Example.png deleted file mode 100644 index bee6d93d..00000000 Binary files a/doc/widgets/images/tSNE-Example.png and /dev/null differ diff --git a/doc/widgets/images/tSNE-stamped.png b/doc/widgets/images/tSNE-stamped.png deleted file mode 100644 index e9af006c..00000000 Binary files a/doc/widgets/images/tSNE-stamped.png and /dev/null differ diff --git a/doc/widgets/tsne.rst b/doc/widgets/tsne.rst deleted file mode 100644 index 63a76f3c..00000000 --- a/doc/widgets/tsne.rst +++ /dev/null @@ -1,55 +0,0 @@ -t-SNE -===== - -Two-dimensional data projection with t-SNE. - -Inputs - Data - input dataset - Data Subset - subset of instances - -Outputs - Selected Data - instances selected from the plot - Data - data with an additional column showing whether a point is selected - - -The **t-SNE** widget plots the data with a t-distributed stochastic neighbor embedding method. `t-SNE `_ is a dimensionality reduction technique, similar to MDS, where points are mapped to 2-D space by their probability distribution. - -.. figure:: images/tSNE-stamped.png - -1. Number of iterations for optimization and the measure of `perplexity `_. Press Start to (re-)run the optimization. -2. Select the number of PCA components used for projection. -3. Set the color of the displayed points (you will get colors for discrete - values and grey-scale points for continuous). Set shape, size and - label to differentiate between points. Set symbol size and opacity for - all data points. Set jittering to randomly disperse data points. -4. Adjust *plot properties*: - - - *Show legend* displays a legend on the right. Click and drag the legend to move it. - - *Show all data on mouse hover* enables information bubbles if the cursor is placed on a dot. - - *Show class density* colors the graph by class. - - *Label only selected points* allows you to select individual data instances and label them. -5. If *Send selected automatically* is ticked, changes are communicated automatically. - Alternatively, press *Send Selected*. -6. *Select, zoom, pan and zoom to fit* are the options for exploring the graph. - The manual selection of data instances works as an angular/square - selection tool. Double click to move the projection. Scroll in or out - for zoom. -7. Access help, save image or produce a report. - -Example -------- - -We will use :doc:`Single Cell Datasets<./singlecelldatasets>` widget to load *Bone marrow mononuclear cells with AML (sample)* data. Then we will pass it through **k-Means** and select 2 clusters from Silhouette Scores. Ok, it looks like there might be two distinct clusters here. - -But can we find subpopulations in these cells? Let us load *Bone marrow mononuclear cells with AML (markers)* with :doc:`Single Cell Datasets<./singlecelldatasets>`. Now, pass the marker genes to **Data Table** and select, for example, natural killer cells from the list (NKG7). - -Pass the markers and k-Means results to :doc:`Score Cells<./scorecells>` widget and select *geneName* to match markers with genes. Finally, add **t-SNE** to visualize the results. - -In **t-SNE**, use *Scores* attribute to color the points and set their size. We see that killer cells are nicely clustered together and that t-SNE indeed found subpopulations. - -.. figure:: images/tSNE-Example.png - diff --git a/orangecontrib/single_cell/tests/test_owtsne.py b/orangecontrib/single_cell/tests/test_owtsne.py deleted file mode 100644 index dc334748..00000000 --- a/orangecontrib/single_cell/tests/test_owtsne.py +++ /dev/null @@ -1,86 +0,0 @@ -import unittest - -from Orange.data import DiscreteVariable, ContinuousVariable, Domain, Table -from Orange.widgets.tests.base import ( - WidgetTest, WidgetOutputsTestMixin, ProjectionWidgetTestMixin -) -from orangecontrib.single_cell.widgets.owtsne import OWtSNE - - -class TestOWtSNE(WidgetTest, ProjectionWidgetTestMixin, - WidgetOutputsTestMixin): - @classmethod - def setUpClass(cls): - super().setUpClass() - WidgetOutputsTestMixin.init(cls) - cls.same_input_output_domain = False - - cls.signal_name = "Data" - cls.signal_data = cls.data - - def setUp(self): - self.widget = self.create_widget(OWtSNE) - - self.class_var = DiscreteVariable('Stage name', values=['STG1', 'STG2']) - self.attributes = [ContinuousVariable('GeneName' + str(i)) for i in range(5)] - self.domain = Domain(self.attributes, class_vars=self.class_var) - self.empty_domain = Domain([], class_vars=self.class_var) - - def test_wrong_input(self): - # no data - self.data = None - self.send_signal(self.widget.Inputs.data, self.data) - self.assertIsNone(self.widget.data) - - # <2 rows - self.data = Table(self.domain, [[1, 2, 3, 4, 5, 'STG1']]) - self.send_signal(self.widget.Inputs.data, self.data) - self.assertIsNone(self.widget.data) - self.assertTrue(self.widget.Error.not_enough_rows.is_shown()) - - # no attributes - self.data = Table(self.empty_domain, [['STG1']] * 2) - self.send_signal(self.widget.Inputs.data, self.data) - self.assertIsNone(self.widget.data) - self.assertTrue(self.widget.Error.no_attributes.is_shown()) - - # constant data - self.data = Table(self.domain, [[1, 2, 3, 4, 5, 'STG1']] * 2) - self.send_signal(self.widget.Inputs.data, self.data) - self.assertIsNone(self.widget.data) - self.assertTrue(self.widget.Error.constant_data.is_shown()) - - # correct input - self.data = Table(self.domain, [[1, 2, 3, 4, 5, 'STG1'], - [5, 4, 3, 2, 1, 'STG1']]) - self.send_signal(self.widget.Inputs.data, self.data) - self.assertIsNotNone(self.widget.data) - self.assertFalse(self.widget.Error.not_enough_rows.is_shown()) - self.assertFalse(self.widget.Error.no_attributes.is_shown()) - self.assertFalse(self.widget.Error.constant_data.is_shown()) - - def test_input(self): - self.data = Table(self.domain, [[1, 1, 1, 1, 1, 'STG1'], - [2, 2, 2, 2, 2, 'STG1'], - [4, 4, 4, 4, 4, 'STG2'], - [5, 5, 5, 5, 5, 'STG2']]) - - self.send_signal(self.widget.Inputs.data, self.data) - - def test_attr_models(self): - """Check possible values for 'Color', 'Shape', 'Size' and 'Label'""" - self.send_signal(self.widget.Inputs.data, self.data) - controls = self.widget.controls - for var in self.data.domain.class_vars + self.data.domain.metas: - self.assertIn(var, controls.attr_color.model()) - self.assertIn(var, controls.attr_label.model()) - if var.is_continuous: - self.assertIn(var, controls.attr_size.model()) - self.assertNotIn(var, controls.attr_shape.model()) - if var.is_discrete: - self.assertNotIn(var, controls.attr_size.model()) - self.assertIn(var, controls.attr_shape.model()) - - -if __name__ == '__main__': - unittest.main() diff --git a/orangecontrib/single_cell/widgets/icons/TSNE.svg b/orangecontrib/single_cell/widgets/icons/TSNE.svg deleted file mode 100644 index a217aa9e..00000000 --- a/orangecontrib/single_cell/widgets/icons/TSNE.svg +++ /dev/null @@ -1,84 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/orangecontrib/single_cell/widgets/owtsne.py b/orangecontrib/single_cell/widgets/owtsne.py deleted file mode 100644 index 59c58366..00000000 --- a/orangecontrib/single_cell/widgets/owtsne.py +++ /dev/null @@ -1,351 +0,0 @@ -import os.path -import sys - -import numpy as np -from joblib.memory import Memory - -from AnyQt.QtCore import Qt, QTimer -from AnyQt.QtWidgets import QFormLayout, QApplication - -try: - from MulticoreTSNE import MulticoreTSNE -except ImportError: - MulticoreTSNE = None - -import Orange.data -import Orange.distance -import Orange.misc -import Orange.projection -from Orange.misc.environ import cache_dir -from Orange.widgets import gui -from Orange.widgets.settings import Setting, SettingProvider -from Orange.widgets.visualize.owscatterplotgraph import OWScatterPlotBase -from Orange.widgets.visualize.utils.widget import OWDataProjectionWidget -from Orange.widgets.widget import Msg - - -tsne_cache = os.path.join(cache_dir(), "tsne") -memory = Memory(tsne_cache, verbose=0, bytes_limit=1e8) -memory.reduce_size() - - -@memory.cache -def compute_tsne_embedding(X, perplexity, iter, init): - negative_gradient_method = 'fft' if len(X) > 10000 else 'bh' - neighbor_method = 'approx' if len(X) > 10000 else 'exact' - tsne = Orange.projection.TSNE( - perplexity=perplexity, n_iter=iter, initialization=init, theta=.8, - early_exaggeration_iter=0, negative_gradient_method=negative_gradient_method, - neighbors=neighbor_method, - ) - tsne_model = tsne.fit(X) - return np.asarray(tsne_model, dtype=np.float32) - - -class OWtSNEGraph(OWScatterPlotBase): - def update_coordinates(self): - super().update_coordinates() - if self.scatterplot_item is not None: - self.view_box.setAspectLocked(True, 1) - - -class OWtSNE(OWDataProjectionWidget): - name = "t-SNE" - description = "Two-dimensional data projection with t-SNE." - icon = "icons/TSNE.svg" - priority = 920 - - settings_version = 3 - max_iter = Setting(300) - perplexity = Setting(30) - pca_components = Setting(20) - - GRAPH_CLASS = OWtSNEGraph - graph = SettingProvider(OWtSNEGraph) - embedding_variables_names = ("tsne-x", "tsne-y") - - #: Runtime state - Running, Finished, Waiting = 1, 2, 3 - - class Error(OWDataProjectionWidget.Error): - not_enough_rows = Msg("Input data needs at least 2 rows") - constant_data = Msg("Input data is constant") - no_attributes = Msg("Data has no attributes") - out_of_memory = Msg("Out of memory") - optimization_error = Msg("Error during optimization\n{}") - no_valid_data = Msg("No projection due to no valid data") - - def __init__(self): - super().__init__() - self.pca_data = None - self.embedding = None - self.__invalidated = True - self.__update_loop = None - # timer for scheduling updates - self.__timer = QTimer(self, singleShot=True, interval=1, - timeout=self.__next_step) - self.__state = OWtSNE.Waiting - self.__in_next_step = False - self.__draw_similar_pairs = False - - def _add_controls(self): - self._add_controls_start_box() - super()._add_controls() - # Because sc data frequently has many genes, - # showing all attributes in combo boxes can cause problems - # QUICKFIX: Remove a separator and attributes from order - # (leaving just the class and metas) - self.models = self.graph.gui.points_models - for model in self.models: - model.order = model.order[:-2] - - def _add_controls_start_box(self): - box = gui.vBox(self.controlArea, True) - form = QFormLayout( - labelAlignment=Qt.AlignLeft, - formAlignment=Qt.AlignLeft, - fieldGrowthPolicy=QFormLayout.AllNonFixedFieldsGrow, - verticalSpacing=10 - ) - - form.addRow( - "Max iterations:", - gui.spin(box, self, "max_iter", 1, 2000, step=50)) - - form.addRow( - "Perplexity:", - gui.spin(box, self, "perplexity", 1, 100, step=1)) - - box.layout().addLayout(form) - - gui.separator(box, 10) - self.runbutton = gui.button(box, self, "Run", callback=self._toggle_run) - - gui.separator(box, 10) - gui.hSlider(box, self, "pca_components", label="PCA components:", - minValue=2, maxValue=50, step=1) - - def set_data(self, data): - self.__invalidated = not (self.data and data and - np.array_equal(self.data.X, data.X)) - super().set_data(data) - - def check_data(self): - def error(err): - err() - self.data = None - - super().check_data() - if self.data is not None: - if len(self.data) < 2: - error(self.Error.not_enough_rows) - elif not self.data.domain.attributes: - error(self.Error.no_attributes) - elif not self.data.is_sparse() and \ - np.allclose(self.data.X - self.data.X[0], 0): - error(self.Error.constant_data) - elif not self.data.is_sparse() and \ - np.all(~np.isfinite(self.data.X)): - error(self.Error.no_valid_data) - - def get_embedding(self): - self.valid_data = np.ones(len(self.embedding), dtype=bool) \ - if self.embedding is not None else None - return self.embedding - - def _toggle_run(self): - if self.__state == OWtSNE.Running: - self.stop() - self.commit() - else: - self.start() - - def start(self): - if not self.data or self.__state == OWtSNE.Running: - self.graph.update_coordinates() - elif self.__state in (OWtSNE.Finished, OWtSNE.Waiting): - self.__start() - - def stop(self): - if self.__state == OWtSNE.Running: - self.__set_update_loop(None) - - def pca_preprocessing(self): - if self.pca_data is not None and \ - self.pca_data.X.shape[1] == self.pca_components: - return - pca = Orange.projection.PCA( - n_components=self.pca_components, random_state=0) - model = pca(self.data) - self.pca_data = model(self.data) - - def __start(self): - self.pca_preprocessing() - embedding = 'random' if self.embedding is None else self.embedding - step_size = 50 - - def update_loop(data, max_iter, step, embedding): - """ - return an iterator over successive improved MDS point embeddings. - """ - # NOTE: this code MUST NOT call into QApplication.processEvents - done = False - iterations_done = 0 - - while not done: - step_iter = min(max_iter - iterations_done, step) - embedding = compute_tsne_embedding( - data.X, self.perplexity, step_iter, embedding) - iterations_done += step_iter - if iterations_done >= max_iter: - done = True - - yield embedding, iterations_done / max_iter - - self.__set_update_loop(update_loop( - self.pca_data, self.max_iter, step_size, embedding)) - self.progressBarInit(processEvents=None) - - def __set_update_loop(self, loop): - """ - Set the update `loop` coroutine. - - The `loop` is a generator yielding `(embedding, progress)` - tuples where `embedding` is a `(N, 2) ndarray` of current updated - MDS points, and `progress` a float ratio (0 <= progress <= 1) - - If an existing update coroutine loop is already in place it is - interrupted (i.e. closed). - - .. note:: - The `loop` must not explicitly yield control flow to the event - loop (i.e. call `QApplication.processEvents`) - - """ - if self.__update_loop is not None: - self.__update_loop.close() - self.__update_loop = None - self.progressBarFinished(processEvents=None) - - self.__update_loop = loop - - if loop is not None: - self.setBlocking(True) - self.progressBarInit(processEvents=None) - self.setStatusMessage("Running") - self.runbutton.setText("Stop") - self.__state = OWtSNE.Running - self.__timer.start() - else: - self.setBlocking(False) - self.setStatusMessage("") - self.runbutton.setText("Start") - self.__state = OWtSNE.Finished - self.__timer.stop() - - def __next_step(self): - if self.__update_loop is None: - return - - assert not self.__in_next_step - self.__in_next_step = True - - loop = self.__update_loop - self.Error.out_of_memory.clear() - self.Error.optimization_error.clear() - try: - embedding, progress = next(self.__update_loop) - assert self.__update_loop is loop - except StopIteration: - self.__set_update_loop(None) - self.unconditional_commit() - except MemoryError: - self.Error.out_of_memory() - self.__set_update_loop(None) - except Exception as exc: - self.Error.optimization_error(str(exc)) - self.__set_update_loop(None) - else: - self.progressBarSet(100.0 * progress, processEvents=None) - self.embedding = embedding - self.graph.update_coordinates() - self.graph.update_density() - # schedule next update - self.__timer.start() - - self.__in_next_step = False - - def __invalidate_embedding(self): - if self.data is not None: - self.embedding = np.random.normal(size=(len(self.data), 2)) - - def handleNewSignals(self): - if self.__invalidated: - self.__invalidated = False - self.__invalidate_embedding() - self.setup_plot() - self.embedding = None - self.start() - else: - self.graph.update_coordinates() - self.commit() - - def clear(self): - super().clear() - self.__set_update_loop(None) - self.__state = OWtSNE.Waiting - self.pca_data = None - self.embedding = None - - @classmethod - def migrate_settings(cls, settings, version): - if version < 3: - if "selection_indices" in settings: - settings["selection"] = settings["selection_indices"] - - @classmethod - def migrate_context(cls, context, version): - if version < 3: - values = context.values - values["attr_color"] = values["graph"]["attr_color"] - values["attr_size"] = values["graph"]["attr_size"] - values["attr_shape"] = values["graph"]["attr_shape"] - values["attr_label"] = values["graph"]["attr_label"] - - -def main(argv=None): - if argv is None: - argv = sys.argv - import gc - app = QApplication(list(argv)) - argv = app.arguments() - if len(argv) > 1: - filename = argv[1] - else: - filename = "iris" - - data = Orange.data.Table(filename) - w = OWtSNE() - w.set_data(data) - w.set_subset_data(data[np.random.choice(len(data), 10)]) - w.handleNewSignals() - - w.show() - w.raise_() - rval = app.exec_() - - w.set_subset_data(None) - w.set_data(None) - w.handleNewSignals() - - w.saveSettings() - w.onDeleteWidget() - w.deleteLater() - del w - gc.collect() - app.processEvents() - return rval - - -if __name__ == "__main__": - sys.exit(main())