|
6 | 6 | from AnyQt.QtWidgets import QFormLayout |
7 | 7 |
|
8 | 8 | from Orange.data import Table, Domain |
9 | | -from Orange.preprocess.preprocess import Preprocess, ApplyDomain |
| 9 | +from Orange.preprocess.preprocess import Preprocess, ApplyDomain, Normalize |
10 | 10 | from Orange.projection import PCA, TSNE |
11 | 11 | from Orange.projection.manifold import TSNEModel |
12 | 12 | from Orange.widgets import gui |
@@ -76,6 +76,7 @@ class OWtSNE(OWDataProjectionWidget): |
76 | 76 | multiscale = Setting(True) |
77 | 77 | exaggeration = Setting(1) |
78 | 78 | pca_components = Setting(20) |
| 79 | + normalize = Setting(True) |
79 | 80 |
|
80 | 81 | GRAPH_CLASS = OWtSNEGraph |
81 | 82 | graph = SettingProvider(OWtSNEGraph) |
@@ -143,15 +144,25 @@ def _add_controls_start_box(self): |
143 | 144 | sbp = gui.hBox(self.controlArea, False, addToLayout=False) |
144 | 145 | gui.hSlider( |
145 | 146 | sbp, self, "pca_components", minValue=2, maxValue=50, step=1, |
146 | | - callback=self._params_changed |
| 147 | + callback=self._invalidate_pca_projection |
147 | 148 | ) |
148 | 149 | form.addRow("PCA components:", sbp) |
149 | 150 |
|
| 151 | + self.normalize_cbx = gui.checkBox( |
| 152 | + box, self, "normalize", "Normalize data", |
| 153 | + callback=self._invalidate_pca_projection, |
| 154 | + ) |
| 155 | + form.addRow(self.normalize_cbx) |
| 156 | + |
150 | 157 | box.layout().addLayout(form) |
151 | 158 |
|
152 | 159 | gui.separator(box, 10) |
153 | 160 | self.runbutton = gui.button(box, self, "Run", callback=self._toggle_run) |
154 | 161 |
|
| 162 | + def _invalidate_pca_projection(self): |
| 163 | + self.pca_data = None |
| 164 | + self._params_changed() |
| 165 | + |
155 | 166 | def _params_changed(self): |
156 | 167 | self.__state = OWtSNE.Finished |
157 | 168 | self.__set_update_loop(None) |
@@ -215,10 +226,26 @@ def stop(self): |
def resume(self):
    """Install the stored t-SNE iterator as the current update loop."""
    iterator = self.tsne_iterator
    self.__set_update_loop(iterator)
217 | 228 |
|
def set_data(self, data: Table):
    """Accept new input data and sync the normalization option with it.

    The data is first passed to the parent implementation. PCA cannot
    normalize sparse data (that would require centering and normalizing
    the matrix), so for sparse input the "Normalize data" check box is
    disabled and the ``normalize`` setting is switched off. For dense
    input the check box is enabled and the user's choice is left as is.

    ``data`` may be ``None``; in that case the normalization controls
    are not touched.
    """
    super().set_data(data)

    if data is None:
        return

    sparse = data.is_sparse()
    self.normalize_cbx.setDisabled(sparse)
    # Only force normalization off when it is actually unsupported.
    # Resetting it unconditionally would discard the user's (persisted)
    # choice every time a dense data set arrives.
    if sparse:
        self.normalize = False
| 237 | + |
def pca_preprocessing(self):
    """Compute the PCA projection of the data that is later fed to t-SNE.

    The result is cached in ``self.pca_data``; while a cached projection
    exists this method does nothing. The cache is cleared elsewhere
    whenever a PCA-related parameter changes.
    """
    if self.pca_data is None:
        pca = PCA(n_components=self.pca_components, random_state=0)
        # With the normalization box ticked, append the `Normalize`
        # preprocessor to the ones PCA already applies.
        if self.normalize:
            pca.preprocessors += (Normalize(),)

        fitted = pca(self.data)
        self.pca_data = fitted(self.data)
224 | 251 |
|
|
0 commit comments