|
9 | 9 | import numpy as np |
10 | 10 | from scipy.sparse import csr_matrix |
11 | 11 |
|
12 | | -from Orange.data import Table |
| 12 | +from Orange.data import Table, Domain, ContinuousVariable |
13 | 13 | from Orange.preprocess import EntropyMDL, DoNotImpute, Default, Average, \ |
14 | 14 | SelectRandomFeatures, EqualFreq, RemoveNaNColumns, DropInstances, \ |
15 | 15 | EqualWidth, SelectBestFeatures, RemoveNaNRows, Preprocess, Scale, \ |
16 | 16 | Randomize, Continuize, Discretize, Impute, SklImpute, Normalize, \ |
17 | | - ProjectCUR, ProjectPCA, RemoveConstant, AdaptiveNormalize |
| 17 | + ProjectCUR, ProjectPCA, RemoveConstant, AdaptiveNormalize, RemoveSparse |
18 | 18 | from Orange.util import OrangeDeprecationWarning |
19 | 19 |
|
20 | 20 |
|
@@ -126,7 +126,7 @@ def test_reprs(self): |
126 | 126 | Randomize, ProjectPCA, ProjectCUR, Scale, |
127 | 127 | EqualFreq, EqualWidth, EntropyMDL, SelectBestFeatures, |
128 | 128 | SelectRandomFeatures, RemoveNaNColumns, DoNotImpute, DropInstances, |
129 | | - Average, Default] |
| 129 | + Average, Default, RemoveSparse] |
130 | 130 |
|
131 | 131 | for preproc in preprocs: |
132 | 132 | repr_str = repr(preproc()) |
@@ -176,3 +176,28 @@ def test_sparse_pps(self): |
176 | 176 | true_out = Scale(center=Scale.NoCentering, scale=Scale.Span)(self.data) |
177 | 177 | np.testing.assert_array_equal(out, true_out) |
178 | 178 | self.data = self.data.X.toarray() |
| 179 | + |
| 180 | + |
class TestRemoveSparse(unittest.TestCase):
    """Check ``RemoveSparse`` on dense and scipy-sparse input tables.

    The fixture has two continuous columns over three rows: ``a`` is all
    zeros and ``b`` is ``[0, 7, 7]``.  With the argument ``0.5`` the tests
    expect ``a`` to be dropped and ``b`` to be kept unchanged.
    """

    def setUp(self):
        domain = Domain([ContinuousVariable('a'), ContinuousVariable('b')])
        self.data = Table.from_numpy(domain, np.zeros((3, 2)))
        # Make column 'b' mostly non-zero; column 'a' stays entirely zero.
        self.data[1:, 1] = 7

    def test_dense(self):
        # Fancy indexing copies, so `expected` is a standalone (3, 1) array.
        expected = self.data.X[:, [1]]
        out = RemoveSparse(0.5)(self.data)
        self.assertEqual([attr.name for attr in out.domain.attributes], ['b'])
        # Compare the numeric payloads rather than the Table objects so the
        # assertion really checks cell values.
        np.testing.assert_array_equal(out.X, expected)

    def test_sparse(self):
        # Take the expected dense values before X is swapped for a sparse
        # matrix below.
        expected = self.data.X[:, [1]]
        self.data.X = csr_matrix(self.data.X)
        out = RemoveSparse(0.5)(self.data)
        self.assertEqual([attr.name for attr in out.domain.attributes], ['b'])
        # assert_array_equal is not meaningful on scipy sparse matrices;
        # densify first.  Wrapping in csr_matrix accepts both sparse and
        # dense results, so the check does not depend on the storage the
        # preprocessor returns.
        np.testing.assert_array_equal(csr_matrix(out.X).toarray(), expected)
| 200 | + |
| 201 | + |
| 202 | +if __name__ == '__main__': |
| 203 | + unittest.main() |
0 commit comments