forked from biolab/orange3
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_io.py
More file actions
113 lines (95 loc) · 4.75 KB
/
test_io.py
File metadata and controls
113 lines (95 loc) · 4.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import unittest
import numpy as np
from Orange.data import ContinuousVariable, DiscreteVariable, StringVariable, \
TimeVariable
from Orange.data.io_util import guess_data_type
from Orange.misc.collections import natural_sorted
class TestTableFilters(unittest.TestCase):
    """Checks of the column type reported by ``guess_data_type``.

    Every test passes a plain Python list to ``guess_data_type`` and
    inspects the returned ``(valuemap, values, coltype)`` triple.
    """

    @staticmethod
    def _suffixed(count):
        """Return the strings ``"0a"``, ``"1a"``, ... up to ``count - 1``."""
        return [str(number) + "a" for number in range(count)]

    def test_guess_data_type_continuous(self):
        """Numeric columns are ContinuousVariable without a value map."""
        # a run of distinct integers
        mapping, column, kind = guess_data_type(list(range(1, 100)))
        self.assertEqual(ContinuousVariable, kind)
        self.assertIsNone(mapping)
        np.testing.assert_array_equal(np.arange(1, 100), column)

        # three integers, each repeated
        mapping, column, kind = guess_data_type([1, 2, 3, 1, 2, 3])
        self.assertEqual(ContinuousVariable, kind)
        self.assertIsNone(mapping)
        np.testing.assert_array_equal([1, 2, 3, 1, 2, 3], column)

        # the same numbers given as strings; the result compares equal
        # to the numeric values
        mapping, column, kind = guess_data_type(
            ["1", "2", "3", "1", "2", "3"]
        )
        self.assertEqual(ContinuousVariable, kind)
        self.assertIsNone(mapping)
        np.testing.assert_array_equal([1, 2, 3, 1, 2, 3], column)

    def test_guess_data_type_discrete(self):
        """Columns with few repeated values are DiscreteVariable."""
        # two distinct numbers
        mapping, column, kind = guess_data_type([1, 2, 1, 2])
        self.assertEqual(DiscreteVariable, kind)
        self.assertEqual([1, 2], mapping)
        np.testing.assert_array_equal([1, 2, 1, 2], column)

        # numeric-looking strings mixed with a non-numeric one
        mapping, column, kind = guess_data_type(["1", "2", "1", "2", "a"])
        self.assertEqual(DiscreteVariable, kind)
        self.assertEqual(["1", "2", "a"], mapping)
        np.testing.assert_array_equal(['1', '2', '1', '2', 'a'], column)

        # 25 distinct strings among 100 values: just below the threshold
        # at which the column would be treated as a string variable
        raw = self._suffixed(24) + ["a"] * 76
        mapping, column, kind = guess_data_type(raw)
        self.assertEqual(DiscreteVariable, kind)
        self.assertEqual(natural_sorted(set(raw)), mapping)
        np.testing.assert_array_equal(raw, column)

    def test_guess_data_type_string(self):
        """Columns with too many distinct values are StringVariable."""
        # 90 values, all different: too many for a discrete variable
        raw = self._suffixed(90)
        mapping, column, kind = guess_data_type(raw)
        self.assertEqual(StringVariable, kind)
        self.assertIsNone(mapping)
        np.testing.assert_array_equal(raw, column)

        # 26 distinct strings among 100 values: more than len(values)**0.7
        raw = self._suffixed(25) + ["a"] * 75
        mapping, column, kind = guess_data_type(raw)
        self.assertEqual(StringVariable, kind)
        self.assertIsNone(mapping)
        np.testing.assert_array_equal(raw, column)

        # exactly 101 distinct strings, i.e. more than 100; here the
        # len(values)**0.7 rule alone would vote for DiscreteVariable
        raw = self._suffixed(100) + ["a"] * 999
        mapping, column, kind = guess_data_type(raw)
        self.assertEqual(StringVariable, kind)
        self.assertIsNone(mapping)
        np.testing.assert_array_equal(raw, column)

    def test_guess_data_type_time(self):
        """Date and date-time strings are TimeVariable without a map."""
        samples = [
            # date only
            ["2019-10-10", "2019-10-10", "2019-10-10", "2019-10-01"],
            # date and time separated by "T"
            ["2019-10-10T12:08:51", "2019-10-10T12:08:51",
             "2019-10-10T12:08:51", "2019-10-01T12:08:51"],
            # date and time separated by a space
            ["2019-10-10 12:08:51", "2019-10-10 12:08:51",
             "2019-10-10 12:08:51", "2019-10-01 12:08:51"],
            # time given without seconds
            ["2019-10-10 12:08", "2019-10-10 12:08",
             "2019-10-10 12:08", "2019-10-01 12:08"],
        ]
        for raw in samples:
            mapping, _, kind = guess_data_type(raw)
            self.assertEqual(TimeVariable, kind)
            self.assertIsNone(mapping)

    def test_guess_data_type_values_order(self):
        """
        Test if values are ordered naturally
        """
        raw = [
            "something1", "something12", "something2", "something1",
            "something20", "something1", "something2", "something12",
            "something1", "something12",
        ]
        # natural order: the numeric suffixes run 1, 2, 12, 20
        expected = ["something1", "something2", "something12", "something20"]
        mapping, _, kind = guess_data_type(raw)
        self.assertEqual(DiscreteVariable, kind)
        self.assertListEqual(expected, mapping)
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()