Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Orange/data/io_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
isnastr, guess_data_type, sanitize_variable
from Orange.data.util import get_unique_names_duplicates
from Orange.data.variable import VariableMeta
from Orange.misc.collections import natural_sorted
from Orange.util import Registry, flatten, namegen

__all__ = ["FileFormatBase", "Flags", "DataTableMixin", "PICKLE_PROTOCOL"]
Expand Down Expand Up @@ -278,7 +279,7 @@ def _disc_column(data: np.ndarray, col: int) -> \
def _disc_no_vals_column(data: np.ndarray, col: int, **_) -> \
_ColumnProperties:
vals, coltype = _TableBuilder._disc_column(data, col)
return _ColumnProperties(valuemap=sorted(set(vals) - {""}),
return _ColumnProperties(valuemap=natural_sorted(set(vals) - {""}),
values=vals, coltype=coltype,
orig_values=vals)

Expand Down
3 changes: 2 additions & 1 deletion Orange/data/io_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
is_discrete_values, MISSING_VALUES, Variable,
DiscreteVariable, StringVariable, ContinuousVariable, TimeVariable,
)
from Orange.misc.collections import natural_sorted

__all__ = ["Compression", "open_compressed", "detect_encoding", "isnastr",
"guess_data_type", "sanitize_variable"]
Expand Down Expand Up @@ -121,7 +122,7 @@ def guess_data_type(orig_values, namask=None):
if namask is None:
namask = isnastr(orig_values)
if is_discrete:
valuemap = sorted(is_discrete)
valuemap = natural_sorted(is_discrete)
coltype = DiscreteVariable
else:
# try to parse as float
Expand Down
22 changes: 20 additions & 2 deletions Orange/data/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
from Orange.data import ContinuousVariable, DiscreteVariable, StringVariable, \
TimeVariable
from Orange.data.io_util import guess_data_type
from Orange.misc.collections import natural_sorted


class TestTableFilters(unittest.TestCase):

def test_guess_data_type_continuous(self):
# should be ContinuousVariable
valuemap, values, coltype = guess_data_type(list(range(1, 100)))
Expand Down Expand Up @@ -42,7 +42,7 @@ def test_guess_data_type_discrete(self):
in_values = list(map(lambda x: str(x) + "a", range(24))) + ["a"] * 76
valuemap, values, coltype = guess_data_type(in_values)
self.assertEqual(DiscreteVariable, coltype)
self.assertEqual(sorted(set(in_values)), valuemap)
self.assertEqual(natural_sorted(set(in_values)), valuemap)
np.testing.assert_array_equal(in_values, values)

def test_guess_data_type_string(self):
Expand Down Expand Up @@ -93,3 +93,21 @@ def test_guess_data_type_time(self):
valuemap, _, coltype = guess_data_type(in_values)
self.assertEqual(TimeVariable, coltype)
self.assertIsNone(valuemap)

def test_guess_data_type_values_order(self):
    """
    The value map guessed for repeated string labels is naturally ordered
    """
    # Numeric suffixes of different widths, each label repeated and shuffled.
    in_values = [
        "something1",
        "something12",
        "something2",
        "something1",
        "something20",
        "something1",
        "something2",
        "something12",
        "something1",
        "something12",
    ]
    expected_order = [
        "something1",
        "something2",
        "something12",
        "something20",
    ]

    guessed = guess_data_type(in_values)
    valuemap, coltype = guessed[0], guessed[2]

    self.assertEqual(DiscreteVariable, coltype)
    self.assertListEqual(expected_order, valuemap)


if __name__ == "__main__":
unittest.main()
33 changes: 33 additions & 0 deletions Orange/misc/collections.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import re
from typing import List


class frozendict(dict):
def clear(self):
raise AttributeError("FrozenDict does not support method 'clear'")
Expand All @@ -20,3 +24,32 @@ def __setitem__(self, _key, _value):
def __delitem__(self, _key):
raise AttributeError("FrozenDict does not allow deleting elements")


def natural_sorted(values: List) -> List:
    """
    Sort values with natural sort or human order - [sth1, sth2, sth10] or
    [1, 2, 10]

    Runs of decimal digits inside ``str`` and ``bytes`` elements are compared
    by their numeric value; the text between them is compared as usual.
    Elements of any other type are used as their own sort key.

    Parameters
    ----------
    values
        Values to sort. Any iterable is accepted (it is handed to `sorted`),
        so sets work as well as lists.

    Returns
    -------
    New list with sorted values
    """
    def natural_keys(element):
        """
        alist.sort(key=natural_keys) or sorted(alist, key=natural_keys) sorts
        in human order
        """
        if isinstance(element, str):
            chunks = re.split(r'(\d+)', element)
        elif isinstance(element, bytes):
            # A str pattern cannot be applied to bytes; use a bytes pattern.
            chunks = re.split(rb'(\d+)', element)
        else:
            return element
        # Splitting on a capturing group yields text at even indices and the
        # captured digit runs at odd indices. Converting by position (rather
        # than by `isdigit()`) keeps int() away from characters such as "²"
        # that are "digits" to `isdigit()` but are neither matched by \d nor
        # accepted by int().
        return [int(chunk) if index % 2 else chunk
                for index, chunk in enumerate(chunks)]

    return sorted(values, key=natural_keys)
34 changes: 33 additions & 1 deletion Orange/misc/tests/test_collections.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import unittest

from Orange.misc.collections import frozendict
from Orange.misc.collections import frozendict, natural_sorted


class TestFrozenDict(unittest.TestCase):
Expand Down Expand Up @@ -29,5 +29,37 @@ def test_functions_as_dict(self):
self.assertEqual(set(d.items()), {("a", 12), ("b", 13)})


class TestUtils(unittest.TestCase):
    """Checks of the ordering produced by ``natural_sorted``."""

    def test_natural_sorted(self):
        # Shared text prefix followed by numbers of different widths.
        shuffled = ["something1", "something20", "something2", "something12"]
        expected = ["something1", "something2", "something12", "something20"]
        self.assertListEqual(expected, natural_sorted(shuffled))

    def test_natural_sorted_text(self):
        # Strings without any digits.
        shuffled = ["b", "aa", "c", "dd"]
        expected = ["aa", "b", "c", "dd"]
        self.assertListEqual(expected, natural_sorted(shuffled))

    def test_natural_sorted_numbers_str(self):
        # Strings made only of digits.
        shuffled = ["1", "20", "2", "12"]
        expected = ["1", "2", "12", "20"]
        self.assertListEqual(expected, natural_sorted(shuffled))

    def test_natural_sorted_numbers(self):
        # Plain integers.
        shuffled = [1, 20, 2, 12]
        expected = [1, 2, 12, 20]
        self.assertListEqual(expected, natural_sorted(shuffled))


if __name__ == "__main__":
unittest.main()
6 changes: 5 additions & 1 deletion Orange/widgets/utils/domaineditor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from Orange.data import DiscreteVariable, ContinuousVariable, StringVariable, \
TimeVariable, Domain
from Orange.misc.collections import natural_sorted
from Orange.data.util import get_unique_names_duplicates
from Orange.statistics.util import unique
from Orange.widgets import gui
Expand Down Expand Up @@ -326,7 +327,10 @@ def numbers_are_round(var, col_data):
elif tpe == type(orig_var):
var = orig_var.copy(name=new_name)
elif tpe == DiscreteVariable:
values = list(str(i) for i in unique(col_data) if not self._is_missing(i))
values = natural_sorted(
list(str(i) for i in unique(col_data)
if not self._is_missing(i))
)
round_numbers = numbers_are_round(orig_var, col_data)
col_data = [np.nan if self._is_missing(x) else values.index(str(x))
for x in self._iter_vals(col_data)]
Expand Down