Skip to content

Commit 1ad65b9

Browse files
authored
Merge pull request #4818 from PrimozGodec/remove-ordered
Discrete variable: remove ordered attribute
2 parents 6e9f13c + 98e95a2 commit 1ad65b9

17 files changed

+118
-166
lines changed

Orange/data/io_base.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -288,8 +288,7 @@ def _disc_with_vals_column(data: np.ndarray, col: int,
288288
values="", **_) -> _ColumnProperties:
289289
vals, coltype = _TableBuilder._disc_column(data, col)
290290
return _ColumnProperties(valuemap=Flags.split(values), values=vals,
291-
coltype=coltype, orig_values=vals,
292-
coltype_kwargs={"ordered": True})
291+
coltype=coltype, orig_values=vals)
293292

294293
@staticmethod
295294
def _unknown_column(data: np.ndarray, col: int, **_) -> _ColumnProperties:
@@ -607,8 +606,12 @@ def _vartype(var):
607606
if var.is_continuous or var.is_string:
608607
return var.TYPE_HEADERS[0]
609608
elif var.is_discrete:
610-
return Flags.join(var.values) if var.ordered else \
611-
var.TYPE_HEADERS[0]
609+
# if number of values is 1 order is not important if more
610+
# values write order in file
611+
return (
612+
Flags.join(var.values) if len(var.values) >= 2
613+
else var.TYPE_HEADERS[0]
614+
)
612615
raise NotImplementedError
613616

614617
return ['continuous'] * data.has_weights() + \

Orange/data/pandas_compat.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,9 @@ def _column_to_series(col, vals):
9797
result = ()
9898
if col.is_discrete:
9999
codes = pd.Series(vals).fillna(-1).astype(int)
100-
result = (col.name, pd.Categorical.from_codes(codes=codes, categories=col.values,
101-
ordered=col.ordered))
100+
result = (col.name, pd.Categorical.from_codes(
101+
codes=codes, categories=col.values, ordered=True
102+
))
102103
elif col.is_time:
103104
result = (col.name, pd.to_datetime(vals, unit='s').to_series().reset_index()[0])
104105
elif col.is_continuous:

Orange/data/tests/test_io_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ def test_column_parts_discrete_values(self):
128128
np.testing.assert_array_equal(column.orig_values,
129129
["red", "red", "green"])
130130
self.assertEqual(column.coltype, DiscreteVariable)
131-
self.assertDictEqual(column.coltype_kwargs, {'ordered': True})
131+
self.assertDictEqual(column.coltype_kwargs, {})
132132

133133
def test_unknown_type_column(self):
134134
data = np.array(self.header0)

Orange/data/tests/test_variable.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -279,15 +279,12 @@ def test_repr(self):
279279
self.assertEqual(
280280
repr(var),
281281
"DiscreteVariable(name='a', values=('F', 'M'))")
282-
var.ordered = True
283-
self.assertEqual(
284-
repr(var),
285-
"DiscreteVariable(name='a', values=('F', 'M'), ordered=True)")
286282

287283
var = DiscreteVariable.make("a", values="1234567")
288284
self.assertEqual(
289285
repr(var),
290-
"DiscreteVariable(name='a', values=('1', '2', '3', '4', '5', '6', '7'))")
286+
"DiscreteVariable(name='a', values=('1', '2', '3', '4', '5', '6', '7'))"
287+
)
291288

292289
def test_no_nonstringvalues(self):
293290
self.assertRaises(TypeError, DiscreteVariable, "foo", values=("a", 42))
@@ -488,7 +485,6 @@ def varcls_modified(self, name):
488485
var = super().varcls_modified(name)
489486
var.add_value("A")
490487
var.add_value("B")
491-
var.ordered = True
492488
return var
493489

494490
def test_copy_checks_len_values(self):
@@ -508,6 +504,16 @@ def test_copy_checks_len_values(self):
508504
var2 = var.copy(values=("W", "M"))
509505
self.assertEqual(var2.values, ("W", "M"))
510506

507+
def test_remove_ordered(self):
508+
"""
509+
ordered is deprecated when this test starts to fail remove ordered
510+
parameter. Remove also this test.
511+
Ordered parameter should still be allowed in __init__ for backward
512+
compatibilities in data-sets pickled with older versions, I suggest
513+
adding **kwargs which is ignored
514+
"""
515+
self.assertLess(Orange.__version__, "3.29.0")
516+
511517

512518
@variabletest(ContinuousVariable)
513519
class TestContinuousVariable(VariableTest):
@@ -697,10 +703,7 @@ def varcls_modified(self, name):
697703
"PickleDiscreteVariable",
698704
("with_name", lambda: DiscreteVariable(name="Feature 0")),
699705
("with_str_value", lambda: DiscreteVariable(name="Feature 0",
700-
values=("F", "M"))),
701-
("ordered", lambda: DiscreteVariable(name="Feature 0",
702-
values=("F", "M"),
703-
ordered=True)),
706+
values=("F", "M")))
704707
)
705708

706709

@@ -712,7 +715,7 @@ def varcls_modified(self, name):
712715

713716
class VariableTestMakeProxy(unittest.TestCase):
714717
def test_make_proxy_disc(self):
715-
abc = DiscreteVariable("abc", values="abc", ordered=True)
718+
abc = DiscreteVariable("abc", values="abc")
716719
abc1 = abc.make_proxy()
717720
abc2 = abc1.make_proxy()
718721
self.assertEqual(abc, abc1)
@@ -721,7 +724,7 @@ def test_make_proxy_disc(self):
721724
self.assertEqual(hash(abc), hash(abc1))
722725
self.assertEqual(hash(abc1), hash(abc2))
723726

724-
abcx = DiscreteVariable("abc", values="abc", ordered=True)
727+
abcx = DiscreteVariable("abc", values="abc")
725728
self.assertEqual(abc, abcx)
726729
self.assertIsNot(abc, abcx)
727730

Orange/data/variable.py

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -629,21 +629,16 @@ class DiscreteVariable(Variable):
629629
.. attribute:: values
630630
631631
A list of variable's values.
632-
633-
.. attribute:: ordered
634-
635-
Some algorithms (and, in particular, visualizations) may
636-
sometime reorder the values of the variable, e.g. alphabetically.
637-
This flag hints that the given order of values is "natural"
638-
(e.g. "small", "middle", "large") and should not be changed.
639632
"""
640633

641634
TYPE_HEADERS = ('discrete', 'd', 'categorical')
642635

643636
presorted_values = []
644637

645-
def __init__(self, name="", values=(), ordered=False, compute_value=None,
646-
*, sparse=False):
638+
def __init__(
639+
self, name="", values=(), ordered=None, compute_value=None,
640+
*, sparse=False
641+
):
647642
""" Construct a discrete variable descriptor with the given values. """
648643
values = TupleList(values) # some people (including me) pass a generator
649644
if not all(isinstance(value, str) for value in values):
@@ -652,7 +647,23 @@ def __init__(self, name="", values=(), ordered=False, compute_value=None,
652647
super().__init__(name, compute_value, sparse=sparse)
653648
self._values = values
654649
self._value_index = {value: i for i, value in enumerate(values)}
655-
self.ordered = ordered
650+
651+
if ordered is not None:
652+
warnings.warn(
653+
"ordered is deprecated and does not have effect. It will be "
654+
"removed in future versions.",
655+
OrangeDeprecationWarning
656+
)
657+
658+
@property
659+
def ordered(self):
660+
warnings.warn(
661+
"ordered is deprecated. It will be removed in future versions.",
662+
# FutureWarning warning is used instead of OrangeDeprecation
663+
# warning otherwise tests fail (__repr__ still asks for ordered)
664+
FutureWarning
665+
)
666+
return None
656667

657668
@property
658669
def values(self):
@@ -819,9 +830,11 @@ def __reduce__(self):
819830
raise PickleError("Variables without names cannot be pickled")
820831
__dict__ = dict(self.__dict__)
821832
__dict__.pop("_values")
822-
return make_variable, (self.__class__, self._compute_value, self.name,
823-
self.values, self.ordered), \
833+
return (
834+
make_variable,
835+
(self.__class__, self._compute_value, self.name, self.values),
824836
__dict__
837+
)
825838

826839
def copy(self, compute_value=Variable._CopyComputeValue,
827840
*, name=None, values=None, **_):
@@ -830,7 +843,7 @@ def copy(self, compute_value=Variable._CopyComputeValue,
830843
raise ValueError(
831844
"number of values must match the number of original values")
832845
return super().copy(compute_value=compute_value, name=name,
833-
values=values or self.values, ordered=self.ordered)
846+
values=values or self.values)
834847

835848

836849
class StringVariable(Variable):

Orange/preprocess/remove.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,6 @@ def merge_transforms(exp):
169169
new_var = DiscreteVariable(
170170
exp.var.name,
171171
values=exp.var.values,
172-
ordered=exp.var.ordered,
173172
compute_value=merge_lookup(A, B),
174173
sparse=exp.var.sparse,
175174
)

Orange/statistics/distribution.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -217,12 +217,10 @@ def sample(self, size=None, replace=True):
217217
return data.Value(self.variable, value_indices)
218218

219219
def min(self):
220-
if self.variable.ordered:
221-
return self.variable.values[0]
220+
return None
222221

223222
def max(self):
224-
if self.variable.ordered:
225-
return self.variable.values[-1]
223+
return None
226224

227225
def sum(self, *args, **kwargs):
228226
res = super().sum(*args, **kwargs)

Orange/tests/test_distribution.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def setUp(self):
3232
data.Domain(
3333
attributes=[
3434
data.DiscreteVariable('rgb', values=('r', 'g', 'b', 'a')),
35-
data.DiscreteVariable('num', values=('1', '2', '3'), ordered=True),
35+
data.DiscreteVariable('num', values=('1', '2', '3')),
3636
]
3737
),
3838
X=np.array([
@@ -201,8 +201,8 @@ def test_min_max(self):
201201
self.assertEqual(self.rgb.min(), None)
202202
self.assertEqual(self.rgb.max(), None)
203203
# Min and max should work for ordinal variables
204-
self.assertEqual(self.num.min(), '1')
205-
self.assertEqual(self.num.max(), '3')
204+
self.assertEqual(self.num.min(), None)
205+
self.assertEqual(self.num.max(), None)
206206

207207
def test_array_with_unknowns(self):
208208
d = data.Table("zoo")

Orange/tests/test_io.py

Lines changed: 34 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import shutil
88
import tempfile
99
import unittest
10+
import warnings
1011
from unittest.mock import Mock, patch
1112

1213
from Orange import data
@@ -16,6 +17,7 @@
1617
from Orange.data.table import get_sample_datasets_dir
1718
from Orange.data import Table, Variable
1819
from Orange.tests import test_dirname
20+
from Orange.util import OrangeDeprecationWarning
1921

2022

2123
class WildcardReader(FileFormat):
@@ -161,27 +163,34 @@ def test_load_pickle(self):
161163
This function tests whether pickled files in older Orange loads
162164
correctly with newer version of Orange.
163165
"""
164-
# load pickles created with Orange 3.20
165-
# in next version there is a change in variables.py - line 738
166-
# which broke back compatibility - tests were introduced after the fix
167-
data1 = Table("datasets/sailing-orange-3-20.pkl")
168-
data2 = Table("datasets/sailing-orange-3-20.pkl.gz")
169-
170-
# load pickles created with Orange 3.21
171-
data3 = Table("datasets/sailing-orange-3-21.pkl")
172-
data4 = Table("datasets/sailing-orange-3-21.pkl.gz")
173-
174-
examples_count = 20
175-
self.assertEqual(examples_count, len(data1))
176-
self.assertEqual(examples_count, len(data2))
177-
self.assertEqual(examples_count, len(data3))
178-
self.assertEqual(examples_count, len(data4))
179-
180-
attributes_count = 3
181-
self.assertEqual(attributes_count, len(data1.domain.attributes))
182-
self.assertEqual(attributes_count, len(data2.domain.attributes))
183-
self.assertEqual(attributes_count, len(data3.domain.attributes))
184-
self.assertEqual(attributes_count, len(data4.domain.attributes))
166+
with warnings.catch_warnings():
167+
# in unittests on travis/github actions OrangeDeprecationWarning
168+
# is raised as an error. With this statement it si disabled only
169+
# for this test - when unpickling pickle created with version older
170+
# than 3.27 ordered parameter in DiscreteVariable which is
171+
# deprecated still appears - which will raise deprecation warning
172+
warnings.simplefilter('default', OrangeDeprecationWarning)
173+
# load pickles created with Orange 3.20
174+
# in next version there is a change in variables.py - line 738
175+
# which broke back compatibility - tests introduced after the fix
176+
data1 = Table("datasets/sailing-orange-3-20.pkl")
177+
data2 = Table("datasets/sailing-orange-3-20.pkl.gz")
178+
179+
# load pickles created with Orange 3.21
180+
data3 = Table("datasets/sailing-orange-3-21.pkl")
181+
data4 = Table("datasets/sailing-orange-3-21.pkl.gz")
182+
183+
examples_count = 20
184+
self.assertEqual(examples_count, len(data1))
185+
self.assertEqual(examples_count, len(data2))
186+
self.assertEqual(examples_count, len(data3))
187+
self.assertEqual(examples_count, len(data4))
188+
189+
attributes_count = 3
190+
self.assertEqual(attributes_count, len(data1.domain.attributes))
191+
self.assertEqual(attributes_count, len(data2.domain.attributes))
192+
self.assertEqual(attributes_count, len(data3.domain.attributes))
193+
self.assertEqual(attributes_count, len(data4.domain.attributes))
185194

186195
def test_pickle_version(self):
187196
"""
@@ -196,3 +205,7 @@ def test_pickle_version(self):
196205
self.assertGreaterEqual(PICKLE_PROTOCOL, pickle.DEFAULT_PROTOCOL)
197206
# we should not use a version that is not supported
198207
self.assertLessEqual(PICKLE_PROTOCOL, pickle.HIGHEST_PROTOCOL)
208+
209+
210+
if __name__ == "__main__":
211+
unittest.main()

Orange/widgets/data/owcsvimport.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1543,12 +1543,11 @@ def pandas_to_table(df):
15431543
coldata = series.values # type: pd.Categorical
15441544
categories = [str(c) for c in coldata.categories]
15451545
var = Orange.data.DiscreteVariable.make(
1546-
str(header), values=categories, ordered=coldata.ordered
1546+
str(header), values=categories
15471547
)
15481548
# Remap the coldata into the var.values order/set
15491549
coldata = pd.Categorical(
1550-
coldata.astype("str"), categories=var.values,
1551-
ordered=coldata.ordered,
1550+
coldata.astype(coldata), categories=var.values
15521551
)
15531552
codes = coldata.codes
15541553
assert np.issubdtype(codes.dtype, np.integer)

0 commit comments

Comments
 (0)