Skip to content

Commit bd2f7ec

Browse files
authored
Merge pull request #4575 from janezd/str-val-more-decimals
[ENH] Fix printing values with too few decimals
2 parents f7788e5 + c36d9bf commit bd2f7ec

File tree

5 files changed: 118 additions and 8 deletions.

Orange/data/tests/test_variable.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -498,23 +498,42 @@ def test_make(self):
498498

499499
def test_decimals(self):
500500
a = ContinuousVariable("a", 4)
501-
self.assertEqual(a.str_val(4.654321), "4.6543")
502-
self.assertEqual(a.str_val(4.654321654321), "4.6543")
501+
self.assertEqual(a.str_val(4.6543), "4.6543")
502+
self.assertEqual(a.str_val(4.25), "4.2500")
503503
self.assertEqual(a.str_val(Unknown), "?")
504504
a = ContinuousVariable("a", 5)
505505
self.assertEqual(a.str_val(0.000000000001), "0.00000")
506506
a = ContinuousVariable("a", 10)
507507
self.assertEqual(a.str_val(0.000000000001), "1e-12")
508508

509+
def test_more_decimals(self):
510+
a = ContinuousVariable("a", 0)
511+
self.assertEqual(a.str_val(4), "4")
512+
self.assertEqual(a.str_val(4.1234), "4.12")
513+
514+
a = ContinuousVariable("a", 2)
515+
self.assertEqual(a.str_val(4), "4.00")
516+
self.assertEqual(a.str_val(4.25), "4.25")
517+
self.assertEqual(a.str_val(4.1234123), "4.1234")
518+
519+
for cca4 in (4 + 1e-9, 4 - 1e-9):
520+
assert cca4 != 4
521+
self.assertEqual(a.str_val(cca4), "4.00")
522+
509523
def test_adjust_decimals(self):
524+
# Default is 3 decimals, but format is %g
510525
a = ContinuousVariable("a")
526+
self.assertEqual(a.str_val(5), "5")
511527
self.assertEqual(a.str_val(4.65432), "4.65432")
528+
529+
# Change to no decimals
512530
a.val_from_str_add("5")
513-
self.assertEqual(a.str_val(4.65432), "5")
531+
self.assertEqual(a.str_val(5), "5")
532+
533+
# Change to two decimals
514534
a.val_from_str_add(" 5.12 ")
515-
self.assertEqual(a.str_val(4.65432), "4.65")
516-
a.val_from_str_add("5.1234")
517-
self.assertEqual(a.str_val(4.65432), "4.6543")
535+
self.assertEqual(a.str_val(4.65), "4.65")
536+
self.assertEqual(a.str_val(5), "5.00")
518537

519538
def varcls_modified(self, name):
520539
var = super().varcls_modified(name)

Orange/data/variable.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,7 @@ def __init__(self, name="", number_of_decimals=None, compute_value=None, *, spar
529529
three, but adjusted at the first call of :obj:`to_val`.
530530
"""
531531
super().__init__(name, compute_value, sparse=sparse)
532+
self._max_round_diff = 0
532533
self.number_of_decimals = number_of_decimals
533534

534535
@property
@@ -553,6 +554,7 @@ def number_of_decimals(self, x):
553554
return
554555

555556
self._number_of_decimals = x
557+
self._max_round_diff = 10 ** (-x - 6)
556558
self.adjust_decimals = 0
557559
if self._number_of_decimals <= MAX_NUM_OF_DECIMALS:
558560
self._format_str = "%.{}f".format(self.number_of_decimals)
@@ -580,6 +582,10 @@ def repr_val(self, val):
580582
"""
581583
if isnan(val):
582584
return "?"
585+
if self.format_str != "%g" \
586+
and abs(round(val, self._number_of_decimals) - val) \
587+
> self._max_round_diff:
588+
return f"{val:.{self._number_of_decimals + 2}f}"
583589
return self._format_str % val
584590

585591
str_val = repr_val
@@ -593,6 +599,7 @@ def copy(self, compute_value=None, *, name=None, **kwargs):
593599
var.number_of_decimals = number_of_decimals
594600
else:
595601
var._number_of_decimals = self._number_of_decimals
602+
var._max_round_diff = self._max_round_diff
596603
var.adjust_decimals = self.adjust_decimals
597604
var.format_str = self._format_str
598605
return var

Orange/tests/test_discretize.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,13 +140,17 @@ def test_create_discretized_var_formatting(self):
140140

141141
dvar = discretize.Discretizer.create_discretized_var(
142142
self.var, [10.1234])
143-
self.assertEqual(dvar.values, ("< 10.1", "≥ 10.1"))
143+
self.assertEqual(dvar.values, ("< 10.123", "≥ 10.123"))
144144

145145
self.var.number_of_decimals = 3
146146

147+
dvar = discretize.Discretizer.create_discretized_var(
148+
self.var, [5, 10.25])
149+
self.assertEqual(dvar.values, ("< 5", "5 - 10.25", "≥ 10.25"))
150+
147151
dvar = discretize.Discretizer.create_discretized_var(
148152
self.var, [5, 10.1234])
149-
self.assertEqual(dvar.values, ("< 5", "5 - 10.123", "≥ 10.123"))
153+
self.assertEqual(dvar.values, ("< 5", "5 - 10.1234", "≥ 10.1234"))
150154

151155
def test_discretizer_computation(self):
152156
dvar = discretize.Discretizer.create_discretized_var(

benchmark/bench_save.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
from functools import partial
2+
import os
3+
4+
import numpy as np
5+
import scipy.sparse
6+
7+
from Orange.data import Table, ContinuousVariable, Domain
8+
from .base import Benchmark, benchmark
9+
10+
11+
def save(table, fn):
12+
try:
13+
table.save(fn)
14+
finally:
15+
os.remove(fn)
16+
17+
18+
class BenchSave(Benchmark):
19+
20+
def setup_dense(self, rows, cols, varkwargs=None):
21+
if varkwargs is None:
22+
varkwargs = {}
23+
self.table = Table.from_numpy( # pylint: disable=W0201
24+
Domain([ContinuousVariable(str(i), **varkwargs) for i in range(cols)]),
25+
np.random.RandomState(0).rand(rows, cols))
26+
27+
def setup_sparse(self, rows, cols, varkwargs=None):
28+
if varkwargs is None:
29+
varkwargs = {}
30+
sparse = scipy.sparse.rand(rows, cols, density=0.01, format='csr', random_state=0)
31+
self.table = Table.from_numpy( # pylint: disable=W0201
32+
Domain([ContinuousVariable(str(i), sparse=True, **varkwargs) for i in range(cols)]),
33+
sparse)
34+
35+
@benchmark(setup=partial(setup_dense, rows=100, cols=10))
36+
def bench_print_dense(self):
37+
str(self.table)
38+
39+
@benchmark(setup=partial(setup_dense, rows=100, cols=10,
40+
varkwargs={"number_of_decimals": 2}))
41+
def bench_print_dense_decimals(self):
42+
str(self.table)
43+
44+
@benchmark(setup=partial(setup_sparse, rows=100, cols=10), number=5)
45+
def bench_print_sparse(self):
46+
str(self.table)
47+
48+
@benchmark(setup=partial(setup_sparse, rows=100, cols=10,
49+
varkwargs={"number_of_decimals": 2}),
50+
number=5)
51+
def bench_print_sparse_decimals(self):
52+
str(self.table)
53+
54+
@benchmark(setup=partial(setup_dense, rows=100, cols=100))
55+
def bench_save_tab(self):
56+
save(self.table, "temp_save.tab")
57+
58+
@benchmark(setup=partial(setup_dense, rows=100, cols=100,
59+
varkwargs={"number_of_decimals": 2}))
60+
def bench_save_tab_decimals(self):
61+
save(self.table, "temp_save.tab")

benchmark/bench_variable.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from Orange.data import ContinuousVariable
2+
from .base import Benchmark, benchmark
3+
4+
5+
class BenchContinuous(Benchmark):
6+
7+
# pylint: disable=no-self-use
8+
@benchmark()
9+
def bench_str_val_decimals(self):
10+
a = ContinuousVariable("a", 4)
11+
for _ in range(1000):
12+
a.str_val(1.23456)
13+
14+
# pylint: disable=no-self-use
15+
@benchmark()
16+
def bench_str_val_g(self):
17+
a = ContinuousVariable("a")
18+
for _ in range(1000):
19+
a.str_val(1.23456)

0 commit comments