Skip to content

Commit 8c9c9e7

Browse files
committed
Support better type deduction
o Empty/None values are ignored for deducing the type of a column o Comma-separated numbers are allowed for int and float types
1 parent cecb08e commit 8c9c9e7

File tree

2 files changed

+61
-7
lines changed

2 files changed

+61
-7
lines changed

tabulate/__init__.py

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -933,8 +933,13 @@ def _isbool(string):
933933
def _type(string, has_invisible=True, numparse=True):
934934
"""The least generic type (type(None), int, float, str, unicode).
935935
936+
Treats empty string as missing for the purposes of type deduction, so as to not influence
937+
the type of an otherwise complete column; does *not* result in missingval replacement!
938+
936939
>>> _type(None) is type(None)
937940
True
941+
>>> _type("") is type(None)
942+
True
938943
>>> _type("foo") is type("")
939944
True
940945
>>> _type("1") is type(1)
@@ -949,15 +954,26 @@ def _type(string, has_invisible=True, numparse=True):
949954
if has_invisible and isinstance(string, (str, bytes)):
950955
string = _strip_ansi(string)
951956

952-
if string is None:
957+
if string is None or (isinstance(string, (bytes, str)) and not string):
953958
return type(None)
954959
elif hasattr(string, "isoformat"): # datetime.datetime, date, and time
955960
return str
956961
elif _isbool(string):
957962
return bool
958-
elif _isint(string) and numparse:
963+
elif numparse and (
964+
_isint(string) or (
965+
isinstance(string, str)
966+
and _isnumber_with_thousands_separator(string)
967+
and '.' not in string
968+
)
969+
):
959970
return int
960-
elif _isnumber(string) and numparse:
971+
elif numparse and (
972+
_isnumber(string) or (
973+
isinstance(string, str)
974+
and _isnumber_with_thousands_separator(string)
975+
)
976+
):
961977
return float
962978
elif isinstance(string, bytes):
963979
return bytes
@@ -1251,7 +1267,7 @@ def _column_type(strings, has_invisible=True, numparse=True):
12511267

12521268

12531269
def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True):
1254-
"""Format a value according to its type.
1270+
"""Format a value according to its deduced type. Empty values are deemed valid for any type.
12551271
12561272
Unicode is supported:
12571273
@@ -1264,6 +1280,8 @@ def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True):
12641280
""" # noqa
12651281
if val is None:
12661282
return missingval
1283+
if isinstance(val, (bytes, str)) and not val:
1284+
return ""
12671285

12681286
if valtype is str:
12691287
return f"{val}"
@@ -1298,6 +1316,8 @@ def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True):
12981316
formatted_val = format(float(raw_val), floatfmt)
12991317
return val.replace(raw_val, formatted_val)
13001318
else:
1319+
if isinstance(val,str) and ',' in val:
1320+
val = val.replace(',', '') # handle thousands-separators
13011321
return format(float(val), floatfmt)
13021322
else:
13031323
return f"{val}"
@@ -1592,9 +1612,10 @@ def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True):
15921612

15931613
if width is not None:
15941614
wrapper = _CustomTextWrap(width=width)
1595-
# Cast based on our internal type handling
1596-
# Any future custom formatting of types (such as datetimes)
1597-
# may need to be more explicit than just `str` of the object
1615+
# Cast based on our internal type handling. Any future custom
1616+
# formatting of types (such as datetimes) may need to be more
1617+
# explicit than just `str` of the object. Also doesn't work for
1618+
# custom floatfmt/intfmt, nor with any missing/blank cells.
15981619
casted_cell = (
15991620
str(cell) if _isnumber(cell) else _type(cell, numparse)(cell)
16001621
)

test/test_output.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2824,6 +2824,13 @@ def test_floatfmt():
28242824
assert_equal(expected, result)
28252825

28262826

2827+
def test_floatfmt_thousands():
2828+
"Output: floating point format"
2829+
result = tabulate([["1.23456789"], [1.0], ["1,234.56"]], floatfmt=".3f", tablefmt="plain")
2830+
expected = " 1.235\n 1.000\n1234.560"
2831+
assert_equal(expected, result)
2832+
2833+
28272834
def test_floatfmt_multi():
28282835
"Output: floating point format different for each column"
28292836
result = tabulate(
@@ -2964,6 +2971,32 @@ def test_missingval_multi():
29642971
assert_equal(expected, result)
29652972

29662973

2974+
def test_column_emptymissing_deduction():
2975+
"Missing or empty/blank values shouldn't change type deduction of rest of column"
2976+
from fractions import Fraction
2977+
2978+
test_table = [
2979+
[None, "1.23423515351", Fraction(1, 3)],
2980+
[Fraction(56789, 1000000), 12345.1, b"abc"],
2981+
["", b"", None],
2982+
[Fraction(10000, 3), None, ""],
2983+
]
2984+
result = tabulate(
2985+
test_table,
2986+
floatfmt=",.5g",
2987+
missingval="?",
2988+
)
2989+
print(f"\n{result}")
2990+
expected = """\
2991+
------------ ----------- ---
2992+
? 1.2342 1/3
2993+
0.056789 12,345 abc
2994+
?
2995+
3,333.3 ?
2996+
------------ ----------- ---"""
2997+
assert_equal(expected, result)
2998+
2999+
29673000
def test_column_alignment():
29683001
"Output: custom alignment for text and numbers"
29693002
expected = "\n".join(["----- ---", "Alice 1", " Bob 333", "----- ---"])

0 commit comments

Comments
 (0)