Skip to content

Commit 55573d7

Browse files
⚡️ Speed up function _isbool by 28% in PR #217 (proper-cleanup)
Here’s an optimized version of your function.

**Optimization notes:**
- The `type(x) is y` check is retained for speed.
- `set` instantiation is avoided in favor of direct comparison with `"True"` and `"False"` when dealing with strings.
- Only `str` is checked, rather than `(bytes, str)`: in Python 3 a `bytes` object never compares equal to the `str` literals `"True"` or `"False"`, so the `bytes` case could never match and the result is unchanged.
- If matching `bytes` values is strictly required (the literal values in the set do not functionally need it), let me know.
1 parent 62e10b1 commit 55573d7

File tree

1 file changed

+148
-34
lines changed

1 file changed

+148
-34
lines changed

codeflash/code_utils/tabulate.py

Lines changed: 148 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,8 @@ def _is_separating_line_value(value):
6161
def _is_separating_line(row):
6262
row_type = type(row)
6363
is_sl = (row_type == list or row_type == str) and (
64-
(len(row) >= 1 and _is_separating_line_value(row[0])) or (len(row) >= 2 and _is_separating_line_value(row[1]))
64+
(len(row) >= 1 and _is_separating_line_value(row[0]))
65+
or (len(row) >= 2 and _is_separating_line_value(row[1]))
6566
)
6667

6768
return is_sl
@@ -151,7 +152,9 @@ def _pipe_line_with_colons(colwidths, colaligns):
151152
_ansi_codes_bytes = re.compile(_ansi_escape_pat.encode("utf8"), re.VERBOSE)
152153
_ansi_color_reset_code = "\033[0m"
153154

154-
_float_with_thousands_separators = re.compile(r"^(([+-]?[0-9]{1,3})(?:,([0-9]{3}))*)?(?(1)\.[0-9]*|\.[0-9]+)?$")
155+
_float_with_thousands_separators = re.compile(
156+
r"^(([+-]?[0-9]{1,3})(?:,([0-9]{3}))*)?(?(1)\.[0-9]*|\.[0-9]+)?$"
157+
)
155158

156159

157160
def _isnumber_with_thousands_separator(string):
@@ -200,12 +203,19 @@ def _isint(string, inttype=int):
200203
(hasattr(string, "is_integer") or hasattr(string, "__array__"))
201204
and str(type(string)).startswith("<class 'numpy.int")
202205
) # numpy.int64 and similar
203-
or (isinstance(string, (bytes, str)) and _isconvertible(inttype, string)) # integer as string
206+
or (
207+
isinstance(string, (bytes, str)) and _isconvertible(inttype, string)
208+
) # integer as string
204209
)
205210

206211

207212
def _isbool(string):
208-
return type(string) is bool or (isinstance(string, (bytes, str)) and string in {"True", "False"})
213+
# Check for bool first (fastest), else for str equality (avoid set instantiation)
214+
if type(string) is bool:
215+
return True
216+
if isinstance(string, str):
217+
return string == "True" or string == "False"
218+
return False
209219

210220

211221
def _type(string, has_invisible=True, numparse=True):
@@ -219,10 +229,18 @@ def _type(string, has_invisible=True, numparse=True):
219229
if _isbool(string):
220230
return bool
221231
if numparse and (
222-
_isint(string) or (isinstance(string, str) and _isnumber_with_thousands_separator(string) and "." not in string)
232+
_isint(string)
233+
or (
234+
isinstance(string, str)
235+
and _isnumber_with_thousands_separator(string)
236+
and "." not in string
237+
)
223238
):
224239
return int
225-
if numparse and (_isnumber(string) or (isinstance(string, str) and _isnumber_with_thousands_separator(string))):
240+
if numparse and (
241+
_isnumber(string)
242+
or (isinstance(string, str) and _isnumber_with_thousands_separator(string))
243+
):
226244
return float
227245
if isinstance(string, bytes):
228246
return bytes
@@ -365,19 +383,29 @@ def _align_column(
365383
is_multiline=False,
366384
preserve_whitespace=False,
367385
):
368-
strings, padfn = _align_column_choose_padfn(strings, alignment, has_invisible, preserve_whitespace)
369-
width_fn = _align_column_choose_width_fn(has_invisible, enable_widechars, is_multiline)
386+
strings, padfn = _align_column_choose_padfn(
387+
strings, alignment, has_invisible, preserve_whitespace
388+
)
389+
width_fn = _align_column_choose_width_fn(
390+
has_invisible, enable_widechars, is_multiline
391+
)
370392

371393
s_widths = list(map(width_fn, strings))
372394
maxwidth = max(max(_flat_list(s_widths)), minwidth)
373395
# TODO: refactor column alignment in single-line and multiline modes
374396
if is_multiline:
375397
if not enable_widechars and not has_invisible:
376-
padded_strings = ["\n".join([padfn(maxwidth, s) for s in ms.splitlines()]) for ms in strings]
398+
padded_strings = [
399+
"\n".join([padfn(maxwidth, s) for s in ms.splitlines()])
400+
for ms in strings
401+
]
377402
else:
378403
# enable wide-character width corrections
379404
s_lens = [[len(s) for s in re.split("[\r\n]", ms)] for ms in strings]
380-
visible_widths = [[maxwidth - (w - l) for w, l in zip(mw, ml)] for mw, ml in zip(s_widths, s_lens)]
405+
visible_widths = [
406+
[maxwidth - (w - l) for w, l in zip(mw, ml)]
407+
for mw, ml in zip(s_widths, s_lens)
408+
]
381409
# wcswidth and _visible_width don't count invisible characters;
382410
# padfn doesn't need to apply another correction
383411
padded_strings = [
@@ -419,13 +447,19 @@ def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True):
419447
if valtype is int:
420448
if isinstance(val, str):
421449
val_striped = val.encode("unicode_escape").decode("utf-8")
422-
colored = re.search(r"(\\[xX]+[0-9a-fA-F]+\[\d+[mM]+)([0-9.]+)(\\.*)$", val_striped)
450+
colored = re.search(
451+
r"(\\[xX]+[0-9a-fA-F]+\[\d+[mM]+)([0-9.]+)(\\.*)$", val_striped
452+
)
423453
if colored:
424454
total_groups = len(colored.groups())
425455
if total_groups == 3:
426456
digits = colored.group(2)
427457
if digits.isdigit():
428-
val_new = colored.group(1) + format(int(digits), intfmt) + colored.group(3)
458+
val_new = (
459+
colored.group(1)
460+
+ format(int(digits), intfmt)
461+
+ colored.group(3)
462+
)
429463
val = val_new.encode("utf-8").decode("unicode_escape")
430464
intfmt = ""
431465
return format(val, intfmt)
@@ -447,11 +481,15 @@ def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True):
447481
return f"{val}"
448482

449483

450-
def _align_header(header, alignment, width, visible_width, is_multiline=False, width_fn=None):
484+
def _align_header(
485+
header, alignment, width, visible_width, is_multiline=False, width_fn=None
486+
):
451487
"""Pad string header to width chars given known visible_width of the header."""
452488
if is_multiline:
453489
header_lines = re.split(_multiline_codes, header)
454-
padded_lines = [_align_header(h, alignment, width, width_fn(h)) for h in header_lines]
490+
padded_lines = [
491+
_align_header(h, alignment, width, width_fn(h)) for h in header_lines
492+
]
455493
return "\n".join(padded_lines)
456494
# else: not multiline
457495
ninvisible = len(header) - visible_width
@@ -504,14 +542,19 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"):
504542
# likely a conventional dict
505543
keys = tabular_data.keys()
506544
try:
507-
rows = list(izip_longest(*tabular_data.values())) # columns have to be transposed
545+
rows = list(
546+
izip_longest(*tabular_data.values())
547+
) # columns have to be transposed
508548
except TypeError: # not iterable
509549
raise TypeError(err_msg)
510550

511551
elif hasattr(tabular_data, "index"):
512552
# values is a property, has .index => it's likely a pandas.DataFrame (pandas 0.11.0)
513553
keys = list(tabular_data)
514-
if showindex in {"default", "always", True} and tabular_data.index.name is not None:
554+
if (
555+
showindex in {"default", "always", True}
556+
and tabular_data.index.name is not None
557+
):
515558
if isinstance(tabular_data.index.name, list):
516559
keys[:0] = tabular_data.index.name
517560
else:
@@ -535,10 +578,19 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"):
535578
if headers == "keys" and not rows:
536579
# an empty table (issue #81)
537580
headers = []
538-
elif headers == "keys" and hasattr(tabular_data, "dtype") and tabular_data.dtype.names:
581+
elif (
582+
headers == "keys"
583+
and hasattr(tabular_data, "dtype")
584+
and tabular_data.dtype.names
585+
):
539586
# numpy record array
540587
headers = tabular_data.dtype.names
541-
elif headers == "keys" and len(rows) > 0 and isinstance(rows[0], tuple) and hasattr(rows[0], "_fields"):
588+
elif (
589+
headers == "keys"
590+
and len(rows) > 0
591+
and isinstance(rows[0], tuple)
592+
and hasattr(rows[0], "_fields")
593+
):
542594
# namedtuple
543595
headers = list(map(str, rows[0]._fields))
544596
elif len(rows) > 0 and hasattr(rows[0], "keys") and hasattr(rows[0], "values"):
@@ -569,7 +621,9 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"):
569621
else:
570622
headers = []
571623
elif headers:
572-
raise ValueError("headers for a list of dicts is not a dict or a keyword")
624+
raise ValueError(
625+
"headers for a list of dicts is not a dict or a keyword"
626+
)
573627
rows = [[row.get(k) for k in keys] for row in rows]
574628

575629
elif (
@@ -582,7 +636,11 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"):
582636
# print tabulate(cursor, headers='keys')
583637
headers = [column[0] for column in tabular_data.description]
584638

585-
elif dataclasses is not None and len(rows) > 0 and dataclasses.is_dataclass(rows[0]):
639+
elif (
640+
dataclasses is not None
641+
and len(rows) > 0
642+
and dataclasses.is_dataclass(rows[0])
643+
):
586644
# Python's dataclass
587645
field_names = [field.name for field in dataclasses.fields(rows[0])]
588646
if headers == "keys":
@@ -652,7 +710,9 @@ def tabulate(
652710
if tabular_data is None:
653711
tabular_data = []
654712

655-
list_of_lists, headers, headers_pad = _normalize_tabular_data(tabular_data, headers, showindex=showindex)
713+
list_of_lists, headers, headers_pad = _normalize_tabular_data(
714+
tabular_data, headers, showindex=showindex
715+
)
656716
list_of_lists, separating_lines = _remove_separating_lines(list_of_lists)
657717

658718
# PrettyTable formatting does not use any extra padding.
@@ -694,7 +754,11 @@ def tabulate(
694754
has_invisible = _ansi_codes.search(plain_text) is not None
695755

696756
enable_widechars = wcwidth is not None and WIDE_CHARS_MODE
697-
if not isinstance(tablefmt, TableFormat) and tablefmt in multiline_formats and _is_multiline(plain_text):
757+
if (
758+
not isinstance(tablefmt, TableFormat)
759+
and tablefmt in multiline_formats
760+
and _is_multiline(plain_text)
761+
):
698762
tablefmt = multiline_formats.get(tablefmt, tablefmt)
699763
is_multiline = True
700764
else:
@@ -706,13 +770,17 @@ def tabulate(
706770
numparses = _expand_numparse(disable_numparse, len(cols))
707771
coltypes = [_column_type(col, numparse=np) for col, np in zip(cols, numparses)]
708772
if isinstance(floatfmt, str): # old version
709-
float_formats = len(cols) * [floatfmt] # just duplicate the string to use in each column
773+
float_formats = len(cols) * [
774+
floatfmt
775+
] # just duplicate the string to use in each column
710776
else: # if floatfmt is list, tuple etc we have one per column
711777
float_formats = list(floatfmt)
712778
if len(float_formats) < len(cols):
713779
float_formats.extend((len(cols) - len(float_formats)) * [_DEFAULT_FLOATFMT])
714780
if isinstance(intfmt, str): # old version
715-
int_formats = len(cols) * [intfmt] # just duplicate the string to use in each column
781+
int_formats = len(cols) * [
782+
intfmt
783+
] # just duplicate the string to use in each column
716784
else: # if intfmt is list, tuple etc we have one per column
717785
int_formats = list(intfmt)
718786
if len(int_formats) < len(cols):
@@ -725,7 +793,9 @@ def tabulate(
725793
missing_vals.extend((len(cols) - len(missing_vals)) * [_DEFAULT_MISSINGVAL])
726794
cols = [
727795
[_format(v, ct, fl_fmt, int_fmt, miss_v, has_invisible) for v in c]
728-
for c, ct, fl_fmt, int_fmt, miss_v in zip(cols, coltypes, float_formats, int_formats, missing_vals)
796+
for c, ct, fl_fmt, int_fmt, miss_v in zip(
797+
cols, coltypes, float_formats, int_formats, missing_vals
798+
)
729799
]
730800

731801
# align columns
@@ -748,14 +818,24 @@ def tabulate(
748818
break
749819
if align != "global":
750820
aligns[idx] = align
751-
minwidths = [width_fn(h) + min_padding for h in headers] if headers else [0] * len(cols)
821+
minwidths = (
822+
[width_fn(h) + min_padding for h in headers] if headers else [0] * len(cols)
823+
)
752824
aligns_copy = aligns.copy()
753825
# Reset alignments in copy of alignments list to "left" for 'colon_grid' format,
754826
# which enforces left alignment in the text output of the data.
755827
if tablefmt == "colon_grid":
756828
aligns_copy = ["left"] * len(cols)
757829
cols = [
758-
_align_column(c, a, minw, has_invisible, enable_widechars, is_multiline, preserve_whitespace)
830+
_align_column(
831+
c,
832+
a,
833+
minw,
834+
has_invisible,
835+
enable_widechars,
836+
is_multiline,
837+
preserve_whitespace,
838+
)
759839
for c, a, minw in zip(cols, aligns_copy, minwidths)
760840
]
761841

@@ -786,7 +866,10 @@ def tabulate(
786866
aligns_headers[hidx] = aligns[hidx]
787867
elif align != "global":
788868
aligns_headers[hidx] = align
789-
minwidths = [max(minw, max(width_fn(cl) for cl in c)) for minw, c in zip(minwidths, t_cols)]
869+
minwidths = [
870+
max(minw, max(width_fn(cl) for cl in c))
871+
for minw, c in zip(minwidths, t_cols)
872+
]
790873
headers = [
791874
_align_header(h, a, minw, width_fn(h), is_multiline, width_fn)
792875
for h, a, minw in zip(headers, aligns_headers, minwidths)
@@ -801,7 +884,16 @@ def tabulate(
801884

802885
ra_default = rowalign if isinstance(rowalign, str) else None
803886
rowaligns = _expand_iterable(rowalign, len(rows), ra_default)
804-
return _format_table(tablefmt, headers, aligns_headers, rows, minwidths, aligns, is_multiline, rowaligns=rowaligns)
887+
return _format_table(
888+
tablefmt,
889+
headers,
890+
aligns_headers,
891+
rows,
892+
minwidths,
893+
aligns,
894+
is_multiline,
895+
rowaligns=rowaligns,
896+
)
805897

806898

807899
def _expand_numparse(disable_numparse, column_count):
@@ -864,7 +956,9 @@ def _append_line(lines, colwidths, colaligns, linefmt):
864956
return lines
865957

866958

867-
def _format_table(fmt, headers, headersaligns, rows, colwidths, colaligns, is_multiline, rowaligns):
959+
def _format_table(
960+
fmt, headers, headersaligns, rows, colwidths, colaligns, is_multiline, rowaligns
961+
):
868962
lines = []
869963
hidden = fmt.with_header_hide if (headers and fmt.with_header_hide) else []
870964
pad = fmt.padding
@@ -888,21 +982,41 @@ def _format_table(fmt, headers, headersaligns, rows, colwidths, colaligns, is_mu
888982
# initial rows with a line below
889983
for row, ralign in zip(rows[:-1], rowaligns):
890984
if row != SEPARATING_LINE:
891-
append_row(lines, pad_row(row, pad), padded_widths, colaligns, fmt.datarow, rowalign=ralign)
985+
append_row(
986+
lines,
987+
pad_row(row, pad),
988+
padded_widths,
989+
colaligns,
990+
fmt.datarow,
991+
rowalign=ralign,
992+
)
892993
_append_line(lines, padded_widths, colaligns, fmt.linebetweenrows)
893994
# the last row without a line below
894-
append_row(lines, pad_row(rows[-1], pad), padded_widths, colaligns, fmt.datarow, rowalign=rowaligns[-1])
995+
append_row(
996+
lines,
997+
pad_row(rows[-1], pad),
998+
padded_widths,
999+
colaligns,
1000+
fmt.datarow,
1001+
rowalign=rowaligns[-1],
1002+
)
8951003
else:
8961004
separating_line = (
897-
fmt.linebetweenrows or fmt.linebelowheader or fmt.linebelow or fmt.lineabove or Line("", "", "", "")
1005+
fmt.linebetweenrows
1006+
or fmt.linebelowheader
1007+
or fmt.linebelow
1008+
or fmt.lineabove
1009+
or Line("", "", "", "")
8981010
)
8991011
for row in rows:
9001012
# test to see if either the 1st column or the 2nd column (account for showindex) has
9011013
# the SEPARATING_LINE flag
9021014
if _is_separating_line(row):
9031015
_append_line(lines, padded_widths, colaligns, separating_line)
9041016
else:
905-
append_row(lines, pad_row(row, pad), padded_widths, colaligns, fmt.datarow)
1017+
append_row(
1018+
lines, pad_row(row, pad), padded_widths, colaligns, fmt.datarow
1019+
)
9061020

9071021
if fmt.linebelow and "linebelow" not in hidden:
9081022
_append_line(lines, padded_widths, colaligns, fmt.linebelow)

0 commit comments

Comments
 (0)