Skip to content

Commit 80b5cea

Browse files
committed
[dev]: updating string and datetime conversions for speed
1 parent 5164291 commit 80b5cea

File tree

2 files changed

+22
-17
lines changed

2 files changed

+22
-17
lines changed

src/tabbed/utils/parsing.py

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import itertools
77
import re
88
import string
9-
from collections import Counter
109
from datetime import date, datetime, time
1110

1211
# define the supported intrinsic types for each list element read by Tabbed
@@ -84,6 +83,12 @@ def datetime_formats() -> list[str]:
8483
return fmts
8584

8685

86+
# GLOBALS OF FORMATS
87+
TIME_FORMATS = time_formats()
88+
DATE_FORMATS = date_formats()
89+
DATETIME_FORMATS = datetime_formats()
90+
91+
8792
def find_format(astring: str, formats: list[str]) -> str | None:
8893
"""Returns the date, time, or datetime format of astring.
8994
@@ -144,7 +149,7 @@ def is_time(astring: str) -> bool:
144149
"""
145150

146151
# all times contain 2 ':' separators
147-
if Counter(astring)[':'] < 2:
152+
if not re.search(r'^\d{1,2}:\d{2}:\d{2}', astring):
148153
return False
149154

150155
# detecting times by another method, without testing formats, could give a speedup
@@ -200,18 +205,16 @@ def as_numeric(astring: str, decimal: str) -> int | float | complex | str:
200205
if decimal != '.':
201206
astring = astring.replace(decimal, '.')
202207

203-
# look for imag part for complex
204-
if re.findall(r'[ij]', astring):
205-
return complex(astring)
206-
207-
# look for a decimal
208-
if re.findall(r'\.', astring):
209-
return float(astring)
210-
211208
try:
212-
return int(astring)
209+
x = float(astring)
213210
except ValueError:
214-
return astring
211+
if re.search(r'i|j', astring):
212+
try:
213+
return complex(astring)
214+
except ValueError:
215+
return astring
216+
217+
return int(x) if x.is_integer() else x
215218

216219

217220
def as_time(astring: str, fmt: str) -> time | str:
@@ -332,23 +335,25 @@ def convert(
332335
return as_numeric(astring, decimal)
333336

334337
# simple string whose characters are a subset of ASCII
335-
if set(astring.lower()).issubset(string.ascii_letters):
338+
# dates and times always contain a separator that is neither an ASCII letter nor a digit
339+
if set(astring.lower()).issubset(string.ascii_letters + string.digits):
336340
return astring
337341

342+
# dates and times are slower -- room for improvement
338343
# times, dates, datetimes - use asserts for mypy type narrowing
339344
if is_time(astring):
340-
fmt = find_format(astring, time_formats())
345+
fmt = find_format(astring, TIME_FORMATS)
341346
assert isinstance(fmt, str)
342347
return as_time(astring, fmt)
343348

344349
if is_date(astring):
345-
fmt = find_format(astring, date_formats())
350+
fmt = find_format(astring, DATE_FORMATS)
346351
assert isinstance(fmt, str)
347352
return as_date(astring, fmt)
348353

349354
if is_datetime(astring):
350355
# perform datetime last since it has many fmts to test
351-
fmt = find_format(astring, datetime_formats())
356+
fmt = find_format(astring, DATETIME_FORMATS)
352357
assert isinstance(fmt, str)
353358
return as_datetime(astring, fmt)
354359

tests/test_parsing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def valid_time(
177177
diurn = ' ' + rng.choice(diurnal[1]) if dicode else ''
178178

179179
fmt = f'{hfmt}:%M:%S{sep}%f {dicode}'
180-
example = f'{hour}:{mins}:{secs}{sep}{musecs}{diurn}'
180+
example = f'{hour}:{mins:0>{2}}:{secs:0>{2}}{sep}{musecs}{diurn}'
181181

182182
return fmt, example
183183

0 commit comments

Comments
 (0)