Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 26 additions & 4 deletions Lib/_strptime.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,6 @@ def __init__(self, locale_time=None):
'V': r"(?P<V>5[0-3]|0[1-9]|[1-4]\d|\d)",
# W is set below by using 'U'
'y': r"(?P<y>\d\d)",
#XXX: Does 'Y' need to worry about having less or more than
# 4 digits?
'Y': r"(?P<Y>\d\d\d\d)",
'z': r"(?P<z>[+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?|(?-i:Z))",
'A': self.__seqToRE(self.locale_time.f_weekday, 'A'),
Expand All @@ -213,8 +211,10 @@ def __init__(self, locale_time=None):
'Z'),
'%': '%'})
base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
base.__setitem__('x', self.pattern(self.locale_time.LC_date))
base.__setitem__(
'c', self.__pattern_with_lax_year(self.locale_time.LC_date_time))
base.__setitem__(
'x', self.__pattern_with_lax_year(self.locale_time.LC_date))
base.__setitem__('X', self.pattern(self.locale_time.LC_time))

def __seqToRE(self, to_convert, directive):
Expand All @@ -236,6 +236,26 @@ def __seqToRE(self, to_convert, directive):
regex = '(?P<%s>%s' % (directive, regex)
return '%s)' % regex

def __pattern_with_lax_year(self, format):
    """Build the regex for *format* with relaxed year-digit counts.

    Works like pattern(), except that in the resulting regex the
    groups for %Y and %y also accept fewer digits than usual (1-4
    and 1-2 digits, respectively).

    This is needed so that strptime() is able to parse strftime()'s
    output when %c or %x is used: for some locales and platforms
    (e.g., 'C.UTF-8' on Linux), those directives may produce a year
    number without zero padding when the number is small enough
    (e.g., '999' instead of '0999', or '9' instead of '09').

    The regex patterns registered for the %Y and %y directives
    themselves are not generated by this helper; they still match
    exactly four or two digits, respectively.

    """
    # (directive, lax replacement regex); applied in this order.
    lax_year_regexes = (
        ('Y', r"(?P<Y>\d{1,4})"),
        ('y', r"(?P<y>\d{1,2})"),
    )
    regex = self.pattern(format)
    for directive, lax_regex in lax_year_regexes:
        # Swap the strict group for this directive with the lax variant.
        regex = regex.replace(self[directive], lax_regex)
    return regex

def pattern(self, format):
"""Return regex pattern for the format string.

Expand Down Expand Up @@ -374,6 +394,7 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
# U, W
# worthless without day of the week
if group_key == 'y':
# 1 or 2 digits (1 only for directive c or x; see TimeRE.__init__)
year = int(found_dict['y'])
# Open Group specification for strptime() states that a %y
#value in the range of [00, 68] is in the century 2000, while
Expand All @@ -383,6 +404,7 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
else:
year += 1900
elif group_key == 'Y':
# 1-4 digits (1-3 only for directive c or x; see TimeRE.__init__)
year = int(found_dict['Y'])
elif group_key == 'G':
iso_year = int(found_dict['G'])
Expand Down
131 changes: 131 additions & 0 deletions Lib/test/datetimetester.py
Original file line number Diff line number Diff line change
Expand Up @@ -1185,6 +1185,40 @@ def test_strptime_leap_year(self):
date.strptime('20-03-14', '%y-%m-%d')
date.strptime('02-29,2024', '%m-%d,%Y')

def test_strftime_strptime_roundtrip(self):
    # Check that date.strptime() parses back whatever date.strftime()
    # produced, for several formats and a range of years.

    # Years used when the format renders a 4-digit year; those below
    # 1000 are the gh-124529 cases (relevant to %c/%x).
    years_for_4digit_year = (
        1, 9, 10, 99, 100, 999,
        1000, 1410, 1989, 2024, 2095, 9999)
    # Years used when the format renders a 2-digit year; 2000, 2001
    # and 2009 are the gh-124529 cases (relevant to %c/%x).
    years_for_2digit_year = (1969, 1999, 2000, 2001, 2009, 2068)
    month_day_pairs = ((1, 1), (6, 4), (12, 31))
    formats = (
        '%c',
        '%x',
        '%Y%m%d',
        'm:%m d:%d y:%y H:%H M:%M S:%S f:%f and some text',
    )
    for fmt in formats:
        with self.subTest(fmt=fmt):
            # Probe which year representation this format produces.
            probe = date(1999, 3, 17).strftime(fmt)
            if '1999' in probe:
                year_seq = years_for_4digit_year
            elif '99' in probe:
                year_seq = years_for_2digit_year
            else:
                self.skipTest(f"these subtests need locale for which "
                              f"{fmt!r} includes year in some variant")
            for year in year_seq:
                for month, day in month_day_pairs:
                    instance = date(year, month, day)
                    reason = (f'strftime/strptime roundtrip '
                              f'for {fmt=} and {year=}')
                    with self.subTest(reason=reason, instance=instance):
                        formatted = instance.strftime(fmt)
                        parsed = date.strptime(formatted, fmt)
                        self.assertEqual(parsed, instance, msg=reason)

class SubclassDate(date):
sub_var = 1

Expand Down Expand Up @@ -2124,6 +2158,35 @@ def test_fromisocalendar_type_errors(self):
with self.assertRaises(TypeError):
self.theclass.fromisocalendar(*isocal)

def test_strptime_accepting_year_with_fewer_digits(self):  # gh-124529
    # For %c and %x, strptime() should accept a year written with
    # fewer digits than the usual four or two.
    prototype_inst = self.theclass.strptime('1999', '%Y')

    def check(fmt, input_string, year):
        # Parse `input_string` and compare with the prototype
        # instance moved to the expected `year`.
        reason = (f'strptime accepting year with fewer '
                  f'digits for {fmt=} and {input_string=}')
        with self.subTest(reason=reason):
            expected = prototype_inst.replace(year=year)
            parsed = self.theclass.strptime(input_string, fmt)
            self.assertEqual(parsed, expected, msg=reason)

    for fmt in ('%c', '%x'):
        with self.subTest(fmt=fmt):
            sample = prototype_inst.strftime(fmt)
            if '1999' in sample:
                # 4-digit year: the short digits are the year itself.
                year_marker = '1999'
                years = (1, 9, 10, 99, 100, 999)
                century = 0
            elif '99' in sample:
                # 2-digit year: the short digits are the year's
                # offset from 2000.
                year_marker = '99'
                years = (2000, 2001, 2009)
                century = 2000
            else:
                self.skipTest(f"these subtests need locale for which "
                              f"{fmt!r} includes year in some variant")
            for year in years:
                y_digits = str(year - century)
                check(fmt, sample.replace(year_marker, y_digits), year)


#############################################################################
# datetime tests
Expand Down Expand Up @@ -2955,6 +3018,48 @@ def test_more_strftime(self):
except UnicodeEncodeError:
pass

def test_strftime_strptime_roundtrip(self):
    # Check that strptime() parses back whatever strftime() produced,
    # for several formats, tzinfo variants and a range of years.

    # Years used when the format renders a 4-digit year; those below
    # 1000 are the gh-124529 cases (relevant to %c/%x).
    years_for_4digit_year = (
        1, 9, 10, 99, 100, 999,
        1000, 1410, 1989, 2024, 2095, 9999)
    # Years used when the format renders a 2-digit year; 2000, 2001
    # and 2009 are the gh-124529 cases (relevant to %c/%x).
    years_for_2digit_year = (1969, 1999, 2000, 2001, 2009, 2068)
    # Positional constructor arguments that follow the year.
    sample_fields = (
        (1, 1, 0, 0, 0),
        (6, 4, 1, 42, 7, 99),
        (12, 31, 23, 59, 59),
    )
    base_formats = (
        '%c %f',
        '%x %X %f',
        '%Y%m%d%H%M%S%f',
        'm:%m d:%d y:%y H:%H M:%M S:%S f:%f and some text',
    )
    timezones = (
        None,
        UTC,
        timezone(timedelta(hours=2)),
        timezone(timedelta(hours=-7)),
    )
    for tz in timezones:
        # Append %z only when a tzinfo is used.
        fmt_suffix = ' %z' if tz is not None else ''
        for base_fmt in base_formats:
            fmt = base_fmt + fmt_suffix
            with self.subTest(fmt=fmt):
                # Probe which year representation this format produces.
                probe = self.theclass(1999, 3, 17, 0, 0).strftime(fmt)
                if '1999' in probe:
                    year_seq = years_for_4digit_year
                elif '99' in probe:
                    year_seq = years_for_2digit_year
                else:
                    self.skipTest(f"these subtests need locale for which "
                                  f"{fmt!r} includes year in some variant")
                for year in year_seq:
                    for fields in sample_fields:
                        instance = self.theclass(year, *fields, tzinfo=tz)
                        reason = (f'strftime/strptime roundtrip '
                                  f'for {fmt=} and {year=}')
                        with self.subTest(reason=reason, instance=instance):
                            formatted = instance.strftime(fmt)
                            parsed = self.theclass.strptime(formatted, fmt)
                            self.assertEqual(parsed, instance, msg=reason)

def test_extract(self):
dt = self.theclass(2002, 3, 4, 18, 45, 3, 1234)
self.assertEqual(dt.date(), date(2002, 3, 4))
Expand Down Expand Up @@ -3901,6 +4006,32 @@ def test_strptime_single_digit(self):
newdate = self.theclass.strptime(string, format)
self.assertEqual(newdate, target, msg=reason)

def test_strftime_strptime_roundtrip(self):
    # Check that strptime() parses back whatever strftime() produced,
    # for several formats and tzinfo variants.
    base_formats = (
        '%c %f',
        '%X %f',
        '%H%M%S%f',
        'm:%m d:%d y:%y H:%H M:%M S:%S f:%f and some text',
    )
    # Positional constructor arguments for each sample instance.
    sample_fields = (
        (0, 0, 0),
        (1, 42, 7),
        (23, 59, 59, 654321),
    )
    timezones = (
        None,
        UTC,
        timezone(timedelta(hours=2)),
        timezone(timedelta(hours=-7)),
    )
    for tz in timezones:
        # Append %z only when a tzinfo is used.
        fmt_suffix = ' %z' if tz is not None else ''
        for base_fmt in base_formats:
            fmt = base_fmt + fmt_suffix
            reason = f'strftime/strptime round trip for {fmt=}'
            for fields in sample_fields:
                instance = self.theclass(*fields, tzinfo=tz)
                with self.subTest(reason=reason, instance=instance):
                    formatted = instance.strftime(fmt)
                    parsed = self.theclass.strptime(formatted, fmt)
                    self.assertEqual(parsed, instance, msg=reason)

def test_bool(self):
# time is always True.
cls = self.theclass
Expand Down
129 changes: 125 additions & 4 deletions Lib/test/test_strptime.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,11 +161,42 @@ def test_compile(self):
for directive in ('a','A','b','B','c','d','G','H','I','j','m','M','p',
'S','u','U','V','w','W','x','X','y','Y','Z','%'):
fmt = "%d %Y" if directive == 'd' else "%" + directive
input_string = time.strftime(fmt)
compiled = self.time_re.compile(fmt)
found = compiled.match(time.strftime(fmt))
self.assertTrue(found, "Matching failed on '%s' using '%s' regex" %
(time.strftime(fmt),
compiled.pattern))
found = compiled.match(input_string)
self.assertTrue(found,
(f"Matching failed on '{input_string}' "
f"using '{compiled.pattern}' regex"))
for directive in ('c', 'x'):
fmt = "%" + directive
with self.subTest(f"{fmt!r} should match input containing "
f"year with fewer digits than usual"):
# gh-124529
params = _input_str_and_expected_year_for_few_digits_year(fmt)
if params is None:
self.skipTest(f"this subtest needs locale for which "
f"{fmt!r} includes year in some variant")
input_string, _ = params
compiled = self.time_re.compile(fmt)
found = compiled.match(input_string)
self.assertTrue(found,
(f"Matching failed on '{input_string}' "
f"using '{compiled.pattern}' regex"))
for directive in ('y', 'Y'):
fmt = "%" + directive
with self.subTest(f"{fmt!r} should not match input containing "
f"year with fewer digits than usual"):
params = _input_str_and_expected_year_for_few_digits_year(fmt)
if params is None:
self.skipTest(f"this subtest needs locale for which "
f"{fmt!r} includes year in some variant")
input_string, _ = params
compiled = self.time_re.compile(fmt)
found = compiled.match(input_string)
self.assertFalse(found,
(f"Matching unexpectedly succeeded "
f"on '{input_string}' using "
f"'{compiled.pattern}' regex"))

def test_blankpattern(self):
# Make sure when tuple or something has no values no regex is generated.
Expand Down Expand Up @@ -299,6 +330,25 @@ def helper(self, directive, position):
(directive, strf_output, strp_output[position],
self.time_tuple[position]))

def helper_for_directives_accepting_few_digits_year(self, directive):
fmt = "%" + directive
params = _input_str_and_expected_year_for_few_digits_year(fmt)
if params is None:
self.skipTest(f"test needs locale for which {fmt!r} "
f"includes year in some variant")
input_string, expected_year = params
try:
output_year = _strptime._strptime(input_string, fmt)[0][0]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it possible for _strptime._strptime to return a one-dimensional list?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It always returns a 3-tuple, the 0th item of which is a 9-tuple, the 0th item of which is an int representing the year number.

except ValueError as exc:
# See: gh-124529
self.fail(f"testing of {directive!r} directive failed; "
f"{input_string!r} -> exception: {exc!r}")
else:
self.assertEqual(output_year, expected_year,
(f"testing of {directive!r} directive failed; "
f"{input_string!r} -> output including year "
f"{output_year!r} != {expected_year!r}"))

def test_year(self):
# Test that the year is handled properly
for directive in ('y', 'Y'):
Expand All @@ -312,6 +362,17 @@ def test_year(self):
"'y' test failed; passed in '%s' "
"and returned '%s'" % (bound, strp_output[0]))

def test_bad_year(self):
for directive, bad_inputs in (
('y', ('9', '100', 'ni')),
('Y', ('7', '42', '999', '10000', 'SPAM')),
):
fmt = "%" + directive
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe you could use an f-string here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both ways are valid; the one I chose is more consistent with the existing code in this module.

for input_val in bad_inputs:
with self.subTest(directive=directive, input_val=input_val):
with self.assertRaises(ValueError):
_strptime._strptime_time(input_val, fmt)

def test_month(self):
# Test for month directives
for directive in ('B', 'b', 'm'):
Expand Down Expand Up @@ -454,11 +515,21 @@ def test_date_time(self):
for position in range(6):
self.helper('c', position)

def test_date_time_accepting_few_digits_year(self): # gh-124529
# Test the %c directive with an input string whose year number
# consists of fewer digits than usual; the actual check is
# delegated to helper_for_directives_accepting_few_digits_year().
self.helper_for_directives_accepting_few_digits_year('c')

def test_date(self):
    # Test the %x directive: check time-tuple positions 0, 1 and 2.
    for position in range(3):
        self.helper('x', position)

def test_date_accepting_few_digits_year(self): # gh-124529
# Test the %x directive with an input string whose year number
# consists of fewer digits than usual; the actual check is
# delegated to helper_for_directives_accepting_few_digits_year().
self.helper_for_directives_accepting_few_digits_year('x')

def test_time(self):
# Test %X directive
for position in range(3,6):
Expand Down Expand Up @@ -769,5 +840,55 @@ def test_TimeRE_recreation_timezone(self):
_strptime._strptime_time(oldtzname[1], '%Z')


def _input_str_and_expected_year_for_few_digits_year(fmt):
    """Build strptime() input containing a one-digit year for *fmt*.

    A fixed "magic" time tuple (year 1999) is formatted with
    `time.strftime(fmt, ...)`, and the year digits found in the result
    are replaced with the single digit '7' (the choice of this digit
    is arbitrary), so that the year representation is *shorter* than
    the usual four or two digits.

    Returns a 2-tuple `(<strptime input string>, <expected year>)`:

    * <strptime input string> -- the `strftime(fmt)`-result-like str
      containing the one-digit year;
    * <expected year> -- the int year that a `strptime(<strptime input
      string>, fmt)` call is expected to produce: 7 if the formatted
      string contained the 4-digit year, or 2007 if it contained only
      the 2-digit year.

    Returns None if, for the given format string and current locale,
    the formatted string does not contain the year in any variant.
    Callers test the result with `is None` to skip in that case, so
    the "no year" outcome must be None itself, not a tuple carrying
    a None year.
    """
    # 1. Prepare auxiliary *magic* time data (note that the magic values
    # we use here are guaranteed to be compatible with `time.strftime()`
    # and also well distinguishable within a formatted string, thanks to
    # the fact that the amount of overloaded numbers is minimized, as in
    # `_strptime.LocaleTime.__calc_date_time()`...):
    magic_year = 1999
    magic_tt = (magic_year, 3, 17, 22, 44, 55, 2, 76, 0)
    magic_4digits = str(magic_year)
    magic_2digits = magic_4digits[-2:]

    # 2. Pick our example year whose representation
    # is shorter than the usual four or two digits:
    input_year_str = '7'

    # 3. Determine the <strptime input string> part of the return value:
    input_string = time.strftime(fmt, magic_tt)
    index_4digits = input_string.find(magic_4digits)
    if index_4digits != -1:
        # `input_string` contains up-to-4-digit year representation
        input_string = input_string.replace(magic_4digits, input_year_str)
    # The 2-digit variant is searched for only *after* the replacement
    # above, so the '99' inside '1999' is never mistaken for it.
    index_2digits = input_string.find(magic_2digits)
    if index_2digits != -1:
        # `input_string` contains up-to-2-digit year representation
        input_string = input_string.replace(magic_2digits, input_year_str)

    # 4. Determine the <expected year> part of the return value:
    if index_4digits > index_2digits:
        expected_year = int(input_year_str)
    elif index_4digits < index_2digits:
        expected_year = 2000 + int(input_year_str)
    else:
        # The two indexes can be equal only if both are -1, i.e., no
        # year was found at all: signal that with a plain None.
        return None

    return input_string, expected_year


# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
unittest.main()
Loading
Loading