Skip to content

Commit 849699a

Browse files
committed
Adds support for the alternative tokens in from_format()
1 parent d4a98db commit 849699a

File tree

5 files changed

+303
-6
lines changed

5 files changed

+303
-6
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
### Added
66

7+
- Added support for the alternative formatter's tokens in `from_format()`
78
- Added a `timezones` module attribute to expose available timezones.
89

910

docs/_docs/instantiation.rst

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,25 @@ The difference being the addition the ``tz`` argument that can be a ``tzinfo`` i
110110
111111
pendulum.from_format('1975-05-21 22', '%Y-%m-%d %H').to_datetime_string()
112112
'1975-05-21 22:00:00'
113-
pendulum.from_format('1975-05-21 22', '%Y-%m-%d %H', 'Europe/London').isoformat()
113+
pendulum.from_format('1975-05-21 22', '%Y-%m-%d %H', tz='Europe/London').isoformat()
114114
'1975-05-21T22:00:00+01:00'
115115
116116
# Using strptime is also possible (the timezone will be UTC)
117117
pendulum.strptime('1975-05-21 22', '%Y-%m-%d %H').isoformat()
118118
119+
.. note::
120+
121+
``from_format()`` also accepts a ``formatter`` keyword argument to use the
122+
`Alternative Formatter`_'s tokens.
123+
124+
.. code-block:: python
125+
126+
import pendulum
127+
128+
pendulum.from_format('1975-05-21 22', 'YYYY-MM-DD HH', formatter='alternative')
129+
130+
Note that the alternative formatter will be the only one supported in the next major version.
131+
119132
The final ``create`` function is for working with unix timestamps.
120133
``from_timestamp()`` will create a ``Pendulum`` instance equal to the given timestamp
121134
and will set the timezone as well or default it to ``UTC``.

pendulum/formatting/alternative_formatter.py

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,26 @@
66
from .formatter import Formatter
77

88

9+
# Pre-compiled building blocks used to match the text of individual format
# tokens. Raw strings are used so that regex escapes such as ``\d`` are not
# treated as (invalid) Python string escapes.
_MATCH_1 = re.compile(r'\d')
_MATCH_2 = re.compile(r'\d\d')
_MATCH_3 = re.compile(r'\d{3}')
_MATCH_4 = re.compile(r'\d{4}')
_MATCH_6 = re.compile(r'[+-]?\d{6}')
_MATCH_1_TO_2 = re.compile(r'\d\d?')
_MATCH_1_TO_3 = re.compile(r'\d{1,3}')
_MATCH_1_TO_4 = re.compile(r'\d{1,4}')
_MATCH_1_TO_6 = re.compile(r'[+-]?\d{1,6}')
_MATCH_3_TO_4 = re.compile(r'\d{3}\d?')
_MATCH_5_TO_6 = re.compile(r'\d{5}\d?')
_MATCH_UNSIGNED = re.compile(r'\d+')
_MATCH_SIGNED = re.compile(r'[+-]?\d+')
# "Z" (any case) or a signed hh[:]mm offset.
_MATCH_OFFSET = re.compile(r'(?i)Z|[+-]\d\d:?\d\d')
# Same as above but the minutes part is optional.
_MATCH_SHORT_OFFSET = re.compile(r'(?i)Z|[+-]\d\d(?::?\d\d)?')
# Signed integer with up to three optional decimal digits.
_MATCH_TIMESTAMP = re.compile(r'[+-]?\d+(\.\d{1,3})?')
# The \uXXXX sequences are resolved by the regex engine itself.
_MATCH_WORD = re.compile(r"[0-9]*['a-z\u00A0-\u05FF\u0700-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]+|[\u0600-\u06FF\/]+(\s*?[\u0600-\u06FF]+){1,2}")
26+
27+
28+
929
class AlternativeFormatter(Formatter):
1030

1131
_TOKENS = '\[([^\[]*)\]|\\\(.)|' \
@@ -99,6 +119,71 @@ class AlternativeFormatter(Formatter):
99119
'LLLL': 'dddd, MMMM D, YYYY h:mm A',
100120
}
101121

122+
# Maps each format token to the compiled pattern(s) used to match its text
# when parsing. A tuple lists alternative patterns (they are joined with "|"
# when the final regex is built); None marks a token that cannot be parsed
# and is rejected as unsupported.
_REGEX_TOKENS = {
    'Y': _MATCH_SIGNED,
    'YY': (_MATCH_1_TO_2, _MATCH_2),
    'YYYY': (_MATCH_1_TO_4, _MATCH_4),
    'Q': _MATCH_1,
    'Qo': None,
    'M': _MATCH_1_TO_2,
    'MM': (_MATCH_1_TO_2, _MATCH_2),
    'MMM': None,
    'MMMM': None,
    'D': _MATCH_1_TO_2,
    'DD': (_MATCH_1_TO_2, _MATCH_2),
    'DDD': _MATCH_1_TO_3,
    'DDDD': _MATCH_3,
    'Do': None,
    'H': _MATCH_1_TO_2,
    'HH': (_MATCH_1_TO_2, _MATCH_2),
    'h': _MATCH_1_TO_2,
    'hh': (_MATCH_1_TO_2, _MATCH_2),
    'm': _MATCH_1_TO_2,
    'mm': (_MATCH_1_TO_2, _MATCH_2),
    's': _MATCH_1_TO_2,
    'ss': (_MATCH_1_TO_2, _MATCH_2),
    'S': (_MATCH_1_TO_3, _MATCH_1),
    'SS': (_MATCH_1_TO_3, _MATCH_2),
    'SSS': (_MATCH_1_TO_3, _MATCH_3),
    'SSSS': _MATCH_UNSIGNED,
    'SSSSS': _MATCH_UNSIGNED,
    'SSSSSS': _MATCH_UNSIGNED,
    'a': None,
    'x': _MATCH_SIGNED,
    # Reuse the shared timestamp pattern instead of compiling a duplicate.
    'X': _MATCH_TIMESTAMP,
}
155+
156+
# Maps each format token to a callable converting the matched text into the
# value stored in the parsed parts.
_PARSE_TOKENS = {
    # 'Y' is matchable, so it needs a converter too (it was missing, which
    # made any format using it fail with a KeyError).
    'Y': int,
    'YYYY': int,
    # Two-digit years are always placed in the 1900s.
    'YY': lambda year: 1900 + int(year),
    'Q': int,
    # Month names are not converted yet.
    'MMMM': lambda month: None,
    'MMM': lambda month: None,
    'MM': int,
    'M': int,
    'DDDD': int,
    'DDD': int,
    'DD': int,
    'D': int,
    'HH': int,
    'H': int,
    'hh': int,
    'h': int,
    'mm': int,
    'm': int,
    'ss': int,
    's': int,
    # Fractional seconds: the matched digits are the leading digits of the
    # fraction, so they are right-padded (or truncated) to six digits to get
    # microseconds. Scaling by the token width instead would overflow when
    # more digits are matched than the token has letters ('S' on '123').
    'S': lambda us: int(us[:6].ljust(6, '0')),
    'SS': lambda us: int(us[:6].ljust(6, '0')),
    'SSS': lambda us: int(us[:6].ljust(6, '0')),
    'SSSS': lambda us: int(us[:6].ljust(6, '0')),
    'SSSSS': lambda us: int(us[:6].ljust(6, '0')),
    'SSSSSS': lambda us: int(us[:6].ljust(6, '0')),
    # The meridiem is not converted yet.
    'a': lambda meridiem: None,
    # 'X' is a timestamp in seconds, 'x' in milliseconds.
    'X': float,
    'x': lambda ts: float(ts) / 1e3,
}
186+
102187
def format(self, dt, fmt, locale=None):
103188
"""
104189
Formats a Pendulum instance with a given format and locale.
@@ -232,3 +317,97 @@ def _format_localizable_token(self, dt, token, locale):
232317
return self._format_localizable_token(dt, token, 'en')
233318

234319
return trans
320+
321+
def parse(self, time, fmt):
    """
    Parses a time string matching a given format into its parts.

    The whole string must match the format: literal text in the format
    is matched verbatim and trailing characters are rejected.

    :param time: The timestring
    :type time: str

    :param fmt: The format
    :type fmt: str

    :rtype: dict

    :raises ValueError: If the string does not match the format
                        or the format uses an unsupported token.
    """
    parsed = {
        'year': None,
        'month': None,
        'day': None,
        'hour': None,
        'minute': None,
        'second': None,
        'microsecond': None,
        'tz': None,
        'quarter': None,
        'day_of_week': None,
        'day_of_year': None,
        'meridiem': None,
        'timestamp': None
    }

    # Build the regex piece by piece so that the literal text found
    # between tokens is escaped instead of being interpreted as regex.
    chunks = []
    position = 0
    for token in self._FORMAT_RE.finditer(fmt):
        chunks.append(re.escape(fmt[position:token.start()]))
        chunks.append(self._replace_tokens(token.group(0)))
        position = token.end()

    chunks.append(re.escape(fmt[position:]))

    # \Z anchors the end so that a matching prefix followed
    # by unparsed data is not silently accepted.
    match = re.match('(?:{})\\Z'.format(''.join(chunks)), time)
    if match is None:
        raise ValueError('String does not match format {}'.format(fmt))

    self._get_parsed_values(match, parsed)

    return parsed
361+
362+
def _get_parsed_values(self, m, parsed):
    """
    Stores the value of every named group of a match in the parsed parts.

    Group names are the format tokens; each one is handed over
    to ``_get_parsed_value()`` along with the text it captured.

    :param m: The match of the time string against the format regex
    :type m: re.Match

    :param parsed: The parsed parts, updated in place
    :type parsed: dict
    """
    positions = m.re.groupindex

    for name in positions:
        captured = m.group(positions[name])
        self._get_parsed_value(name, captured, parsed)
365+
366+
def _get_parsed_value(self, token, value, parsed):
    """
    Converts the text matched by a token and stores it in the parsed parts.

    :param token: The format token
    :type token: str

    :param value: The text matched by the token
    :type value: str

    :param parsed: The parsed parts, updated in place
    :type parsed: dict

    :raises KeyError: If the token has no converter in ``_PARSE_TOKENS``
    """
    parsed_token = self._PARSE_TOKENS[token](value)

    # Tokens are compared exactly rather than by substring so that
    # no branch can be shadowed by a broader one placed before it.
    if token in ('Y', 'YY', 'YYYY'):
        part = 'year'
    elif token == 'Q':
        part = 'quarter'
    elif token in ('M', 'MM', 'MMM', 'MMMM'):
        # Month names describe a month too, not a day of the week.
        part = 'month'
    elif token in ('DDD', 'DDDD'):
        part = 'day_of_year'
    elif token in ('D', 'DD'):
        part = 'day'
    elif token in ('H', 'HH', 'h', 'hh'):
        part = 'hour'
    elif token in ('m', 'mm'):
        part = 'minute'
    elif token in ('s', 'ss'):
        part = 'second'
    elif token in ('S', 'SS', 'SSS', 'SSSS', 'SSSSS', 'SSSSSS'):
        part = 'microsecond'
    elif token == 'a':
        part = 'meridiem'
    elif token in ('X', 'x'):
        part = 'timestamp'
    else:
        # A converter exists but the token maps to no part: ignore it.
        return

    parsed[part] = parsed_token
395+
396+
def _replace_tokens(self, token):
    """
    Converts a piece of the format into the matching regex fragment.

    Escaped text (``[text]`` or ``\\c``) is returned as a regex matching
    that text literally. Any other token is turned into a named group,
    named after the token, matching one of its candidate patterns.

    :param token: The format token or escaped text
    :type token: str

    :rtype: str

    :raises ValueError: If the token is unknown or cannot be parsed
    """
    # Literal text must be escaped, otherwise characters such as
    # "." or "(" would be interpreted as regex syntax.
    if token.startswith('[') and token.endswith(']'):
        return re.escape(token[1:-1])

    if token.startswith('\\'):
        return re.escape(token[1:])

    # Unknown tokens and tokens explicitly mapped to None
    # are both unsupported when parsing.
    candidates = self._REGEX_TOKENS.get(token)
    if not candidates:
        raise ValueError('Unsupported token: {}'.format(token))

    if not isinstance(candidates, tuple):
        candidates = (candidates,)

    return '(?P<{}>{})'.format(
        token, '|'.join(p.pattern for p in candidates)
    )

pendulum/pendulum.py

Lines changed: 79 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import calendar
66
import datetime
77

8+
import pendulum
9+
810
from .date import Date
911
from .time import Time
1012
from .period import Period
@@ -13,6 +15,7 @@
1315
from .tz.timezone_info import TimezoneInfo
1416
from .parsing import parse
1517
from .helpers import add_duration
18+
from .formatting import FORMATTERS
1619
from .constants import (
1720
YEARS_PER_CENTURY, YEARS_PER_DECADE,
1821
MONTHS_PER_YEAR,
@@ -373,7 +376,7 @@ def create(cls, year=None, month=None, day=None,
373376
return cls.instance(dt, tz)
374377

375378
@classmethod
def create_from_format(cls, time, fmt, tz=UTC, formatter='classic'):
    """
    Create a Pendulum instance from a specific format.

    With the "classic" formatter the string is parsed with
    ``datetime.strptime()``. Otherwise the chosen formatter parses the
    string into parts and any missing part is filled in: the date
    defaults to today when no date information is given, a missing
    month or day defaults to 1 and missing time units default to 0.

    :param time: The string to parse
    :type time: str

    :param fmt: The format
    :type fmt: str

    :param tz: The timezone
    :type tz: tzinfo or str or int or None

    :param formatter: The formatter to use. Default "classic"
    :type formatter: str

    :rtype: Pendulum

    :raises ValueError: If the formatter is unknown, the string does
                        not match the format or the quarter is invalid.
    """
    if formatter not in FORMATTERS:
        raise ValueError('Invalid formatter [{}]'.format(formatter))

    if formatter == 'classic':
        dt = datetime.datetime.strptime(time, fmt)

        return cls.instance(dt, tz)

    # Use the formatter that was actually requested.
    parts = FORMATTERS[formatter].parse(time, fmt)
    tz = parts['tz'] or tz
    actual_parts = {}

    # If timestamp has been specified
    # we use it and don't go any further
    if parts['timestamp'] is not None:
        return cls.create_from_timestamp(parts['timestamp'], tz)

    date_is_missing = all(
        parts[part] is None for part in ('year', 'month', 'day')
    )

    quarter = parts['quarter']
    if quarter is not None:
        # The quarter token matches any single digit,
        # so out of range values must be rejected here.
        if not 1 <= quarter <= 4:
            raise ValueError('Invalid quarter [{}]'.format(quarter))

        # First day of the quarter, in the current year
        # unless a year has been parsed (applied below).
        actual_parts['year'] = cls.now().year
        actual_parts['month'] = 3 * (quarter - 1) + 1
        actual_parts['day'] = 1
    elif date_is_missing:
        # If the date part has not been specified
        # we default to today
        now = cls.now()

        actual_parts['year'] = now.year
        actual_parts['month'] = now.month
        actual_parts['day'] = now.day

    for part in ('year', 'month', 'day'):
        if parts[part] is not None:
            actual_parts[part] = parts[part]
        elif part != 'year':
            # We replace any not set month/day value
            # with the first of each unit
            actual_parts.setdefault(part, 1)

    # If any of hour/minute/second/microsecond is not set
    # We assume start of corresponding value
    for part in ('hour', 'minute', 'second', 'microsecond'):
        actual_parts[part] = parts[part] if parts[part] is not None else 0

    if parts['day_of_year'] is not None:
        # Ordinal dates need zero-padding to be unambiguous:
        # "1999-5" would otherwise read as a year and a month.
        text = '{:04d}-{:03d}'.format(
            actual_parts.get('year', cls.now().year),
            parts['day_of_year']
        )
        dt = parse(text)

        actual_parts['month'] = dt['month']
        actual_parts['day'] = dt['day']

    # NOTE: parts['meridiem'] is not applied to the hour yet.

    actual_parts['tz'] = tz

    return cls.create(**actual_parts)
394469

395470
@classmethod
396471
def create_from_timestamp(cls, timestamp, tz=UTC):
@@ -419,7 +494,7 @@ def create_from_timestamp(cls, timestamp, tz=UTC):
419494

420495
@classmethod
def strptime(cls, time, fmt):
    """
    Create a Pendulum instance from a strptime-style format.

    Delegates to ``create_from_format()``, always using the "classic"
    formatter and its default timezone.

    :param time: The string to parse
    :type time: str

    :param fmt: The format
    :type fmt: str

    :rtype: Pendulum
    """
    return cls.create_from_format(time, fmt, formatter='classic')
423498

424499
def copy(self):
425500
"""

tests/pendulum_tests/test_create_from_format.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# -*- coding: utf-8 -*-
22

3-
import pytz
3+
import pytest
4+
import pendulum
5+
46
from pendulum import Pendulum, timezone
57
from .. import AbstractTestCase
68

@@ -37,3 +39,30 @@ def test_strptime_is_create_from_format(self):
3739
self.assertPendulum(d, 1975, 5, 21, 22, 32, 11)
3840
self.assertIsInstanceOfPendulum(d)
3941
self.assertEqual('UTC', d.timezone_name)
42+
43+
44+
# Each case is (text to parse, alternative formatter pattern, expected ISO 8601 string).
FROM_FORMAT_CASES = [
    ('2014-4', 'YYYY-Q', '2014-10-01T00:00:00+00:00'),
    ('12-02-1999', 'MM-DD-YYYY', '1999-12-02T00:00:00+00:00'),
    ('12-02-1999', 'DD-MM-YYYY', '1999-02-12T00:00:00+00:00'),
    ('12/02/1999', 'DD/MM/YYYY', '1999-02-12T00:00:00+00:00'),
    ('12_02_1999', 'DD_MM_YYYY', '1999-02-12T00:00:00+00:00'),
    ('12:02:1999', 'DD:MM:YYYY', '1999-02-12T00:00:00+00:00'),
    ('2-2-99', 'D-M-YY', '1999-02-02T00:00:00+00:00'),
    ('99', 'YY', '1999-01-01T00:00:00+00:00'),
    ('300-1999', 'DDD-YYYY', '1999-10-27T00:00:00+00:00'),
    ('12-02-1999 2:45:10', 'DD-MM-YYYY h:m:s', '1999-02-12T02:45:10+00:00'),
    ('12-02-1999 12:45:10', 'DD-MM-YYYY h:m:s', '1999-02-12T12:45:10+00:00'),
    ('12:00:00', 'HH:mm:ss', '2015-11-12T12:00:00+00:00'),
    ('12:30:00', 'HH:mm:ss', '2015-11-12T12:30:00+00:00'),
    ('00:00:00', 'HH:mm:ss', '2015-11-12T00:00:00+00:00'),
    ('00:30:00 1', 'HH:mm:ss S', '2015-11-12T00:30:00.100000+00:00'),
    ('00:30:00 12', 'HH:mm:ss SS', '2015-11-12T00:30:00.120000+00:00'),
    ('00:30:00 123', 'HH:mm:ss SSS', '2015-11-12T00:30:00.123000+00:00'),
    ('1234567890', 'X', '2009-02-13T23:31:30+00:00'),
    ('1234567890123', 'x', '2009-02-13T23:31:30.123000+00:00'),
    ('2016-10-06', 'YYYY-MM-DD', '2016-10-06T00:00:00+00:00'),
]


@pytest.mark.parametrize('text,fmt,expected', FROM_FORMAT_CASES)
def test_from_format(text, fmt, expected):
    """Parsing with the alternative formatter yields the expected instant."""
    # "Now" is frozen so that formats without a date have a stable result.
    frozen_now = pendulum.create(2015, 11, 12)

    with pendulum.test(frozen_now):
        parsed = pendulum.from_format(text, fmt, formatter='alternative')

        assert parsed.isoformat() == expected

0 commit comments

Comments
 (0)