-
-
Notifications
You must be signed in to change notification settings - Fork 827
Add preliminary support for ISO-8601 timestamps via date: archive match pattern (#8715) #8776
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 9 commits
282d70c
db46cdb
4363bf7
69e8608
5c20d8f
6f1bcd4
4060e94
e9a8c5f
470758d
df2d33d
870bf7a
461df75
9553c35
409733b
de03806
796981c
7b8a194
8e3f1e4
904853d
6032c4a
9cb5e5f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,7 @@ | ||
| import os | ||
| import re | ||
| from datetime import datetime, timezone, timedelta | ||
| from zoneinfo import ZoneInfo | ||
|
|
||
|
|
||
| def parse_timestamp(timestamp, tzinfo=timezone.utc): | ||
|
|
@@ -185,3 +186,142 @@ def isoformat(self): | |
def archive_ts_now():
    """Return the current time as a tz-aware UTC datetime, for use as an archive timestamp."""
    # Passing the timezone explicitly yields an aware datetime rather than a naive one.
    return datetime.now(tz=timezone.utc)
|
|
||
|
|
||
class DatePatternError(ValueError):
    """Signals that a ``date:`` archive match pattern could not be parsed.

    Derives from ValueError, so handlers written for ValueError catch it as well.
    """
|
|
||
|
|
||
def exact_predicate(dt: datetime):
    """Build a matcher for a single instant.

    The returned callable takes a datetime and reports whether, once both
    values are converted to UTC, it denotes exactly the same instant as *dt*.
    """
    wanted = dt.astimezone(timezone.utc)  # normalise once, not on every call

    def is_same_instant(ts):
        return ts.astimezone(timezone.utc) == wanted

    return is_same_instant
|
|
||
|
|
||
def interval_predicate(start: datetime, end: datetime):
    """Build a matcher for the half-open interval [start, end).

    Both bounds are converted to UTC up front. The returned callable converts
    its argument to UTC as well and reports whether it is at or after *start*
    and strictly before *end*.
    """
    lower = start.astimezone(timezone.utc)
    upper = end.astimezone(timezone.utc)

    def is_within(ts):
        moment = ts.astimezone(timezone.utc)
        return lower <= moment and moment < upper

    return is_within
|
|
||
|
|
||
def parse_tz(tzstr: str):
    """
    Parse a timezone suffix into a tzinfo object.

    Accepted forms:
        empty string / None  -> None (no timezone given)
        "Z"                  -> timezone.utc
        "+HH:MM" / "-HH:MM"  -> fixed-offset timezone; one or two ASCII digits
                                per field, minutes below 60, total offset
                                within -12:00 .. +14:00
        anything else        -> looked up as a ZoneInfo key after stripping
                                surrounding "[" / "]" (e.g. "[Region/Name]")

    Raises DatePatternError if an offset is malformed or out of bounds, or if
    the timezone name cannot be loaded.
    """
    if not tzstr:
        return None
    if tzstr == "Z":
        return timezone.utc
    if tzstr[0] in "+-":
        sign = 1 if tzstr[0] == "+" else -1
        # Validate the text itself instead of relying on int(): int() also
        # accepts a nested sign ("--05:00" used to come out as +05:00),
        # surrounding whitespace and underscores.
        fields = re.fullmatch(r"(\d{1,2}):(\d{1,2})", tzstr[1:], re.ASCII)
        if fields is None:
            raise DatePatternError("invalid UTC offset format")
        hh, mm = int(fields.group(1)), int(fields.group(2))
        if mm >= 60:
            raise DatePatternError("invalid UTC offset format")
        # we do it this way so that, for example, -8:30 is
        # -8 hours and -30 minutes, not -8 hours and +30 minutes
        total_minutes = sign * (hh * 60 + mm)
        # enforce ISO-8601 bounds (-12:00 to +14:00)
        if not (-12 * 60 <= total_minutes <= 14 * 60):
            raise DatePatternError("UTC offset outside ISO-8601 bounds")
        return timezone(timedelta(minutes=total_minutes))
    # [Region/Name]
    try:
        return ZoneInfo(tzstr.strip("[]"))
    except (KeyError, ValueError, OSError) as err:
        # KeyError covers ZoneInfoNotFoundError (unknown key); ValueError covers
        # malformed keys; OSError covers keys that resolve to something that is
        # not a readable zone file.
        raise DatePatternError("invalid timezone format") from err
|
|
||
|
|
||
def compile_date_pattern(expr: str):
    """
    Compile a date: archive match expression into a timestamp predicate.

    Accepts any of:
        YYYY
        YYYY-MM
        YYYY-MM-DD
        YYYY-MM-DDTHH
        YYYY-MM-DDTHH:MM
        YYYY-MM-DDTHH:MM:SS
        YYYY-MM-DDTHH:MM:SS.fraction
        Unix epoch (@123456789)
    …with an optional trailing timezone (Z or ±HH:MM or [Region/City]).
    Unix epoch patterns are UTC and must not carry a timezone suffix.

    Returns a predicate that is True for timestamps inside the interval the
    expression names: the whole year / month / day / hour / minute / second,
    one second for an epoch value, or one exact instant when a fractional
    second is given.

    Raises DatePatternError if the expression does not match the grammar, if
    its timezone suffix is invalid, or if it matches the grammar but cannot be
    turned into a valid interval (for example an epoch value outside the range
    datetime can represent).
    """
    expr = expr.strip()
    pattern = r"""
        ^
        (?:
            (?P<fraction>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+)  # full timestamp with fraction
          | (?P<second>  \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})       # no fraction
          | (?P<minute>  \d{4}-\d{2}-\d{2}T\d{2}:\d{2})             # minute precision
          | (?P<hour>    \d{4}-\d{2}-\d{2}T\d{2})                   # hour precision
          | (?P<day>     \d{4}-\d{2}-\d{2})                         # day precision
          | (?P<month>   \d{4}-\d{2})                               # month precision
          | (?P<year>    \d{4})                                     # year precision
          | @(?P<epoch>\d+)                                         # unix epoch
        )
        (?P<tz>Z|[+\-]\d{2}:\d{2}|\[[^\]]+\])?   # optional timezone or [Region/City]
        $
    """
    m = re.match(pattern, expr, re.VERBOSE)
    if not m:
        raise DatePatternError(f"unrecognised date: {expr!r}")

    gd = m.groupdict()
    # None when no suffix was given; it is handed to parse_timestamp unchanged.
    # NOTE(review): the original author's comment says None means "local
    # timezone" - parse_timestamp is defined elsewhere, confirm there.
    tz = parse_tz(gd.get("tz"))

    # unix epoch and user-specified timezone are mutually exclusive
    if gd["epoch"] and tz is not None:
        raise DatePatternError("unix‐epoch patterns (@123456789) are UTC and must not include a timezone suffix")

    # (regex group, text appended to obtain a full timestamp, interval width)
    fixed_width = (
        ("second", "", timedelta(seconds=1)),
        ("minute", ":00", timedelta(minutes=1)),
        ("hour", ":00:00", timedelta(hours=1)),
        ("day", "T00:00:00", timedelta(days=1)),
    )
    # (regex group, text appended to obtain a full timestamp, width in months)
    calendar_width = (
        ("month", "-01T00:00:00", 1),
        ("year", "-01-01T00:00:00", 12),
    )

    try:
        # fractional-second exact match
        if gd["fraction"]:
            return exact_predicate(parse_timestamp(gd["fraction"], tzinfo=tz))

        # second / minute / hour / day precision: fixed-length interval
        for group, suffix, width in fixed_width:
            if gd[group]:
                start = parse_timestamp(gd[group] + suffix, tzinfo=tz)
                return interval_predicate(start, start + width)

        # month / year precision: the length depends on the calendar
        for group, suffix, months in calendar_width:
            if gd[group]:
                start = parse_timestamp(gd[group] + suffix, tzinfo=tz)
                return interval_predicate(start, offset_n_months(start, months))

        # unix epoch: the one second starting at that instant
        if gd["epoch"]:
            start = datetime.fromtimestamp(int(gd["epoch"]), tz=timezone.utc)
            return interval_predicate(start, start + timedelta(seconds=1))
    except DatePatternError:
        raise
    except (ValueError, OverflowError, OSError) as err:
        # The text matched the grammar but does not name a representable
        # instant (e.g. an out-of-range epoch, or adding the interval width
        # overflows datetime). Report it as a pattern error, not a raw
        # conversion error.
        raise DatePatternError(f"invalid date: {expr!r}") from err

    # should never get here
    raise DatePatternError(f"unrecognised date: {expr!r}")
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Interesting approach.
What I meant was rather something like (simplified to cover only YYYY and YYYY-MM here as an example):
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah yes, this would have been much simpler. I will try to refactor it to this approach tomorrow.