Skip to content
This repository was archived by the owner on Feb 6, 2025. It is now read-only.

Commit 34c968b

Browse files
authored
Allow microseconds in datetimes and allow identity-function parsing. (#991)
* Allow microseconds in datetimes, allow identity-function parsing. * Touch up expected schema in test data. * Switch from arrow to ciso8601 for date/datetime parsing. * Allow dates to be passed in place of datetimes, since the conversion is widening. * Delint. * Update scalar type docstrings in test schema. * Remove pytz imports. * Resolve lint issues. * Tighten mypy.ini file. * Explicitly mention date/datetime format is ciso8601 in error messages.
1 parent 3c71465 commit 34c968b

File tree

13 files changed

+436
-297
lines changed

13 files changed

+436
-297
lines changed

Pipfile

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ redis = ">=3.2.1,<4"
1414
redisgraph = ">=1.7,<1.9"
1515

1616
# Linters and other development tools
17-
bandit = ">=1.5.1,<2"
17+
bandit = ">=1.5.1,<1.7" # 1.7 has a blocking regression: https://github.com/PyCQA/bandit/issues/658
1818
black = "==20.8b1" # This is still marked as a beta release, pin it explicitly: https://github.com/pypa/pipenv/issues/1760
1919
codecov = ">=2.0.15,<3"
2020
flake8 = ">=3.6.0,<4"
@@ -39,10 +39,9 @@ sphinx-rtd-theme = ">=0.4.3,<1"
3939
sphinx = ">=1.8,<2"
4040

4141
[packages] # Make sure to keep in sync with setup.py requirements.
42-
arrow = ">=0.15.0,<1"
42+
ciso8601 = ">=2.1.3,<3"
4343
funcy = ">=1.7.3,<2"
4444
graphql-core = ">=3.1.2,<3.2" # minor versions sometimes contain breaking changes
45-
pytz = ">=2017.2"
4645
six = ">=1.10.0"
4746
sqlalchemy = ">=1.3.0,<2"
4847

Pipfile.lock

Lines changed: 253 additions & 217 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

graphql_compiler/deserialization.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# Copyright 2020-present Kensho Technologies, LLC.
22
"""Convert values to their underlying GraphQLType."""
3+
from datetime import date, datetime
34
from types import MappingProxyType
45
from typing import Any, Callable, Mapping, Tuple, Type
56

@@ -21,8 +22,8 @@
2122

2223
_ALLOWED_SCALAR_TYPES: Mapping[str, Tuple[Type, ...]] = MappingProxyType(
2324
{
24-
GraphQLDate.name: (str,),
25-
GraphQLDateTime.name: (str,),
25+
GraphQLDate.name: (str, date),
26+
GraphQLDateTime.name: (str, date, datetime),
2627
GraphQLFloat.name: (str, float, int),
2728
GraphQLDecimal.name: (str, float, int),
2829
GraphQLInt.name: (int, str),
@@ -124,6 +125,18 @@ def deserialize_scalar_value(expected_type: GraphQLScalarType, value: Any) -> An
124125
f"Cannot deserialize boolean value {value} to non-GraphQLBoolean type {expected_type}."
125126
)
126127

128+
# Explicitly disallow passing datetime objects as date objects.
129+
# In Python, datetime subclasses date and therefore datetimes are treated as dates implicitly
130+
# by truncating the time and timezone components. However, this is a loss of precision,
131+
# and implicitly losing precision like this is undesirable for our purposes.
132+
if isinstance(value, datetime) and is_same_type(GraphQLDate, expected_type):
133+
raise ValueError(
134+
f"Cannot use the datetime object {value} as a GraphQL Date value. While Python "
135+
f"datetimes are subclasses of date, the default behavior of simply truncating the time "
136+
f"and time zone data is undesirable as an implicit default. Please instead use "
137+
f"a date object or a string representing a date in ISO-8601 'YYYY-MM-DD' format."
138+
)
139+
127140
# Ensure value has an appropriate type and deserialize the value.
128141
expected_python_types, deserialization_function = types_and_deserialization
129142
if not isinstance(value, expected_python_types):

graphql_compiler/query_formatting/common.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import decimal
55
from typing import Any, Collection, Dict, Mapping, NoReturn, Type
66

7-
import arrow
87
from graphql import (
98
GraphQLBoolean,
109
GraphQLFloat,
@@ -102,8 +101,8 @@ def validate_argument_type(name: str, expected_type: QueryArgumentGraphQLType, v
102101
except ValueError as e:
103102
raise GraphQLInvalidArgumentError(e)
104103
elif is_same_type(GraphQLDateTime, stripped_type):
105-
if not isinstance(value, (datetime.date, arrow.Arrow)):
106-
_raise_invalid_type_error(name, (datetime.date, arrow.Arrow), value)
104+
if not isinstance(value, datetime.datetime):
105+
_raise_invalid_type_error(name, (datetime.datetime,), value)
107106
try:
108107
GraphQLDateTime.serialize(value)
109108
except ValueError as e:

graphql_compiler/query_formatting/cypher_formatting.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import json
44
from string import Template
55

6-
import arrow
76
from graphql import GraphQLBoolean, GraphQLFloat, GraphQLID, GraphQLInt, GraphQLList, GraphQLString
87
import six
98

@@ -100,9 +99,7 @@ def _safe_cypher_argument(expected_type, argument_value):
10099
elif is_same_type(GraphQLDate, expected_type):
101100
return _safe_cypher_date_and_datetime(expected_type, (datetime.date,), argument_value)
102101
elif is_same_type(GraphQLDateTime, expected_type):
103-
return _safe_cypher_date_and_datetime(
104-
expected_type, (datetime.datetime, arrow.Arrow), argument_value
105-
)
102+
return _safe_cypher_date_and_datetime(expected_type, (datetime.datetime,), argument_value)
106103
elif isinstance(expected_type, GraphQLList):
107104
return _safe_cypher_list(expected_type.of_type, argument_value)
108105
else:

graphql_compiler/schema/__init__.py

Lines changed: 80 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
from itertools import chain
77
from typing import Any, FrozenSet, Iterable
88

9-
import arrow
9+
# C-based module confuses pylint, which is why we disable the check below.
10+
from ciso8601 import parse_datetime # pylint: disable=no-name-in-module
1011
from graphql import (
1112
DirectiveLocation,
1213
GraphQLArgument,
@@ -340,45 +341,92 @@ def _serialize_date(value: Any) -> str:
340341

341342
def _parse_date_value(value: Any) -> date:
342343
"""Deserialize a Date object from its proper ISO-8601 representation."""
343-
return arrow.get(value, "YYYY-MM-DD").date()
344+
if type(value) == date:
345+
# We prefer exact type equality instead of isinstance() because datetime objects
346+
# are subclasses of date but are not interchangeable for dates for our purposes.
347+
return value
348+
elif isinstance(value, str):
349+
# ciso8601 only supports parsing into datetime objects, not date objects.
350+
# This is not a problem in itself: "YYYY-MM-DD" strings will get parsed into datetimes
351+
# with hour/minute/second/microsecond set to 0, and tzinfo=None.
352+
# We don't want our parsing to implicitly lose precision, so before we convert the parsed
353+
# datetime into a date value, we just assert that these fields are set as expected.
354+
dt = parse_datetime(value) # This will raise ValueError in case of bad ISO 8601 formatting.
355+
if (
356+
dt.hour != 0
357+
or dt.minute != 0
358+
or dt.second != 0
359+
or dt.microsecond != 0
360+
or dt.tzinfo is not None
361+
):
362+
raise ValueError(
363+
f"Expected an ISO-8601 date string in 'YYYY-MM-DD' format, but got a datetime "
364+
f"string with a non-empty time component. This is not supported, since converting "
365+
f"it to a date would result in an implicit loss of precision. Received value "
366+
f"{repr(value)}, parsed as {dt}."
367+
)
368+
369+
return dt.date()
370+
else:
371+
raise ValueError(
372+
f"Expected a date object or its ISO-8601 'YYYY-MM-DD' string representation. "
373+
f"Got {value} of type {type(value)} instead."
374+
)
344375

345376

346377
def _serialize_datetime(value: Any) -> str:
347378
"""Serialize a DateTime object to its proper ISO-8601 representation."""
348-
# Python datetime.datetime is a subclass of datetime.date, but in this case, the two are not
349-
# interchangeable. Rather than using isinstance, we will therefore check for exact type
350-
# equality.
351-
#
352-
# We don't allow Arrow objects as input since it seems that Arrow objects are always tz aware.
353-
# This is supported by the fact that the `.naive` Arrow method returns a datetime object instead
354-
# of an Arrow object.
355-
if type(value) == datetime and value.tzinfo is None:
379+
if isinstance(value, datetime) and value.tzinfo is None:
356380
return value.isoformat()
357381
else:
358382
raise ValueError(
359-
f"Expected a timezone naive datetime object. Got {value} of type {type(value)} instead."
383+
f"Expected a timezone-naive datetime object. Got {value} of type {type(value)} instead."
360384
)
361385

362386

363387
def _parse_datetime_value(value: Any) -> datetime:
364-
"""Deserialize a DateTime object from its proper ISO-8601 representation."""
365-
# attempt to parse with microsecond information
366-
try:
367-
arrow_result = arrow.get(value, "YYYY-MM-DDTHH:mm:ss")
368-
except arrow.parser.ParserMatchError:
369-
arrow_result = arrow.get(value, "YYYY-MM-DDTHH:mm:ss.S")
370-
371-
# arrow parses datetime naive strings into Arrow objects with a UTC timezone.
372-
return arrow_result.naive
388+
"""Deserialize a DateTime object from a date/datetime or a ISO-8601 string representation."""
389+
if isinstance(value, datetime) and value.tzinfo is None:
390+
return value
391+
elif isinstance(value, str):
392+
dt = parse_datetime(value) # This will raise ValueError in case of bad ISO 8601 formatting.
393+
if dt.tzinfo is not None:
394+
raise ValueError(
395+
f"Expected a timezone-naive datetime value, but got a timezone-aware datetime "
396+
f"string. This is not supported, since discarding the timezone component would "
397+
f"result in an implicit loss of precision. Received value {repr(value)}, "
398+
f"parsed as {dt}."
399+
)
400+
401+
return dt
402+
elif type(value) == date:
403+
# The date type is a supertype of datetime. We check for exact type equality
404+
# rather than using isinstance(), to avoid having this branch get hit
405+
# by timezone-aware datetimes (i.e. ones that fail the value.tzinfo is None check above).
406+
#
407+
# This is a widening conversion (there's no loss of precision) so we allow it to be implicit
408+
# since use ciso8601 parsing logic for parsing datetimes, and ciso8601 successfully parses
409+
# datetimes that only have data down to day precision.
410+
return datetime(value.year, value.month, value.day)
411+
else:
412+
raise ValueError(
413+
f"Expected a timezone-naive datetime or an ISO-8601 string representation parseable "
414+
f"by the ciso8601 library. Got {value} of type {type(value)} instead."
415+
)
373416

374417

375418
GraphQLDate = GraphQLScalarType(
376419
name="Date",
377420
description=(
378421
"The `Date` scalar type represents day-accuracy date objects."
379422
"Values are serialized following the ISO-8601 datetime format specification, "
380-
'for example "2017-03-21". The year, month and day fields must be included, '
381-
"and the format followed exactly, or the behavior is undefined."
423+
'for example "2017-03-21". Serialization and parsing support is guaranteed for the format '
424+
"described here, with the year, month and day fields included and separated by dashes as "
425+
"in the example. Implementations are allowed to support additional serialization formats, "
426+
"if they so choose."
427+
# GraphQL compiler's implementation of GraphQL-based querying uses the ciso8601 library
428+
# for date and datetime parsing, so it additionally supports the subset of the ISO-8601
429+
# standard supported by that library.
382430
),
383431
serialize=_serialize_date,
384432
parse_value=_parse_date_value,
@@ -389,10 +437,16 @@ def _parse_datetime_value(value: Any) -> datetime:
389437
GraphQLDateTime = GraphQLScalarType(
390438
name="DateTime",
391439
description=(
392-
"The `DateTime` scalar type represents timezone-naive second-accuracy timestamps."
393-
"Values are serialized following the ISO-8601 datetime format specification, "
394-
'for example "2017-03-21T12:34:56". All of these fields must be included, '
395-
"including the seconds, and the format followed exactly, or the behavior is undefined."
440+
"The `DateTime` scalar type represents timezone-naive timestamps with up to microsecond "
441+
"accuracy. Values are serialized following the ISO-8601 datetime format specification, "
442+
'for example "2017-03-21T12:34:56.012345" or "2017-03-21T12:34:56". Serialization and '
443+
"parsing support is guaranteed for the format described here, with all fields down to "
444+
"and including seconds required to be included, and fractional seconds optional, as in "
445+
"the example. Implementations are allowed to support additional serialization formats, "
446+
"if they so choose."
447+
# GraphQL compiler's implementation of GraphQL-based querying uses the ciso8601 library
448+
# for date and datetime parsing, so it additionally supports the subset of the ISO-8601
449+
# standard supported by that library.
396450
),
397451
serialize=_serialize_datetime,
398452
parse_value=_parse_datetime_value,

graphql_compiler/tests/snapshot_tests/test_cost_estimation.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
# Copyright 2019-present Kensho Technologies, LLC.
2-
from datetime import date, datetime
2+
from datetime import date, datetime, timedelta, timezone
33
import math
44
from typing import Any, Dict, List
55
import unittest
66

77
import pytest
8-
import pytz
98

109
from .. import test_input_data
1110
from ...compiler.metadata import FilterInfo
@@ -1849,11 +1848,22 @@ def test_int_value_conversion_datetime(self):
18491848
datetime_values = [
18501849
datetime(2000, 1, 1),
18511850
datetime(3000, 1, 1, tzinfo=None),
1852-
datetime(1000, 1, 1, tzinfo=pytz.utc),
1853-
datetime(1, 1, 1, tzinfo=pytz.utc),
1854-
datetime(2000, 1, 1, 20, 55, 40, 877633, tzinfo=pytz.utc),
1855-
datetime(2000, 1, 1, 20, 55, 40, 877633, tzinfo=pytz.timezone("GMT")),
1856-
datetime(2000, 1, 1, 20, 55, 40, 877633, tzinfo=pytz.timezone("America/New_York")),
1851+
datetime(1000, 1, 1, tzinfo=timezone.utc),
1852+
datetime(1, 1, 1, tzinfo=timezone.utc),
1853+
datetime(2000, 1, 1, 20, 55, 40, 877633, tzinfo=timezone.utc),
1854+
datetime(
1855+
2000, 1, 1, 20, 55, 40, 877633, tzinfo=timezone(timedelta(hours=0), name="GMT")
1856+
),
1857+
datetime(
1858+
2000,
1859+
1,
1860+
1,
1861+
20,
1862+
55,
1863+
40,
1864+
877633,
1865+
tzinfo=timezone(timedelta(hours=-4), name="America/New_York"),
1866+
),
18571867
]
18581868
for datetime_value in datetime_values:
18591869
int_value = convert_field_value_to_int(

graphql_compiler/tests/test_end_to_end.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
GraphQLScalarType,
1515
GraphQLString,
1616
)
17-
import pytz
1817
import six
1918

2019
from .. import graphql_to_gremlin, graphql_to_match
@@ -227,9 +226,16 @@ def test_argument_types(self) -> None:
227226
(
228227
"2007-12-06T16:29:43",
229228
datetime.date(2007, 12, 6),
230-
datetime.datetime(2008, 12, 6, 16, 29, 43, 79043, tzinfo=pytz.utc),
229+
datetime.datetime(2008, 12, 6, 16, 29, 43, 79043, tzinfo=datetime.timezone.utc),
231230
datetime.datetime(
232-
2009, 12, 6, 16, 29, 43, 79043, tzinfo=pytz.timezone("US/Eastern")
231+
2009,
232+
12,
233+
6,
234+
16,
235+
29,
236+
43,
237+
79043,
238+
tzinfo=datetime.timezone(datetime.timedelta(hours=-4), name="US/Eastern"),
233239
),
234240
),
235241
),
@@ -272,9 +278,19 @@ def test_date_deserialization(self) -> None:
272278
self.assertEqual(datetime.date(2014, 2, 5), value)
273279

274280
def test_datetime_deserialization(self) -> None:
275-
# No time provided
276-
with self.assertRaises(GraphQLInvalidArgumentError):
277-
deserialize_argument("birth_time", GraphQLDateTime, "2014-02-05")
281+
# No time provided, but still acceptable with zero time components.
282+
value = deserialize_argument("birth_time", GraphQLDateTime, "2014-02-05")
283+
self.assertEqual(datetime.datetime(2014, 2, 5), value)
284+
285+
# Time component with excess precision is truncated (not rounded!) down to microseconds.
286+
# This example has 7 decimal places, whereas Python supports a maximum of 6.
287+
value = deserialize_argument("birth_time", GraphQLDateTime, "2000-02-29T13:02:27.0018349")
288+
self.assertEqual(datetime.datetime(2000, 2, 29, 13, 2, 27, 1834), value)
289+
290+
# Allow dates to be implicitly converted into datetimes, since this is a lossless,
291+
# widening conversion.
292+
value = deserialize_argument("birth_time", GraphQLDateTime, datetime.date(2000, 2, 29))
293+
self.assertEqual(datetime.datetime(2000, 2, 29), value)
278294

279295
# With timezone
280296
with self.assertRaises(GraphQLInvalidArgumentError):

graphql_compiler/tests/test_helpers.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -130,19 +130,23 @@
130130
}
131131
132132
\"\"\"
133-
The `Date` scalar type represents day-accuracy date objects.Values are
134-
serialized following the ISO-8601 datetime format specification, for example
135-
"2017-03-21". The year, month and day fields must be included, and the format
136-
followed exactly, or the behavior is undefined.
133+
The `Date` scalar type represents day-accuracy date objects.
134+
Values are serialized following the ISO-8601 datetime format specification,
135+
for example "2017-03-21". Serialization and parsing support is guaranteed for the format
136+
described here, with the year, month and day fields included and separated by dashes as
137+
in the example. Implementations are allowed to support additional serialization formats,
138+
if they so choose.
137139
\"\"\"
138140
scalar Date
139141
140142
\"\"\"
141-
The `DateTime` scalar type represents timezone-naive second-accuracy
142-
timestamps.Values are serialized following the ISO-8601 datetime format
143-
specification, for example "2017-03-21T12:34:56". All of these fields must
144-
be included, including the seconds, and the format followed
145-
exactly, or the behavior is undefined.
143+
The `DateTime` scalar type represents timezone-naive timestamps with up to microsecond
144+
accuracy. Values are serialized following the ISO-8601 datetime format specification,
145+
for example "2017-03-21T12:34:56.012345" or "2017-03-21T12:34:56". Serialization and
146+
parsing support is guaranteed for the format described here, with all fields down to
147+
and including seconds required to be included, and fractional seconds optional, as in
148+
the example. Implementations are allowed to support additional serialization formats,
149+
if they so choose.
146150
\"\"\"
147151
scalar DateTime
148152

graphql_compiler/tests/test_post_processing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ def test_convert_basic_datetime(self):
320320
{
321321
"child_datetime_fields": [
322322
datetime.datetime(2020, 1, 1, 5, 45),
323-
datetime.datetime(2000, 2, 29, 13, 2, 27, 1835),
323+
datetime.datetime(2000, 2, 29, 13, 2, 27, 1834), # truncated, not rounded
324324
None,
325325
],
326326
}

0 commit comments

Comments
 (0)