diff --git a/bson/_cbsonmodule.c b/bson/_cbsonmodule.c index 68ec9fe45c..cc498f448e 100644 --- a/bson/_cbsonmodule.c +++ b/bson/_cbsonmodule.c @@ -53,8 +53,10 @@ struct module_state { PyObject* Decimal128; PyObject* Mapping; PyObject* DatetimeMS; - PyObject* _min_datetime_ms; - PyObject* _max_datetime_ms; + PyObject* min_datetime; + PyObject* max_datetime; + PyObject* replace_args; + PyObject* replace_kwargs; PyObject* _type_marker_str; PyObject* _flags_str; PyObject* _pattern_str; @@ -80,6 +82,8 @@ struct module_state { PyObject* _from_uuid_str; PyObject* _as_uuid_str; PyObject* _from_bid_str; + int64_t min_millis; + int64_t max_millis; }; #define GETSTATE(m) ((struct module_state*)PyModule_GetState(m)) @@ -253,7 +257,7 @@ static PyObject* datetime_from_millis(long long millis) { * 2. Multiply that by 1000: 253402300799000 * 3. Add in microseconds divided by 1000 253402300799999 * - * (Note: BSON doesn't support microsecond accuracy, hence the rounding.) + * (Note: BSON doesn't support microsecond accuracy, hence the truncation.) * * To decode we could do: * 1. 
Get seconds: timestamp / 1000: 253402300799 @@ -376,6 +380,118 @@ static int millis_from_datetime_ms(PyObject* dt, long long* out){ return 1; } +static PyObject* decode_datetime(PyObject* self, long long millis, const codec_options_t* options){ + PyObject* naive = NULL; + PyObject* replace = NULL; + PyObject* args = NULL; + PyObject* kwargs = NULL; + PyObject* value = NULL; + struct module_state *state = GETSTATE(self); + if (options->datetime_conversion == DATETIME_MS){ + return datetime_ms_from_millis(self, millis); + } + + int dt_clamp = options->datetime_conversion == DATETIME_CLAMP; + int dt_auto = options->datetime_conversion == DATETIME_AUTO; + + if (dt_clamp || dt_auto){ + int64_t min_millis = state->min_millis; + int64_t max_millis = state->max_millis; + int64_t min_millis_offset = 0; + int64_t max_millis_offset = 0; + if (options->tz_aware && options->tzinfo && options->tzinfo != Py_None) { + PyObject* utcoffset = PyObject_CallMethodObjArgs(options->tzinfo, state->_utcoffset_str, state->min_datetime, NULL); + if (utcoffset == NULL) { + return 0; + } + if (utcoffset != Py_None) { + if (!PyDelta_Check(utcoffset)) { + PyObject* BSONError = _error("BSONError"); + if (BSONError) { + PyErr_SetString(BSONError, "tzinfo.utcoffset() did not return a datetime.timedelta"); + Py_DECREF(BSONError); + } + Py_DECREF(utcoffset); + return 0; + } + min_millis_offset = (PyDateTime_DELTA_GET_DAYS(utcoffset) * 86400 + + PyDateTime_DELTA_GET_SECONDS(utcoffset)) * 1000 + + (PyDateTime_DELTA_GET_MICROSECONDS(utcoffset) / 1000); + } + Py_DECREF(utcoffset); + utcoffset = PyObject_CallMethodObjArgs(options->tzinfo, state->_utcoffset_str, state->max_datetime, NULL); + if (utcoffset == NULL) { + return 0; + } + if (utcoffset != Py_None) { + if (!PyDelta_Check(utcoffset)) { + PyObject* BSONError = _error("BSONError"); + if (BSONError) { + PyErr_SetString(BSONError, "tzinfo.utcoffset() did not return a datetime.timedelta"); + Py_DECREF(BSONError); + } + Py_DECREF(utcoffset); + return 
0; + } + max_millis_offset = (PyDateTime_DELTA_GET_DAYS(utcoffset) * 86400 + + PyDateTime_DELTA_GET_SECONDS(utcoffset)) * 1000 + + (PyDateTime_DELTA_GET_MICROSECONDS(utcoffset) / 1000); + } + Py_DECREF(utcoffset); + } + if (min_millis_offset < 0) { + min_millis -= min_millis_offset; + } + + if (max_millis_offset > 0) { + max_millis -= max_millis_offset; + } + + if (dt_clamp) { + if (millis < min_millis) { + millis = min_millis; + } else if (millis > max_millis) { + millis = max_millis; + } + // Continues from here to return a datetime. + } else { // dt_auto + if (millis < min_millis || millis > max_millis){ + return datetime_ms_from_millis(self, millis); + } + } + } + + naive = datetime_from_millis(millis); + if (!naive) { + goto invalid; + } + + if (!options->tz_aware) { /* In the naive case, we're done here. */ + return naive; + } + replace = PyObject_GetAttr(naive, state->_replace_str); + if (!replace) { + goto invalid; + } + value = PyObject_Call(replace, state->replace_args, state->replace_kwargs); + if (!value) { + goto invalid; + } + + /* convert to local time */ + if (options->tzinfo != Py_None) { + PyObject* temp = PyObject_CallMethodObjArgs(value, state->_astimezone_str, options->tzinfo, NULL); + Py_DECREF(value); + value = temp; + } +invalid: + Py_XDECREF(naive); + Py_XDECREF(replace); + Py_XDECREF(args); + Py_XDECREF(kwargs); + return value; +} + /* Just make this compatible w/ the old API. 
*/ int buffer_write_bytes(buffer_t buffer, const char* data, int size) { if (pymongo_buffer_write(buffer, data, size)) { @@ -482,6 +598,8 @@ static int _load_python_objects(PyObject* module) { PyObject* empty_string = NULL; PyObject* re_compile = NULL; PyObject* compiled = NULL; + PyObject* min_datetime_ms = NULL; + PyObject* max_datetime_ms = NULL; struct module_state *state = GETSTATE(module); if (!state) { return 1; @@ -530,10 +648,34 @@ static int _load_python_objects(PyObject* module) { _load_object(&state->UUID, "uuid", "UUID") || _load_object(&state->Mapping, "collections.abc", "Mapping") || _load_object(&state->DatetimeMS, "bson.datetime_ms", "DatetimeMS") || - _load_object(&state->_min_datetime_ms, "bson.datetime_ms", "_min_datetime_ms") || - _load_object(&state->_max_datetime_ms, "bson.datetime_ms", "_max_datetime_ms")) { + _load_object(&min_datetime_ms, "bson.datetime_ms", "_MIN_UTC_MS") || + _load_object(&max_datetime_ms, "bson.datetime_ms", "_MAX_UTC_MS") || + _load_object(&state->min_datetime, "bson.datetime_ms", "_MIN_UTC") || + _load_object(&state->max_datetime, "bson.datetime_ms", "_MAX_UTC")) { + return 1; + } + + state->min_millis = PyLong_AsLongLong(min_datetime_ms); + state->max_millis = PyLong_AsLongLong(max_datetime_ms); + Py_DECREF(min_datetime_ms); + Py_DECREF(max_datetime_ms); + if ((state->min_millis == -1 || state->max_millis == -1) && PyErr_Occurred()) { + return 1; + } + + /* Speed up datetime.replace(tzinfo=utc) call */ + state->replace_args = PyTuple_New(0); + if (!state->replace_args) { + return 1; + } + state->replace_kwargs = PyDict_New(); + if (!state->replace_kwargs) { return 1; } + if (PyDict_SetItem(state->replace_kwargs, state->_tzinfo_str, state->UTC) == -1) { + return 1; + } + /* Reload our REType hack too. 
*/ empty_string = PyBytes_FromString(""); if (empty_string == NULL) { @@ -1247,8 +1389,8 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, return 0; if (utcoffset != Py_None) { PyObject* result = PyNumber_Subtract(value, utcoffset); - Py_DECREF(utcoffset); if (!result) { + Py_DECREF(utcoffset); return 0; } millis = millis_from_datetime(result); @@ -1256,6 +1398,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, } else { millis = millis_from_datetime(value); } + Py_DECREF(utcoffset); *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x09; return buffer_write_int64(buffer, (int64_t)millis); } else if (PyObject_TypeCheck(value, state->REType)) { @@ -2043,11 +2186,6 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer, } case 9: { - PyObject* naive; - PyObject* replace; - PyObject* args; - PyObject* kwargs; - PyObject* astimezone; int64_t millis; if (max < 8) { goto invalid; @@ -2056,120 +2194,7 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer, millis = (int64_t)BSON_UINT64_FROM_LE(millis); *position += 8; - if (options->datetime_conversion == DATETIME_MS){ - value = datetime_ms_from_millis(self, millis); - break; - } - - int dt_clamp = options->datetime_conversion == DATETIME_CLAMP; - int dt_auto = options->datetime_conversion == DATETIME_AUTO; - - - if (dt_clamp || dt_auto){ - PyObject *min_millis_fn_res; - PyObject *max_millis_fn_res; - int64_t min_millis; - int64_t max_millis; - - if (options->tz_aware){ - PyObject* tzinfo = options->tzinfo; - if (tzinfo == Py_None) { - // Default to UTC. 
- tzinfo = state->UTC; - } - min_millis_fn_res = PyObject_CallFunctionObjArgs(state->_min_datetime_ms, tzinfo, NULL); - max_millis_fn_res = PyObject_CallFunctionObjArgs(state->_max_datetime_ms, tzinfo, NULL); - } else { - min_millis_fn_res = PyObject_CallObject(state->_min_datetime_ms, NULL); - max_millis_fn_res = PyObject_CallObject(state->_max_datetime_ms, NULL); - } - - if (!min_millis_fn_res || !max_millis_fn_res){ - Py_XDECREF(min_millis_fn_res); - Py_XDECREF(max_millis_fn_res); - goto invalid; - } - - min_millis = PyLong_AsLongLong(min_millis_fn_res); - max_millis = PyLong_AsLongLong(max_millis_fn_res); - - if ((min_millis == -1 || max_millis == -1) && PyErr_Occurred()) - { - // min/max_millis check - goto invalid; - } - - if (dt_clamp) { - if (millis < min_millis) { - millis = min_millis; - } else if (millis > max_millis) { - millis = max_millis; - } - // Continues from here to return a datetime. - } else { // dt_auto - if (millis < min_millis || millis > max_millis){ - value = datetime_ms_from_millis(self, millis); - break; // Out-of-range so done. - } - } - } - - naive = datetime_from_millis(millis); - if (!options->tz_aware) { /* In the naive case, we're done here. 
*/ - value = naive; - break; - } - - if (!naive) { - goto invalid; - } - replace = PyObject_GetAttr(naive, state->_replace_str); - Py_DECREF(naive); - if (!replace) { - goto invalid; - } - args = PyTuple_New(0); - if (!args) { - Py_DECREF(replace); - goto invalid; - } - kwargs = PyDict_New(); - if (!kwargs) { - Py_DECREF(replace); - Py_DECREF(args); - goto invalid; - } - if (PyDict_SetItem(kwargs, state->_tzinfo_str, state->UTC) == -1) { - Py_DECREF(replace); - Py_DECREF(args); - Py_DECREF(kwargs); - goto invalid; - } - value = PyObject_Call(replace, args, kwargs); - if (!value) { - Py_DECREF(replace); - Py_DECREF(args); - Py_DECREF(kwargs); - goto invalid; - } - - /* convert to local time */ - if (options->tzinfo != Py_None) { - astimezone = PyObject_GetAttr(value, state->_astimezone_str); - Py_DECREF(value); - if (!astimezone) { - Py_DECREF(replace); - Py_DECREF(args); - Py_DECREF(kwargs); - goto invalid; - } - value = PyObject_CallFunctionObjArgs(astimezone, options->tzinfo, NULL); - Py_DECREF(astimezone); - } - - Py_DECREF(replace); - Py_DECREF(args); - Py_DECREF(kwargs); + value = decode_datetime(self, millis, options); break; } case 11: @@ -3053,6 +3078,10 @@ static int _cbson_traverse(PyObject *m, visitproc visit, void *arg) { Py_VISIT(state->_from_uuid_str); Py_VISIT(state->_as_uuid_str); Py_VISIT(state->_from_bid_str); + Py_VISIT(state->min_datetime); + Py_VISIT(state->max_datetime); + Py_VISIT(state->replace_args); + Py_VISIT(state->replace_kwargs); return 0; } @@ -3097,6 +3126,10 @@ static int _cbson_clear(PyObject *m) { Py_CLEAR(state->_from_uuid_str); Py_CLEAR(state->_as_uuid_str); Py_CLEAR(state->_from_bid_str); + Py_CLEAR(state->min_datetime); + Py_CLEAR(state->max_datetime); + Py_CLEAR(state->replace_args); + Py_CLEAR(state->replace_kwargs); return 0; } diff --git a/bson/datetime_ms.py b/bson/datetime_ms.py index 48e57e0d11..1b6fa22794 100644 --- a/bson/datetime_ms.py +++ b/bson/datetime_ms.py @@ -20,7 +20,6 @@ import calendar import datetime 
-import functools from typing import Any, Union, cast from bson.codec_options import DEFAULT_CODEC_OPTIONS, CodecOptions, DatetimeConversion @@ -127,11 +126,8 @@ def _datetime_to_millis(dtm: datetime.datetime) -> int: _MAX_UTC_MS = _datetime_to_millis(_MAX_UTC) -# Inclusive and exclusive min and max for timezones. -# Timezones are hashed by their offset, which is a timedelta -# and therefore there are more than 24 possible timezones. -@functools.lru_cache(maxsize=None) -def _min_datetime_ms(tz: datetime.timezone = datetime.timezone.utc) -> int: +# Inclusive min and max for timezones. +def _min_datetime_ms(tz: datetime.tzinfo = utc) -> int: delta = tz.utcoffset(_MIN_UTC) if delta is not None: offset_millis = (delta.days * 86400 + delta.seconds) * 1000 + delta.microseconds // 1000 @@ -140,8 +136,7 @@ def _min_datetime_ms(tz: datetime.timezone = datetime.timezone.utc) -> int: return max(_MIN_UTC_MS, _MIN_UTC_MS - offset_millis) -@functools.lru_cache(maxsize=None) -def _max_datetime_ms(tz: datetime.timezone = datetime.timezone.utc) -> int: +def _max_datetime_ms(tz: datetime.tzinfo = utc) -> int: delta = tz.utcoffset(_MAX_UTC) if delta is not None: offset_millis = (delta.days * 86400 + delta.seconds) * 1000 + delta.microseconds // 1000 @@ -159,7 +154,7 @@ def _millis_to_datetime( or opts.datetime_conversion == DatetimeConversion.DATETIME_CLAMP or opts.datetime_conversion == DatetimeConversion.DATETIME_AUTO ): - tz = opts.tzinfo or datetime.timezone.utc + tz = opts.tzinfo or utc if opts.datetime_conversion == DatetimeConversion.DATETIME_CLAMP: millis = max(_min_datetime_ms(tz), min(millis, _max_datetime_ms(tz))) elif opts.datetime_conversion == DatetimeConversion.DATETIME_AUTO: diff --git a/bson/json_util.py b/bson/json_util.py index 6c5197c75a..4269ba9858 100644 --- a/bson/json_util.py +++ b/bson/json_util.py @@ -125,10 +125,10 @@ from bson.code import Code from bson.codec_options import CodecOptions, DatetimeConversion from bson.datetime_ms import ( + _MAX_UTC_MS, 
EPOCH_AWARE, DatetimeMS, _datetime_to_millis, - _max_datetime_ms, _millis_to_datetime, ) from bson.dbref import DBRef @@ -844,7 +844,7 @@ def _encode_binary(data: bytes, subtype: int, json_options: JSONOptions) -> Any: def _encode_datetimems(obj: Any, json_options: JSONOptions) -> dict: if ( json_options.datetime_representation == DatetimeRepresentation.ISO8601 - and 0 <= int(obj) <= _max_datetime_ms() + and 0 <= int(obj) <= _MAX_UTC_MS ): return _encode_datetime(obj.as_datetime(), json_options) elif json_options.datetime_representation == DatetimeRepresentation.LEGACY: diff --git a/bson/objectid.py b/bson/objectid.py index a5500872da..970c4e52e8 100644 --- a/bson/objectid.py +++ b/bson/objectid.py @@ -16,7 +16,6 @@ from __future__ import annotations import binascii -import calendar import datetime import os import struct @@ -25,6 +24,7 @@ from random import SystemRandom from typing import Any, NoReturn, Optional, Type, Union +from bson.datetime_ms import _datetime_to_millis from bson.errors import InvalidId from bson.tz_util import utc @@ -131,11 +131,10 @@ def from_datetime(cls: Type[ObjectId], generation_time: datetime.datetime) -> Ob :param generation_time: :class:`~datetime.datetime` to be used as the generation time for the resulting ObjectId. 
""" - offset = generation_time.utcoffset() - if offset is not None: - generation_time = generation_time - offset - timestamp = calendar.timegm(generation_time.timetuple()) - oid = _PACK_INT(int(timestamp)) + b"\x00\x00\x00\x00\x00\x00\x00\x00" + oid = ( + _PACK_INT(_datetime_to_millis(generation_time) // 1000) + + b"\x00\x00\x00\x00\x00\x00\x00\x00" + ) return cls(oid) @classmethod diff --git a/doc/changelog.rst b/doc/changelog.rst index d80f78fe4d..42a4fdf50f 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -31,6 +31,12 @@ PyMongo 4.9 brings a number of improvements including: :class:`~pymongo.operations.DeleteMany` operations, so they can be used in the new :meth:`~pymongo.mongo_client.MongoClient.bulk_write`. - Added :func:`repr` support to :class:`bson.tz_util.FixedOffset`. +- Fixed a bug where PyMongo would raise ``InvalidBSON: unhashable type: 'tzfile'`` + when using :attr:`~bson.codec_options.DatetimeConversion.DATETIME_CLAMP` or + :attr:`~bson.codec_options.DatetimeConversion.DATETIME_AUTO` with a timezone from dateutil. +- Fixed a bug where PyMongo would raise ``InvalidBSON: date value out of range`` + when using :attr:`~bson.codec_options.DatetimeConversion.DATETIME_CLAMP` or + :attr:`~bson.codec_options.DatetimeConversion.DATETIME_AUTO` with a non-UTC timezone. Issues Resolved ............... diff --git a/doc/examples/datetimes.rst b/doc/examples/datetimes.rst index 5571880e94..1790506423 100644 --- a/doc/examples/datetimes.rst +++ b/doc/examples/datetimes.rst @@ -98,7 +98,7 @@ out of MongoDB in US/Pacific time: >>> aware_times = db.times.with_options(codec_options=CodecOptions( ... tz_aware=True, ... 
tzinfo=pytz.timezone('US/Pacific'))) - >>> result = aware_times.find_one() + >>> result = aware_times.find_one()['date'] datetime.datetime(2002, 10, 27, 6, 0, # doctest: +NORMALIZE_WHITESPACE tzinfo=<DstTzInfo 'US/Pacific' PST-1 day, 16:00:00 STD>) diff --git a/test/test_bson.py b/test/test_bson.py index 4996c46b92..a0190ef2d8 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -1362,6 +1362,31 @@ def test_tz_clamping_non_utc_simple(self): opts = CodecOptions(datetime_conversion=conversion, tz_aware=True, tzinfo=tz) self.assertEqual(decode(encoded, opts)["d"], dtm.replace(tzinfo=utc).astimezone(tz)) + def test_tz_clamping_non_hashable(self): + class NonHashableTZ(FixedOffset): + __hash__ = None + + tz = NonHashableTZ(0, "UTC-non-hashable") + self.assertRaises(TypeError, hash, tz) + # Aware clamping. + opts = CodecOptions( + datetime_conversion=DatetimeConversion.DATETIME_CLAMP, tz_aware=True, tzinfo=tz + ) + below = encode({"x": DatetimeMS(_datetime_to_millis(datetime.datetime.min) - 24 * 60 * 60)}) + dec_below = decode(below, opts) + self.assertEqual(dec_below["x"], datetime.datetime.min.replace(tzinfo=tz)) + + within = encode({"x": EPOCH_AWARE.astimezone(tz)}) + dec_within = decode(within, opts) + self.assertEqual(dec_within["x"], EPOCH_AWARE.astimezone(tz)) + + above = encode({"x": DatetimeMS(_datetime_to_millis(datetime.datetime.max) + 24 * 60 * 60)}) + dec_above = decode(above, opts) + self.assertEqual( + dec_above["x"], + datetime.datetime.max.replace(tzinfo=tz, microsecond=999000), + ) + def test_datetime_auto(self): # Naive auto, in range. 
opts1 = CodecOptions(datetime_conversion=DatetimeConversion.DATETIME_AUTO) diff --git a/test/test_json_util.py b/test/test_json_util.py index 0f73a8efd9..3a40c174e8 100644 --- a/test/test_json_util.py +++ b/test/test_json_util.py @@ -39,7 +39,7 @@ UuidRepresentation, ) from bson.code import Code -from bson.datetime_ms import _max_datetime_ms +from bson.datetime_ms import _MAX_UTC_MS from bson.dbref import DBRef from bson.decimal128 import Decimal128 from bson.int64 import Int64 @@ -257,7 +257,7 @@ def test_datetime(self): def test_datetime_ms(self): # Test ISO8601 in-range dat_min: dict[str, Any] = {"x": DatetimeMS(0)} - dat_max: dict[str, Any] = {"x": DatetimeMS(_max_datetime_ms())} + dat_max: dict[str, Any] = {"x": DatetimeMS(_MAX_UTC_MS)} opts = JSONOptions(datetime_representation=DatetimeRepresentation.ISO8601) self.assertEqual( @@ -271,7 +271,7 @@ def test_datetime_ms(self): # Test ISO8601 out-of-range dat_min = {"x": DatetimeMS(-1)} - dat_max = {"x": DatetimeMS(_max_datetime_ms() + 1)} + dat_max = {"x": DatetimeMS(_MAX_UTC_MS + 1)} self.assertEqual('{"x": {"$date": {"$numberLong": "-1"}}}', json_util.dumps(dat_min)) self.assertEqual( @@ -302,7 +302,7 @@ def test_datetime_ms(self): # Test decode from datetime.datetime to DatetimeMS dat_min = {"x": datetime.datetime.min} - dat_max = {"x": DatetimeMS(_max_datetime_ms()).as_datetime(CodecOptions(tz_aware=False))} + dat_max = {"x": DatetimeMS(_MAX_UTC_MS).as_datetime(CodecOptions(tz_aware=False))} opts = JSONOptions( datetime_representation=DatetimeRepresentation.ISO8601, datetime_conversion=DatetimeConversion.DATETIME_MS, diff --git a/test/test_objectid.py b/test/test_objectid.py index 771ba09422..26670832f6 100644 --- a/test/test_objectid.py +++ b/test/test_objectid.py @@ -95,9 +95,6 @@ def test_generation_time(self): self.assertTrue(d2 - d1 < datetime.timedelta(seconds=2)) def test_from_datetime(self): - if "PyPy 1.8.0" in sys.version: - # See https://bugs.pypy.org/issue1092 - raise SkipTest("datetime.timedelta 
is broken in pypy 1.8.0") d = datetime.datetime.now(tz=datetime.timezone.utc).replace(tzinfo=None) d = d - datetime.timedelta(microseconds=d.microsecond) oid = ObjectId.from_datetime(d)