Skip to content

Commit 14002a5

Browse files
bcwarner and Ben Warner authored
PYTHON-1824 Allow encoding/decoding out-of-range datetimes via DatetimeMS and datetime_conversion (#981)
https://jira.mongodb.org/browse/PYTHON-1824 Co-authored-by: Ben Warner <[email protected]>
1 parent e96f112 commit 14002a5

15 files changed

+721
-46
lines changed

bson/__init__.py

Lines changed: 19 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@
5454
subtype 0. It will be decoded back to bytes.
5555
"""
5656

57-
import calendar
5857
import datetime
5958
import itertools
6059
import re
@@ -100,9 +99,18 @@
10099
from bson.codec_options import (
101100
DEFAULT_CODEC_OPTIONS,
102101
CodecOptions,
102+
DatetimeConversionOpts,
103103
_DocumentType,
104104
_raw_document_class,
105105
)
106+
from bson.datetime_ms import (
107+
EPOCH_AWARE,
108+
EPOCH_NAIVE,
109+
DatetimeMS,
110+
_datetime_to_millis,
111+
_millis_to_datetime,
112+
utc,
113+
)
106114
from bson.dbref import DBRef
107115
from bson.decimal128 import Decimal128
108116
from bson.errors import InvalidBSON, InvalidDocument, InvalidStringData
@@ -113,7 +121,6 @@
113121
from bson.regex import Regex
114122
from bson.son import RE_TYPE, SON
115123
from bson.timestamp import Timestamp
116-
from bson.tz_util import utc
117124

118125
# Import some modules for type-checking only.
119126
if TYPE_CHECKING:
@@ -187,12 +194,10 @@
187194
"is_valid",
188195
"BSON",
189196
"has_c",
197+
"DatetimeConversionOpts",
198+
"DatetimeMS",
190199
]
191200

192-
EPOCH_AWARE = datetime.datetime.fromtimestamp(0, utc)
193-
EPOCH_NAIVE = datetime.datetime.utcfromtimestamp(0)
194-
195-
196201
BSONNUM = b"\x01" # Floating point
197202
BSONSTR = b"\x02" # UTF-8 string
198203
BSONOBJ = b"\x03" # Embedded document
@@ -413,7 +418,7 @@ def _get_boolean(
413418

414419
def _get_date(
    data: Any, view: Any, position: int, dummy0: int, opts: CodecOptions, dummy1: Any
) -> Tuple[Union[datetime.datetime, DatetimeMS], int]:
    """Decode a BSON datetime stored at ``position`` in ``data``.

    The 8-byte value is unpacked as milliseconds and handed to
    ``_millis_to_datetime`` together with ``opts``; per the return annotation
    the decoded value is either a ``datetime.datetime`` or a ``DatetimeMS``.

    Returns a ``(value, new_position)`` tuple where ``new_position`` is
    ``position + 8``. ``view``, ``dummy0`` and ``dummy1`` are unused.
    """
    millis = _UNPACK_LONG_FROM(data, position)[0]
    decoded = _millis_to_datetime(millis, opts)
    return decoded, position + 8
419424

@@ -724,6 +729,12 @@ def _encode_datetime(name: bytes, value: datetime.datetime, dummy0: Any, dummy1:
724729
return b"\x09" + name + _PACK_LONG(millis)
725730

726731

732+
def _encode_datetime_ms(name: bytes, value: DatetimeMS, dummy0: Any, dummy1: Any) -> bytes:
    """Encode a DatetimeMS as a BSON datetime element.

    ``int(value)`` supplies the millisecond count, which is packed with
    ``_PACK_LONG`` after the 0x09 type byte and the element ``name``.
    ``dummy0`` and ``dummy1`` are unused.
    """
    packed_millis = _PACK_LONG(int(value))
    return b"\x09" + name + packed_millis
736+
737+
727738
def _encode_none(name: bytes, dummy0: Any, dummy1: Any, dummy2: Any) -> bytes:
728739
"""Encode python None."""
729740
return b"\x0A" + name
@@ -814,6 +825,7 @@ def _encode_maxkey(name: bytes, dummy0: Any, dummy1: Any, dummy2: Any) -> bytes:
814825
bool: _encode_bool,
815826
bytes: _encode_bytes,
816827
datetime.datetime: _encode_datetime,
828+
DatetimeMS: _encode_datetime_ms,
817829
dict: _encode_mapping,
818830
float: _encode_float,
819831
int: _encode_int,
@@ -948,27 +960,6 @@ def _dict_to_bson(doc: Any, check_keys: bool, opts: CodecOptions, top_level: boo
948960
_dict_to_bson = _cbson._dict_to_bson # noqa: F811
949961

950962

951-
def _millis_to_datetime(millis: int, opts: CodecOptions) -> datetime.datetime:
952-
"""Convert milliseconds since epoch UTC to datetime."""
953-
diff = ((millis % 1000) + 1000) % 1000
954-
seconds = (millis - diff) // 1000
955-
micros = diff * 1000
956-
if opts.tz_aware:
957-
dt = EPOCH_AWARE + datetime.timedelta(seconds=seconds, microseconds=micros)
958-
if opts.tzinfo:
959-
dt = dt.astimezone(opts.tzinfo)
960-
return dt
961-
else:
962-
return EPOCH_NAIVE + datetime.timedelta(seconds=seconds, microseconds=micros)
963-
964-
965-
def _datetime_to_millis(dtm: datetime.datetime) -> int:
966-
"""Convert datetime to milliseconds since epoch UTC."""
967-
if dtm.utcoffset() is not None:
968-
dtm = dtm - dtm.utcoffset() # type: ignore
969-
return int(calendar.timegm(dtm.timetuple()) * 1000 + dtm.microsecond // 1000)
970-
971-
972963
_CODEC_OPTIONS_TYPE_ERROR = TypeError("codec_options must be an instance of CodecOptions")
973964

974965

bson/_cbsonmodule.c

Lines changed: 134 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ struct module_state {
5252
PyObject* BSONInt64;
5353
PyObject* Decimal128;
5454
PyObject* Mapping;
55+
PyObject* DatetimeMS;
56+
PyObject* _min_datetime_ms;
57+
PyObject* _max_datetime_ms;
5558
};
5659

5760
#define GETSTATE(m) ((struct module_state*)PyModule_GetState(m))
@@ -72,6 +75,12 @@ struct module_state {
7275
/* The smallest possible BSON document, i.e. "{}" */
7376
#define BSON_MIN_SIZE 5
7477

78+
/* Datetime codec options.
 *
 * Values compared against codec_options_t.datetime_conversion when a BSON
 * datetime is decoded in get_value():
 *   DATETIME_MS    - always return a DatetimeMS built from the raw millis.
 *   DATETIME_CLAMP - clamp millis to the bounds returned by _min_datetime_ms /
 *                    _max_datetime_ms, then build a regular datetime.
 *   DATETIME_AUTO  - return a DatetimeMS only when millis falls outside those
 *                    bounds; otherwise build a regular datetime.
 *   DATETIME       - not referenced in the visible decode path; the value is
 *                    passed straight to datetime_from_millis().
 * NOTE(review): presumably these numbers mirror the Python-side
 * DatetimeConversionOpts values - confirm before changing either side. */
79+
#define DATETIME 1
80+
#define DATETIME_CLAMP 2
81+
#define DATETIME_MS 3
82+
#define DATETIME_AUTO 4
83+
7584
/* Get an error class from the bson.errors module.
7685
*
7786
* Returns a new ref */
@@ -179,6 +188,45 @@ static long long millis_from_datetime(PyObject* datetime) {
179188
return millis;
180189
}
181190

191+
/* Extended-range datetime, returns a DatetimeMS object with millis */
192+
/* Extended-range datetime: wrap a millisecond count in a DatetimeMS object.
 *
 * Converts `millis` to a Python int and calls the DatetimeMS class cached in
 * this module's state with it as the only argument.
 *
 * Returns the result of that call, or NULL if either the int conversion or
 * the call fails. */
static PyObject* datetime_ms_from_millis(PyObject* self, long long millis){
    struct module_state *state = GETSTATE(self);
    PyObject* millis_obj = PyLong_FromLongLong(millis);
    PyObject* result;

    if (millis_obj == NULL) {
        return NULL;
    }

    result = PyObject_CallFunctionObjArgs(state->DatetimeMS, millis_obj, NULL);
    /* The temporary int is no longer needed once the call has returned. */
    Py_DECREF(millis_obj);
    return result;
}
206+
207+
/* Extended-range datetime, takes a DatetimeMS object and extracts the long long value. */
208+
/* Extended-range datetime: extract the millisecond value of a DatetimeMS.
 *
 * Converts `dt` to a Python int with PyNumber_Long and stores its value in
 * `*out`.
 *
 * Returns 1 on success. Returns 0 on failure, leaving `*out` untouched:
 *   - if PyNumber_Long fails (e.g. TypeError), its exception is left set;
 *   - if the value does not fit in a long long, OverflowError is set. */
static int millis_from_datetime_ms(PyObject* dt, long long* out){
    PyObject* ll_millis;
    long long millis;
    int failed;

    if (!(ll_millis = PyNumber_Long(dt))){
        /* Always bail out here: continuing would hand NULL to
         * PyLong_AsLongLong and Py_DECREF. */
        return 0;
    }

    millis = PyLong_AsLongLong(ll_millis);
    /* -1 is a legitimate value, so it is only an error if an exception is set. */
    failed = (millis == -1 && PyErr_Occurred() != NULL);

    /* Release the temporary on every path, including the overflow path. */
    Py_DECREF(ll_millis);

    if (failed) { /* Overflow */
        PyErr_SetString(PyExc_OverflowError,
                        "MongoDB datetimes can only handle up to 8-byte ints");
        return 0;
    }

    *out = millis;
    return 1;
}
229+
182230
/* Just make this compatible w/ the old API. */
183231
int buffer_write_bytes(buffer_t buffer, const char* data, int size) {
184232
if (pymongo_buffer_write(buffer, data, size)) {
@@ -342,7 +390,10 @@ static int _load_python_objects(PyObject* module) {
342390
_load_object(&state->BSONInt64, "bson.int64", "Int64") ||
343391
_load_object(&state->Decimal128, "bson.decimal128", "Decimal128") ||
344392
_load_object(&state->UUID, "uuid", "UUID") ||
345-
_load_object(&state->Mapping, "collections.abc", "Mapping")) {
393+
_load_object(&state->Mapping, "collections.abc", "Mapping") ||
394+
_load_object(&state->DatetimeMS, "bson.datetime_ms", "DatetimeMS") ||
395+
_load_object(&state->_min_datetime_ms, "bson.datetime_ms", "_min_datetime_ms") ||
396+
_load_object(&state->_max_datetime_ms, "bson.datetime_ms", "_max_datetime_ms")) {
346397
return 1;
347398
}
348399
/* Reload our REType hack too. */
@@ -466,13 +517,14 @@ int convert_codec_options(PyObject* options_obj, void* p) {
466517

467518
options->unicode_decode_error_handler = NULL;
468519

469-
if (!PyArg_ParseTuple(options_obj, "ObbzOO",
520+
if (!PyArg_ParseTuple(options_obj, "ObbzOOb",
470521
&options->document_class,
471522
&options->tz_aware,
472523
&options->uuid_rep,
473524
&options->unicode_decode_error_handler,
474525
&options->tzinfo,
475-
&type_registry_obj))
526+
&type_registry_obj,
527+
&options->datetime_conversion))
476528
return 0;
477529

478530
type_marker = _type_marker(options->document_class);
@@ -1049,6 +1101,13 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
10491101
}
10501102
*(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x09;
10511103
return buffer_write_int64(buffer, (int64_t)millis);
1104+
} else if (PyObject_TypeCheck(value, (PyTypeObject *) state->DatetimeMS)) {
1105+
long long millis;
1106+
if (!millis_from_datetime_ms(value, &millis)) {
1107+
return 0;
1108+
}
1109+
*(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x09;
1110+
return buffer_write_int64(buffer, (int64_t)millis);
10521111
} else if (PyObject_TypeCheck(value, state->REType)) {
10531112
return _write_regex_to_buffer(buffer, type_byte, value);
10541113
}
@@ -1854,8 +1913,79 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
18541913
}
18551914
memcpy(&millis, buffer + *position, 8);
18561915
millis = (int64_t)BSON_UINT64_FROM_LE(millis);
1857-
naive = datetime_from_millis(millis);
18581916
*position += 8;
1917+
1918+
if (options->datetime_conversion == DATETIME_MS){
1919+
value = datetime_ms_from_millis(self, millis);
1920+
break;
1921+
}
1922+
1923+
int dt_clamp = options->datetime_conversion == DATETIME_CLAMP;
1924+
int dt_auto = options->datetime_conversion == DATETIME_AUTO;
1925+
1926+
1927+
if (dt_clamp || dt_auto){
1928+
PyObject *min_millis_fn = _get_object(state->_min_datetime_ms, "bson.datetime_ms", "_min_datetime_ms");
1929+
PyObject *max_millis_fn = _get_object(state->_max_datetime_ms, "bson.datetime_ms", "_max_datetime_ms");
1930+
PyObject *min_millis_fn_res;
1931+
PyObject *max_millis_fn_res;
1932+
int64_t min_millis;
1933+
int64_t max_millis;
1934+
1935+
if (min_millis_fn == NULL || max_millis_fn == NULL) {
1936+
Py_XDECREF(min_millis_fn);
1937+
Py_XDECREF(max_millis_fn);
1938+
goto invalid;
1939+
}
1940+
1941+
if (options->tz_aware){
1942+
PyObject* tzinfo = options->tzinfo;
1943+
if (tzinfo == Py_None) {
1944+
// Default to UTC.
1945+
utc_type = _get_object(state->UTC, "bson.tz_util", "utc");
1946+
tzinfo = utc_type;
1947+
}
1948+
min_millis_fn_res = PyObject_CallFunctionObjArgs(min_millis_fn, tzinfo, NULL);
1949+
max_millis_fn_res = PyObject_CallFunctionObjArgs(max_millis_fn, tzinfo, NULL);
1950+
} else {
1951+
min_millis_fn_res = PyObject_CallObject(min_millis_fn, NULL);
1952+
max_millis_fn_res = PyObject_CallObject(max_millis_fn, NULL);
1953+
}
1954+
1955+
Py_DECREF(min_millis_fn);
1956+
Py_DECREF(max_millis_fn);
1957+
1958+
if (!min_millis_fn_res || !max_millis_fn_res){
1959+
Py_XDECREF(min_millis_fn_res);
1960+
Py_XDECREF(max_millis_fn_res);
1961+
goto invalid;
1962+
}
1963+
1964+
min_millis = PyLong_AsLongLong(min_millis_fn_res);
1965+
max_millis = PyLong_AsLongLong(max_millis_fn_res);
1966+
1967+
if ((min_millis == -1 || max_millis == -1) && PyErr_Occurred())
1968+
{
1969+
// min/max_millis check
1970+
goto invalid;
1971+
}
1972+
1973+
if (dt_clamp) {
1974+
if (millis < min_millis) {
1975+
millis = min_millis;
1976+
} else if (millis > max_millis) {
1977+
millis = max_millis;
1978+
}
1979+
// Continues from here to return a datetime.
1980+
} else if (dt_auto) {
1981+
if (millis < min_millis || millis > max_millis){
1982+
value = datetime_ms_from_millis(self, millis);
1983+
break; // Out-of-range so done.
1984+
}
1985+
}
1986+
}
1987+
1988+
naive = datetime_from_millis(millis);
18591989
if (!options->tz_aware) { /* In the naive case, we're done here. */
18601990
value = naive;
18611991
break;

bson/_cbsonmodule.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ typedef struct codec_options_t {
6262
char* unicode_decode_error_handler;
6363
PyObject* tzinfo;
6464
type_registry_t type_registry;
65+
unsigned char datetime_conversion;
6566
PyObject* options_obj;
6667
unsigned char is_raw_bson;
6768
} codec_options_t;

0 commit comments

Comments
 (0)