Skip to content

Commit 4760d07

Browse files
committed
PYTHON-2152 Expand native UUID handling support; Implement UUID specification
1 parent 2996023 commit 4760d07

File tree

11 files changed

+575
-233
lines changed

11 files changed

+575
-233
lines changed

bson/__init__.py

Lines changed: 22 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,10 @@
7676
from codecs import (utf_8_decode as _utf_8_decode,
7777
utf_8_encode as _utf_8_encode)
7878

79-
from bson.binary import (Binary, OLD_UUID_SUBTYPE,
79+
from bson.binary import (Binary, UuidRepresentation, ALL_UUID_SUBTYPES,
80+
OLD_UUID_SUBTYPE,
8081
JAVA_LEGACY, CSHARP_LEGACY,
81-
UUIDLegacy)
82+
UUIDLegacy, UUID_SUBTYPE)
8283
from bson.code import Code
8384
from bson.codec_options import (
8485
CodecOptions, DEFAULT_CODEC_OPTIONS, _raw_document_class)
@@ -303,26 +304,29 @@ def _get_binary(data, view, position, obj_end, opts, dummy1):
303304
end = position + length
304305
if length < 0 or end > obj_end:
305306
raise InvalidBSON('bad binary object length')
306-
if subtype == 3:
307-
# Java Legacy
307+
308+
# Convert UUID subtypes to native UUIDs.
309+
# TODO: PYTHON-2245 Decoding should follow UUID spec in PyMongo 4.0+
310+
if subtype in ALL_UUID_SUBTYPES:
308311
uuid_representation = opts.uuid_representation
309-
if uuid_representation == JAVA_LEGACY:
310-
java = data[position:end]
311-
value = uuid.UUID(bytes=java[0:8][::-1] + java[8:16][::-1])
312-
# C# legacy
313-
elif uuid_representation == CSHARP_LEGACY:
314-
value = uuid.UUID(bytes_le=data[position:end])
315-
# Python
316-
else:
317-
value = uuid.UUID(bytes=data[position:end])
318-
return value, end
319-
if subtype == 4:
320-
return uuid.UUID(bytes=data[position:end]), end
312+
binary_value = Binary(data[position:end], subtype)
313+
if uuid_representation == UuidRepresentation.UNSPECIFIED:
314+
return binary_value, end
315+
if subtype == UUID_SUBTYPE:
316+
# Legacy behavior: use STANDARD with binary subtype 4.
317+
uuid_representation = UuidRepresentation.STANDARD
318+
elif uuid_representation == UuidRepresentation.STANDARD:
319+
# subtype == OLD_UUID_SUBTYPE
320+
# Legacy behavior: STANDARD is the same as PYTHON_LEGACY.
321+
uuid_representation = UuidRepresentation.PYTHON_LEGACY
322+
return binary_value.as_uuid(uuid_representation), end
323+
321324
# Python3 special case. Decode subtype 0 to 'bytes'.
322325
if PY3 and subtype == 0:
323326
value = data[position:end]
324327
else:
325328
value = Binary(data[position:end], subtype)
329+
326330
return value, end
327331

328332

@@ -633,21 +637,8 @@ def _encode_binary(name, value, dummy0, dummy1):
633637
def _encode_uuid(name, value, dummy, opts):
634638
"""Encode uuid.UUID."""
635639
uuid_representation = opts.uuid_representation
636-
# Python Legacy Common Case
637-
if uuid_representation == OLD_UUID_SUBTYPE:
638-
return b"\x05" + name + b'\x10\x00\x00\x00\x03' + value.bytes
639-
# Java Legacy
640-
elif uuid_representation == JAVA_LEGACY:
641-
from_uuid = value.bytes
642-
data = from_uuid[0:8][::-1] + from_uuid[8:16][::-1]
643-
return b"\x05" + name + b'\x10\x00\x00\x00\x03' + data
644-
# C# legacy
645-
elif uuid_representation == CSHARP_LEGACY:
646-
# Microsoft GUID representation.
647-
return b"\x05" + name + b'\x10\x00\x00\x00\x03' + value.bytes_le
648-
# New
649-
return b"\x05" + name + b'\x10\x00\x00\x00\x04' + value.bytes
650-
640+
binval = Binary.from_uuid(value, uuid_representation=uuid_representation)
641+
return _encode_binary(name, binval, dummy, opts)
651642

652643
def _encode_objectid(name, value, dummy0, dummy1):
653644
"""Encode bson.objectid.ObjectId."""

bson/_cbsonmodule.c

Lines changed: 46 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ static struct module_state _state;
7878
#define STANDARD 4
7979
#define JAVA_LEGACY 5
8080
#define CSHARP_LEGACY 6
81+
#define UNSPECIFIED 0
8182

8283
#define BSON_MAX_SIZE 2147483647
8384
/* The smallest possible BSON document, i.e. "{}" */
@@ -583,19 +584,6 @@ static int write_element_to_buffer(PyObject* self, buffer_t buffer,
583584
return result;
584585
}
585586

586-
static void
587-
_fix_java(const char* in, char* out) {
588-
int i, j;
589-
for (i = 0, j = 7; i < j; i++, j--) {
590-
out[i] = in[j];
591-
out[j] = in[i];
592-
}
593-
for (i = 8, j = 15; i < j; i++, j--) {
594-
out[i] = in[j];
595-
out[j] = in[i];
596-
}
597-
}
598-
599587
static void
600588
_set_cannot_encode(PyObject* value) {
601589
PyObject* type = NULL;
@@ -1276,73 +1264,35 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
12761264

12771265
uuid_type = _get_object(state->UUID, "uuid", "UUID");
12781266
if (uuid_type && PyObject_IsInstance(value, uuid_type)) {
1279-
/* Just a special case of Binary above, but
1280-
* simpler to do as a separate case. */
1281-
PyObject* bytes;
1282-
/* Could be bytes, bytearray, str... */
1283-
const char* data;
1284-
/* UUID is always 16 bytes */
1285-
int size = 16;
1286-
char subtype;
1267+
PyObject* binary_type = NULL;
1268+
PyObject* binary_value = NULL;
1269+
int result;
12871270

12881271
Py_DECREF(uuid_type);
12891272
/* PyObject_IsInstance returns -1 on error */
12901273
if (PyErr_Occurred()) {
12911274
return 0;
12921275
}
12931276

1294-
if (options->uuid_rep == JAVA_LEGACY
1295-
|| options->uuid_rep == CSHARP_LEGACY) {
1296-
subtype = 3;
1297-
}
1298-
else {
1299-
subtype = options->uuid_rep;
1300-
}
1301-
1302-
*(buffer_get_buffer(buffer) + type_byte) = 0x05;
1303-
if (!buffer_write_int32(buffer, (int32_t)size)) {
1304-
return 0;
1305-
}
1306-
if (!buffer_write_bytes(buffer, &subtype, 1)) {
1277+
binary_type = _get_object(state->Binary, "bson", "Binary");
1278+
if (binary_type == NULL) {
13071279
return 0;
13081280
}
13091281

1310-
if (options->uuid_rep == CSHARP_LEGACY) {
1311-
/* Legacy C# byte order */
1312-
bytes = PyObject_GetAttrString(value, "bytes_le");
1313-
}
1314-
else {
1315-
bytes = PyObject_GetAttrString(value, "bytes");
1316-
}
1317-
if (!bytes) {
1318-
return 0;
1319-
}
1320-
#if PY_MAJOR_VERSION >= 3
1321-
data = PyBytes_AsString(bytes);
1322-
#else
1323-
data = PyString_AsString(bytes);
1324-
#endif
1325-
if (data == NULL) {
1326-
Py_DECREF(bytes);
1282+
binary_value = PyObject_CallMethod(binary_type, "from_uuid", "(Oi)", value, options->uuid_rep);
1283+
if (binary_value == NULL) {
1284+
Py_DECREF(binary_type);
13271285
return 0;
13281286
}
1329-
if (options->uuid_rep == JAVA_LEGACY) {
1330-
/* Store in legacy java byte order. */
1331-
char as_legacy_java[16];
1332-
_fix_java(data, as_legacy_java);
1333-
if (!buffer_write_bytes(buffer, as_legacy_java, size)) {
1334-
Py_DECREF(bytes);
1335-
return 0;
1336-
}
1337-
}
1338-
else {
1339-
if (!buffer_write_bytes(buffer, data, size)) {
1340-
Py_DECREF(bytes);
1341-
return 0;
1342-
}
1343-
}
1344-
Py_DECREF(bytes);
1345-
return 1;
1287+
1288+
result = _write_element_to_buffer(self, buffer,
1289+
type_byte, binary_value,
1290+
check_keys, options,
1291+
in_custom_call,
1292+
in_fallback_call);
1293+
Py_DECREF(binary_type);
1294+
Py_DECREF(binary_value);
1295+
return result;
13461296
}
13471297
Py_XDECREF(mapping_type);
13481298
Py_XDECREF(uuid_type);
@@ -1823,7 +1773,6 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
18231773
unsigned* position, unsigned char type,
18241774
unsigned max, const codec_options_t* options) {
18251775
struct module_state *state = GETSTATE(self);
1826-
18271776
PyObject* value = NULL;
18281777
switch (type) {
18291778
case 1:
@@ -2063,70 +2012,49 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
20632012
if (!data) {
20642013
goto invalid;
20652014
}
2066-
/* Encode as UUID, not Binary */
2015+
/* Decode as UUID or Binary based on options->uuid_rep
2016+
* TODO: PYTHON-2245 Decoding should follow UUID spec in PyMongo 4.0 */
20672017
if (subtype == 3 || subtype == 4) {
2068-
PyObject* kwargs;
2069-
PyObject* args = PyTuple_New(0);
2018+
PyObject* binary_type = NULL;
2019+
PyObject* binary_value = NULL;
2020+
char uuid_rep = options->uuid_rep;
2021+
20702022
/* UUID should always be 16 bytes */
2071-
if (!args || length != 16) {
2072-
Py_DECREF(data);
2073-
goto invalid;
2074-
}
2075-
kwargs = PyDict_New();
2076-
if (!kwargs) {
2077-
Py_DECREF(data);
2078-
Py_DECREF(args);
2079-
goto invalid;
2023+
if (length != 16) {
2024+
goto uuiderror;
20802025
}
20812026

2082-
/*
2083-
* From this point, we hold refs to args, kwargs, and data.
2084-
* If anything fails, goto uuiderror to clean them up.
2085-
*/
2086-
if (subtype == 3 && options->uuid_rep == CSHARP_LEGACY) {
2087-
/* Legacy C# byte order */
2088-
if ((PyDict_SetItemString(kwargs, "bytes_le", data)) == -1)
2089-
goto uuiderror;
2027+
binary_type = _get_object(state->Binary, "bson", "Binary");
2028+
if (binary_type == NULL) {
2029+
goto uuiderror;
20902030
}
2091-
else {
2092-
if (subtype == 3 && options->uuid_rep == JAVA_LEGACY) {
2093-
/* Convert from legacy java byte order */
2094-
char big_endian[16];
2095-
_fix_java(buffer + *position, big_endian);
2096-
/* Free the previously created PyString object */
2097-
Py_DECREF(data);
2098-
#if PY_MAJOR_VERSION >= 3
2099-
data = PyBytes_FromStringAndSize(big_endian, length);
2100-
#else
2101-
data = PyString_FromStringAndSize(big_endian, length);
2102-
#endif
2103-
if (data == NULL)
2104-
goto uuiderror;
2105-
}
2106-
if ((PyDict_SetItemString(kwargs, "bytes", data)) == -1)
2107-
goto uuiderror;
21082031

2032+
binary_value = PyObject_CallFunction(binary_type, "(Oi)", data, subtype);
2033+
if (binary_value == NULL) {
2034+
goto uuiderror;
21092035
}
2110-
if ((type_to_create = _get_object(state->UUID, "uuid", "UUID"))) {
2111-
value = PyObject_Call(type_to_create, args, kwargs);
2112-
Py_DECREF(type_to_create);
2036+
2037+
if (uuid_rep == UNSPECIFIED) {
2038+
value = binary_value;
2039+
Py_INCREF(value);
2040+
} else {
2041+
if (subtype == 4) {
2042+
uuid_rep = STANDARD;
2043+
} else if (uuid_rep == STANDARD) {
2044+
uuid_rep = PYTHON_LEGACY;
2045+
}
2046+
value = PyObject_CallMethod(binary_value, "as_uuid", "(i)", uuid_rep);
21132047
}
21142048

2115-
Py_DECREF(args);
2116-
Py_DECREF(kwargs);
2049+
uuiderror:
2050+
Py_XDECREF(binary_type);
2051+
Py_XDECREF(binary_value);
21172052
Py_DECREF(data);
21182053
if (!value) {
21192054
goto invalid;
21202055
}
2121-
21222056
*position += length;
21232057
break;
2124-
2125-
uuiderror:
2126-
Py_DECREF(args);
2127-
Py_DECREF(kwargs);
2128-
Py_XDECREF(data);
2129-
goto invalid;
21302058
}
21312059

21322060
#if PY_MAJOR_VERSION >= 3

0 commit comments

Comments
 (0)