Skip to content

Commit eb34fe3

Browse files
committed
Implement a high-performance UUID type
1 parent af03e78 commit eb34fe3

File tree

4 files changed

+190
-10
lines changed

4 files changed

+190
-10
lines changed

codecs/uuid.pyx

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,23 @@
55
# the Apache 2.0 License: http://www.apache.org/licenses/LICENSE-2.0
66

77

8-
import uuid
9-
10-
11-
_UUID = uuid.UUID
12-
13-
148
cdef uuid_encode(CodecContext settings, WriteBuffer wbuf, obj):
    # Write `obj` to `wbuf` as a UUID value.
    #
    # Exact pg_UUID instances and str inputs are written here as an int32
    # length of 16 followed by 16 bytes; every other object is handed to
    # bytea_encode() via its `.bytes` attribute.
    cdef:
        char raw[16]

    if type(obj) is pg_UUID:
        # Exact type match only: the payload is copied straight out of
        # the instance's C array, with no intermediate bytes object.
        wbuf.write_int32(<int32_t>16)
        wbuf.write_cstr((<PgBaseUUID>obj)._data, 16)
        return

    if cpython.PyUnicode_Check(obj):
        # Parse the text into a stack buffer; raises ValueError if the
        # string is not a valid UUID.
        pg_uuid_bytes_from_str(obj, raw)
        wbuf.write_int32(<int32_t>16)
        wbuf.write_cstr(raw, 16)
        return

    bytea_encode(settings, wbuf, obj.bytes)
1921

2022

2123
cdef uuid_decode(CodecContext settings, FRBuffer *buf):
    # Build a UUID object from the contents of `buf`, which must hold
    # exactly 16 bytes; any other length raises TypeError.
    if buf.len == 16:
        return pg_uuid_from_buf(frb_read_all(buf))

    raise TypeError(
        f'cannot decode UUID, expected 16 bytes, got {buf.len}')

cpythonx.pxd

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,15 @@
88
from cpython cimport Py_buffer
99

1010
cdef extern from "Python.h":
11+
int PyUnicode_1BYTE_KIND
12+
1113
int PyByteArray_Resize(object, ssize_t) except -1
1214
object PyByteArray_FromStringAndSize(const char *, ssize_t)
1315
char* PyByteArray_AsString(object)
1416

1517
object PyUnicode_FromString(const char *u)
1618
const char* PyUnicode_AsUTF8AndSize(
1719
object unicode, ssize_t *size) except NULL
20+
21+
object PyUnicode_FromKindAndData(
22+
int kind, const void *buffer, Py_ssize_t size)

pgproto.pyx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ from . import types as pgproto_types
2525
include "./consts.pxi"
2626
include "./frb.pyx"
2727
include "./buffer.pyx"
28+
include "./uuid.pyx"
2829

2930
include "./codecs/context.pyx"
3031

uuid.pyx

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
import functools
2+
import uuid
3+
4+
from libc.stdint cimport uint64_t, uint8_t
5+
from libc.string cimport memcpy
6+
7+
8+
# A more efficient UUID type implementation
9+
# (6-7x faster than the uuid.UUID).
10+
11+
12+
# Formatting table: indexing with a value 0..15 yields its lowercase
# hexadecimal digit.
cdef const char *_hexmap = b"0123456789abcdef"

# Parsing table, indexed by byte value (0..255): entries for the ASCII
# characters '0'-'9', 'A'-'F' and 'a'-'f' hold the digit's numeric value
# (0..15); every other entry is -1, marking a character that is not a
# hexadecimal digit.
cdef char _hextable[256]
_hextable[:] = [
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1, 0,1,2,3,4,5,6,7,8,9,-1,-1,-1,-1,-1,-1,-1,10,11,12,13,14,15,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
]
28+
29+
30+
cdef inline char i64_to_hex(uint64_t num, char *s):
    # Write `num` into `s` as exactly 16 lowercase hexadecimal digits,
    # most significant digit first. No terminating NUL is written, so
    # `s` must have room for at least 16 bytes. Always returns 0.
    cdef:
        # The counter must be a signed type. Plain `char` has
        # implementation-defined signedness and is unsigned on some ABIs
        # (e.g. ARM, PowerPC); there `i >= 0` would never become false
        # and the loop would write outside of `s`.
        int i = 15

    while i >= 0:
        # Fill from the last position backwards, one nibble per digit.
        s[i] = _hexmap[num & 0x0F]
        num >>= 4
        i -= 1

    return 0
41+
42+
43+
cdef pg_uuid_from_buf(char *buf):
    # Create a UUID instance from the first 16 bytes at `buf`.
    # The object is allocated through __new__, so __init__ is not run.
    cdef:
        PgBaseUUID result = UUID.__new__(UUID)

    # No integer value has been computed for this instance yet.
    result._int = None
    memcpy(result._data, buf, 16)
    return result
49+
50+
51+
cdef pg_uuid_bytes_from_str(str u, char *out):
    # Parse the textual UUID `u` and store its 16 raw bytes in `out`.
    #
    # The UTF-8 encoding of `u` must be 32..36 bytes long and consist of
    # hexadecimal digits (either case); '-' characters are skipped
    # wherever they appear. `out` must have room for at least 16 bytes.
    #
    # Raises ValueError if the length is out of range, if a character
    # other than a hex digit or '-' is found, or if the digits do not
    # decode to exactly 16 bytes.
    cdef:
        char *orig_buf
        Py_ssize_t size
        unsigned char ch
        uint8_t acc, part, acc_set
        uint8_t i, j

    orig_buf = <char*>cpythonx.PyUnicode_AsUTF8AndSize(u, &size)
    if size > 36 or size < 32:
        raise ValueError(
            f'invalid UUID {u!r}: '
            f'length must be between 32..36 characters, got {size}')

    # `acc` holds the high nibble of the byte being assembled and
    # `acc_set` tells whether that nibble has been read already.
    acc = 0
    acc_set = 0
    j = 0
    for i in range(0, size):
        ch = <unsigned char>orig_buf[i]
        if ch == <unsigned char>b'-':
            continue

        part = _hextable[ch]
        if part == <uint8_t>-1:
            # Only printable ASCII is echoed back in the message.
            if ch >= 0x20 and ch <= 0x7e:
                raise ValueError(
                    f'invalid UUID {u!r}: unexpected character {chr(ch)!r}')
            else:
                raise ValueError(f'invalid UUID {u!r}: unexpected character')

        if acc_set:
            # Second nibble: the byte is complete, store it.
            acc |= part
            out[j] = <char>acc
            acc_set = 0
            j += 1
        else:
            # First nibble: remember it as the high half of the byte.
            acc = <uint8_t>(part << 4)
            acc_set = 1

        # Stop before a 17th byte could be written past the end of `out`.
        if j > 16 or (j == 16 and acc_set):
            raise ValueError(
                f'invalid UUID {u!r}: decodes to more than 16 bytes')

    if j != 16:
        raise ValueError(
            f'invalid UUID {u!r}: decodes to less than 16 bytes')
96+
97+
98+
cdef class PgBaseUUID:
    # Extension type that stores a UUID as 16 raw bytes in a C array.

    cdef:
        char _data[16]
        # Cached result of the `int` property; None until first computed.
        object _int

    def __init__(self, inp):
        # Initialise from either a 16-byte `bytes` object or a UUID string.
        # Raises ValueError for bytes of the wrong length (or an invalid
        # string) and TypeError for any other input type.
        cdef:
            char *src
            Py_ssize_t size

        if cpython.PyUnicode_Check(inp):
            pg_uuid_bytes_from_str(inp, self._data)
        elif not cpython.PyBytes_Check(inp):
            raise TypeError(f'a bytes or str object expected, got {inp!r}')
        else:
            cpython.PyBytes_AsStringAndSize(inp, &src, &size)
            if size != 16:
                raise ValueError(f'16 bytes were expected, got {size}')
            memcpy(self._data, src, 16)

        self._int = None

    @property
    def bytes(self):
        # The 16 raw bytes as a new `bytes` object.
        return cpython.PyBytes_FromStringAndSize(self._data, 16)

    @property
    def int(self):
        # The UUID as an integer (big-endian interpretation of `bytes`).
        cached = self._int
        if cached is None:
            # The cache is important because `self.int` can be
            # used multiple times by __hash__ etc.
            cached = int.from_bytes(self.bytes, 'big')
            self._int = cached
        return cached

    def __str__(self):
        # Format as 36 characters: hex digits in 8-4-4-4-12 groups
        # separated by dashes.
        cdef:
            uint64_t half
            char digits[16]
            char text[36]

        # First 8 bytes -> 16 hex digits -> groups of 8, 4 and 4.
        half = <uint64_t>hton.unpack_int64(self._data)
        i64_to_hex(half, digits)
        memcpy(text, digits, 8)
        memcpy(text + 9, digits + 8, 4)
        memcpy(text + 14, digits + 12, 4)

        # Last 8 bytes -> 16 hex digits -> groups of 4 and 12.
        half = <uint64_t>hton.unpack_int64(self._data + 8)
        i64_to_hex(half, digits)
        memcpy(text + 19, digits, 4)
        memcpy(text + 24, digits + 4, 12)

        # Group separators.
        text[8] = b'-'
        text[13] = b'-'
        text[18] = b'-'
        text[23] = b'-'

        return cpythonx.PyUnicode_FromKindAndData(
            cpythonx.PyUnicode_1BYTE_KIND, <void*>text, 36)

    def __repr__(self):
        return f"UUID('{self}')"

    def __reduce__(self):
        # Pickle support: rebuild by calling the concrete class with the
        # raw bytes.
        cls = type(self)
        return (cls, (self.bytes,))
163+
164+
165+
# Public UUID type: a regular Python class deriving from both the
# PgBaseUUID extension type and the standard library's uuid.UUID.
class UUID(PgBaseUUID, uuid.UUID):
    # Empty __slots__: the subclass itself adds no per-instance
    # attributes.
    __slots__ = ()


# C-level (cdef) module variable bound to the UUID class.
cdef pg_UUID = UUID

0 commit comments

Comments
 (0)