Skip to content

Commit f3324ce

Browse files
committed
Faster sets, empty set option, faster non-float serialize
1 parent df92f1f commit f3324ce

File tree

6 files changed

+122
-81
lines changed

6 files changed

+122
-81
lines changed

ddbcereal/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,17 @@
1717
from ddbcereal.serializing import Serializer
1818
from ddbcereal.types import DateFormat, DynamoDBType, PythonNumber
1919

20-
VERSION = 2, 0, 1
20+
VERSION = 2, 1, 0
2121

2222
ISO_8601 = DateFormat.ISO_8601
2323
UNIX_MILLISECONDS = DateFormat.UNIX_MILLISECONDS
2424
UNIX_SECONDS = DateFormat.UNIX_SECONDS
2525

26+
BINARY_SET = DynamoDBType.BINARY_SET
2627
NUMBER = DynamoDBType.NUMBER
28+
NUMBER_SET = DynamoDBType.NUMBER_SET
2729
STRING = DynamoDBType.STRING
30+
STRING_SET = DynamoDBType.STRING_SET
2831

2932
DECIMAL_ONLY = PythonNumber.DECIMAL_ONLY
3033
FRACTION_ONLY = PythonNumber.FRACTION_ONLY

ddbcereal/serializing.py

Lines changed: 45 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
# limitations under the License.
1414

1515
import decimal
16-
import fractions
1716
from base64 import b64encode
1817
from collections.abc import ByteString, Set
1918
from datetime import datetime
@@ -25,13 +24,11 @@
2524

2625
NoneType = type(None)
2726

28-
2927
DDB_NUMBER_EMIN = -128
3028
DDB_NUMBER_EMAX = 126
3129
DDB_NUMBER_PREC = 38
3230
INFINITY = decimal.Decimal('Infinity')
3331
NAN = decimal.Decimal('NaN')
34-
NUMBER_TYPES = decimal.Decimal, fractions.Fraction, int, float
3532

3633

3734
class Serializer:
@@ -46,6 +43,7 @@ def __init__(
4643
raw_transport=False,
4744
datetime_format=DateFormat.ISO_8601,
4845
fraction_type=DynamoDBType.NUMBER,
46+
empty_set_type=DynamoDBType.NUMBER_SET
4947
):
5048
decimal_traps = [
5149
decimal.Clamped,
@@ -61,21 +59,29 @@ def __init__(
6159
_serialize_bytes = serialize_bytes
6260

6361
if validate_numbers:
64-
serialize_num = self._serialize_number_strict
62+
_serialize_float = self._serialize_float_strict
63+
_serialize_number = self._serialize_number_strict
64+
else:
65+
_serialize_float = serialize_number
66+
_serialize_number = serialize_number
67+
self._serialize_num = _serialize_number
68+
69+
if fraction_type == DynamoDBType.NUMBER:
70+
_serialize_fraction = self._serialize_fraction_as_number
6571
else:
66-
serialize_num = serialize_number
67-
self._serialize_num = serialize_num
72+
_serialize_fraction = serialize_any_as_string
6873

6974
self._type_methods: MutableMapping[type, Callable] = {
7075
bool: serialize_bool,
7176
bytes: _serialize_bytes,
7277
bytearray: _serialize_bytes,
7378
memoryview: _serialize_bytes,
7479
datetime: date_serializers[datetime_format],
75-
decimal.Decimal: serialize_num,
80+
decimal.Decimal: _serialize_number,
7681
dict: self._serialize_mapping,
77-
float: serialize_num if allow_inexact else serialize_float_exact,
78-
int: serialize_num,
82+
float: _serialize_float,
83+
Fraction: _serialize_fraction,
84+
int: _serialize_number,
7985
list: self._serialize_listlike,
8086
Mapping: self._serialize_mapping,
8187
NoneType: serialize_none,
@@ -85,11 +91,6 @@ def __init__(
8591
str: serialize_str,
8692
}
8793

88-
if fraction_type == DynamoDBType.NUMBER:
89-
self._type_methods[Fraction] = self._serialize_fraction_as_number
90-
else:
91-
self._type_methods[Fraction] = serialize_any_as_string
92-
9394
decimal_ctx = decimal.Context(
9495
Emin=DDB_NUMBER_EMIN,
9596
Emax=DDB_NUMBER_EMAX,
@@ -100,6 +101,8 @@ def __init__(
100101
self._create_decimal = decimal_ctx.create_decimal
101102
self._decimal_divide = decimal_ctx.divide
102103

104+
self._empty_set = {empty_set_type.value: []}
105+
103106
def serialize(self, value: Any) -> DynamoDBValue:
104107
value_type = type(value)
105108
try:
@@ -130,6 +133,18 @@ def _serialize_fraction_as_number(self, value: Fraction):
130133
def _serialize_number_strict(
131134
self,
132135
value: Union[int, float, decimal.Decimal]
136+
):
137+
try:
138+
dec_value = self._create_decimal(value)
139+
except decimal.Inexact:
140+
raise NumberInexactError()
141+
if dec_value in (INFINITY, NAN):
142+
raise NumberNotAllowedError(f'{dec_value} not supported')
143+
return {'N': str(dec_value)}
144+
145+
def _serialize_float_strict(
146+
self,
147+
value: Union[int, float, decimal.Decimal]
133148
):
134149
try:
135150
dec_value = self._create_decimal(str(value))
@@ -144,23 +159,22 @@ def _serialize_listlike(self, value: Union[list, tuple]):
144159

145160
def _serialize_set(self, value: Set):
146161
if all(isinstance(element, str) for element in value):
147-
return {'SS': [element for element in value]}
148-
if all(isinstance(element, NUMBER_TYPES) for element in value):
149-
return {
150-
'NS': [
151-
val
152-
for element in value
153-
for val in self.serialize(element).values()
154-
]
155-
}
156-
if all(isinstance(element, ByteString) for element in value):
157-
return {
158-
'BS': [
159-
val
160-
for element in value
161-
for val in self.serialize(element).values()
162-
]
163-
}
162+
# Shortcut to faster string set:
163+
return {'SS': list(value)}
164+
if not value:
165+
return self._empty_set
166+
vals = [
167+
self.serialize(element)
168+
for element in value
169+
]
170+
first_type = next(iter(vals[0]))
171+
if (
172+
first_type in {'N', 'S', 'B'}
173+
and all(first_type in val for val in vals)
174+
):
175+
return {first_type + 'S': [val[first_type] for val in vals]}
176+
177+
raise ValueError('Invalid or mixed types in set.')
164178

165179
def _serialize_mapping(self, value: Mapping):
166180
return {

ddbcereal/types.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,11 @@ class DateFormat(enum.Enum):
3131

3232

3333
class DynamoDBType(enum.Enum):
34-
NUMBER = enum.auto()
35-
STRING = enum.auto()
34+
NUMBER = 'N'
35+
NUMBER_SET = 'NS'
36+
STRING = 'S'
37+
STRING_SET = 'SS'
38+
BINARY_SET = 'BS'
3639

3740

3841
class PythonNumber(enum.Enum):

docs/changelog.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
Changelog
22
=========
3+
2.1.0
4+
-----
5+
* Empty Python set is now serialized to a configurable Set type.
6+
* Faster serialization for Number Sets and Binary Sets.
7+
* Behind the scenes, DynamoDBType enumerations are now ``str``\ s of
8+
their type symbol.
9+
310
2.0.1
411
-----
512
* Fix exceptions from typing on pre-3.9 Python

docs/performance.rst

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,9 @@ happen once in the lifecycle of an application.
77

88
Squeezing More Performance
99
--------------------------
10-
By default, time is spent on validating that data being supplied to a
11-
serializer will be allowed by DynamoDB once serialized. Significantly faster
12-
serialization is possible when not validating input before sending it to
13-
DynamoDB.
10+
By default, time is spent validating that data being supplied to a serializer
11+
will be allowed by DynamoDB once serialized. Significantly faster serialization
12+
is possible when not validating input before sending it to DynamoDB.
1413

1514
.. code-block:: python
1615
@@ -29,13 +28,15 @@ lookup every time ``serialize`` is called.
2928

3029
Known Limitations
3130
-----------------
32-
ddbcereal is faster than boto3 at serializing everything *except* for Number
33-
Sets (e.g. `set[Decimal]`, `frozenset[int]`)
34-
31+
* Constructing a serializer or deserializer is slow. It should be done once and
32+
the serializer or deserializer should be reused.
33+
* Map serialization and deserialization use recursion in their current
34+
implementation, so deep Maps will use more memory and could take longer than
35+
expected to process. boto3's Map processing has this same issue.
3536

3637
Benchmarks
3738
----------
38-
.. list-table:: Serializer Benchmarks (ddbcereal 1.0.0 cpython 3.9.4, 3.1 GHz
39+
.. list-table:: Serializer Benchmarks (ddbcereal 2.1.0 cpython 3.9.4, 3.1 GHz
3940
Intel Core i7)
4041
:widths: 25 25 50
4142
:header-rows: 1
@@ -47,26 +48,26 @@ Benchmarks
4748
- 60x Slower
4849
- 60x Slower
4950
* - Decimal to Number
50-
- 1.4x faster
51-
- 2.9x faster
51+
- 1.9x faster
52+
- 2.8x faster
5253
* - int to Number
53-
- 1.4x faster
54-
- 2.4x faster
54+
- 2x faster
55+
- 3x faster
5556
* - str to String
5657
- 3.6x faster
5758
- 3.6x faster
5859
* - Mixed number types Set to Number Set
59-
- 1.2x slower
6060
- 1.1x faster
61+
- 1.4x faster
6162
* - Set[str] to String Set
62-
- 3.9x faster
63-
- 3.9x faster
63+
- 4.2x faster
64+
- 4.2x faster
6465
* - List of mixed types to List
65-
- 3.1x faster
66-
- 4.1x faster
66+
- 3.4x faster
67+
- 4x faster
6768
* - dict of mixed types to Map
68-
- 3.6x faster
69+
- 4x faster
6970
- 4.6x faster
7071
* - dict of 2 levels to Map
71-
- 3.4x faster
72-
- 4.6x faster
72+
- 4x faster
73+
- 4.8x faster

docs/usage.rst

Lines changed: 40 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -98,40 +98,53 @@ your needs.
9898
validate_numbers=True, \
9999
raw_transport=False, \
100100
datetime_format=ddbcereal.ISO_8601, \
101-
fraction_type=ddbcereal.NUMBER)
101+
fraction_type=ddbcereal.NUMBER, \
102+
empty_set_type=ddbcereal.NUMBER_SET)
102103

103104
:param bool allow_inexact: Whether to allow numbers whose exact value can't
104105
be represented in DynamoDB or Python. DynamoDB's Number type stores exact
105-
numbers (fixed decimals). floats are considered inexact by their nature
106+
numbers (fixed decimals). ``float``\ s are considered inexact by their nature
106107
and are only accepted with this option enabled.
107108

108109
:param bool validate_numbers: Whether to check inputted numbers to determine
109110
if they're valid for storage in DynamoDB and whether or not they conform
110111
to the ``allow_inexact`` parameter.
111112

112113
When enabled, attempts to serialize invalid numbers will result in a
113-
``ValueError`` being raised. When disabled, serialization is faster, but
114-
mistakes might only be caught after the serialized value has been sent
115-
to DynamoDB.
114+
:py:exc:`ValueError` being raised. When disabled, serialization is
115+
faster, but mistakes might only be caught after the serialized value has
116+
been sent to DynamoDB.
116117

117118
:param bool raw_transport: Indicates that values have not been
118119
pre-processed. For example, Base 64 strings have not been converted to
119120
bytes. Use this when using the AWS HTTP API without an AWS SDK.
120121

121-
:param DateFormat datetime_format: Determines how Python datetimes should be
122-
serialized. Possible enumerations are available on the ddbcereal top
123-
level module and the DateFormat enum:
124-
125-
.. autoclass:: ddbcereal.DateFormat
126-
:members:
127-
128-
:param DynamoDBType fraction_type: Determines how Python ``Fraction`` s should
129-
be serialized. Possible enumerations are available on the ddbcereal top
130-
level module and the DynamoDBType enum:
131-
132-
.. autoclass:: ddbcereal.DynamoDBType
133-
:members:
134-
:undoc-members:
122+
:param DateFormat datetime_format: Determines how Python
123+
:py:class:`~datetime.datetime`\ s should be serialized. Possible
124+
enumerations are available on the ddbcereal top level module and the
125+
:py:class:`~ddbcereal.DateFormat` enum.
126+
127+
:param DynamoDBType fraction_type: Determines how Python
128+
:py:class:`~fractions.Fraction`\ s should be serialized. Must be
129+
:py:attr:`~ddbcereal.DynamoDBType.NUMBER` or
130+
:py:attr:`~ddbcereal.DynamoDBType.STRING`. Enumerations are available on
131+
the ddbcereal top level module and the
132+
:py:class:`~ddbcereal.DynamoDBType` enum.
133+
134+
:param DynamoDBType empty_set_type: When an empty set is serialized, make
135+
the set this DynamoDB type. Must be
136+
:py:attr:`~ddbcereal.DynamoDBType.NUMBER_SET`,
137+
:py:attr:`~ddbcereal.DynamoDBType.STRING_SET`, or
138+
:py:attr:`~ddbcereal.DynamoDBType.BINARY_SET`. Enumerations are available
139+
on the ddbcereal top level module and the
140+
:py:class:`~ddbcereal.DynamoDBType` enum.
141+
142+
.. autoclass:: ddbcereal.DateFormat
143+
:members:
144+
145+
.. autoclass:: ddbcereal.DynamoDBType
146+
:members:
147+
:undoc-members:
135148

136149
Deserialize DynamoDB Data into Python
137150
-------------------------------------
@@ -173,23 +186,23 @@ Deserializer Options
173186

174187
:param bool allow_inexact: Whether to allow conversion to a Python number
175188
that won't exactly convey the value stored in DynamoDB (e.g. rounding of
176-
significant digits is required). Deserializing numbers to floats is only
177-
possible when this is enabled.
189+
significant digits is required). Deserializing numbers to ``float``\ s is
190+
only possible when this is enabled.
178191

179192
:param bool raw_transport: Indicates to deserialize values to be transported
180193
without additional processing. Bytes will be transported as Base 64
181194
strings. Use this when using the AWS HTTP API without an AWS SDK.
182195

183196
:param PythonNumber python_number: Determines how DynamoDB Numbers should be
184197
deserialized. Possible enumerations are available on the ddbcereal top
185-
level module and the PythonNumber enum:
198+
level module and the :py:class:`~ddbcereal.PythonNumber` enum:
186199

187200
.. autoclass:: ddbcereal.PythonNumber
188201
:members:
189202

190203
:param python_null_value: The Python value to convert DynamoDB Nulls to.
191-
Defaults to ``None``. An immutable value is recommended. Ignored if
192-
``python_null_factory`` is supplied.
204+
Defaults to :py:data:`None`. An immutable value is recommended. Ignored
205+
if ``python_null_factory`` is supplied.
193206

194207
:param Callable[[], Any] python_null_factory: A function invoked for every
195208
DynamoDB Null value. The Null is converted to the return value of the
@@ -202,9 +215,9 @@ conform to. They find appropriate Python types for the few types of data that
202215
DynamoDB can store. If you want to deserialize values into more advanced types,
203216
consider using a marshalling library like marshmallow or Pydantic.
204217

205-
They can take the dict produced by deserialize_item and create an object
206-
based on a schema, an object with fields of built-in types like dates, deques
207-
and of custom types.
218+
They can take the dict produced by deserialize_item and create an object based
219+
on a schema, an object with fields of built-in types like dates, deques and of
220+
custom types.
208221

209222
See
210223
:py:meth:`marshmallow.Schema.load` and

0 commit comments

Comments
 (0)