Skip to content

Commit 1ad14de

Browse files
author
Lucas McDonald
committed
m
1 parent 899af22 commit 1ad14de

File tree

12 files changed

+272
-15
lines changed

12 files changed

+272
-15
lines changed

CHANGELOG.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,22 @@
22
Changelog
33
*********
44

5+
4.0.1 -- 2025-03-XX
6+
7+
Fixes
8+
-----------
9+
* fix: Improve header serialization
10+
`#TODO <https://github.com/aws/aws-encryption-sdk-python/pull/TODO>`_
11+
ESDK-Python <4.0.1 would truncate non-ASCII key provider IDs it wrote to message headers.
12+
If a Raw or Custom MasterKeyProvider or Keyring supplied a non-ASCII key provider ID / key namespace,
13+
ESDK-Python would truncate the key provider ID it wrote to the message's header.
14+
The message can be decrypted by replacing the truncated provider ID with the expected provider ID in decryption code.
15+
Contact AWS for any questions about this approach.
16+
17+
Maintenance
18+
-----------
19+
* deps: Extend supported `MPL`_ version to include 1.9.1
20+
521
4.0.0 -- 2024-10-29
622
===================
723

@@ -425,6 +441,7 @@ Minor
425441
===================
426442
* Initial public release
427443

444+
.. _MPL: https://github.com/aws/aws-cryptographic-material-providers-library
428445
.. _breaking changes in attrs 17.1.0: https://attrs.readthedocs.io/en/stable/changelog.html
429446
.. _tox: https://tox.readthedocs.io/en/latest/
430447
.. _pylint: https://www.pylint.org/

README.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ Required Prerequisites
4242
Recommended Prerequisites
4343
=========================
4444

45-
* aws-cryptographic-material-providers: == 1.7.4
45+
* aws-cryptographic-material-providers: >=1.7.4, <=1.9.1
4646
* Requires Python 3.11+.
4747

4848
Installation

performance_tests/README.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ Required Prerequisites
4949
Recommended Prerequisites
5050
=========================
5151

52-
* aws-cryptographic-material-providers: == 1.7.4
52+
* aws-cryptographic-material-providers: >=1.7.4, <=1.9.1
5353
* Requires Python 3.11+.
5454

5555
*****
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
aws-cryptographic-material-providers==1.7.4
1+
aws-cryptographic-material-providers>=1.7.4,<=1.9.1

requirements_mpl.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
aws-cryptographic-material-providers==1.7.4
1+
aws-cryptographic-material-providers>=1.7.4,<=1.9.1

setup.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,8 @@ def get_requirements():
3939
keywords="aws-encryption-sdk aws kms encryption",
4040
license="Apache License 2.0",
4141
install_requires=get_requirements(),
42-
# pylint: disable=fixme
43-
# TODO-MPL: Point at PyPI once MPL is released.
44-
# This blocks releasing ESDK-Python MPL integration.
4542
extras_require={
46-
"MPL": ["aws-cryptographic-material-providers==1.7.4"],
43+
"MPL": ["aws-cryptographic-material-providers<=1.9.1"],
4744
},
4845
classifiers=[
4946
"Development Status :: 5 - Production/Stable",

src/aws_encryption_sdk/internal/formatting/deserialize.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,17 @@ def deserialize_encrypted_data_keys(stream, max_encrypted_data_keys=None):
145145
(key_provider_information,) = unpack_values(">{}s".format(key_provider_information_length), stream)
146146
(encrypted_data_key_length,) = unpack_values(">H", stream)
147147
encrypted_data_key = stream.read(encrypted_data_key_length)
148+
# ESDK-Python <4.0.1 incorrectly computed the key provider length for non-ASCII key provider IDs.
149+
# The length in the header was computed as the length of the key provider ID as a string instead of
150+
# the length of the key provider ID as UTF-8 bytes.
151+
# If a non-ASCII key provider ID were supplied, the key provider ID's UTF-8 bytes written to the header
152+
# would be truncated, and attempting to decrypt the message would result in a deserialization error.
153+
# That error would be raised when calling `to_str(key_provider_identifier)` below.
154+
# An impacted message can be decrypted by replacing the truncated provider ID with the expected provider ID
155+
# in decryption code.
156+
# Contact AWS for any questions about this approach.
157+
# ESDK-Python >=4.0.1 corrects the serialization logic and writes the correct length and expected bytes
158+
# to the message header.
148159
encrypted_data_keys.add(
149160
EncryptedDataKey(
150161
key_provider=MasterKeyInfo(

src/aws_encryption_sdk/internal/formatting/serialize.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,16 +35,30 @@ def serialize_encrypted_data_key(encrypted_data_key):
3535
"H" # encrypted data key length
3636
"{enc_data_key_len}s" # encrypted data key
3737
)
38+
# ESDK-Python <4.0.1 incorrectly computed len_key_provider_id_bytes for non-ASCII key provider IDs.
39+
# len_key_provider_id_bytes was computed as the length of the key provider ID as a string instead of
40+
# the length of the key provider ID as UTF-8 bytes.
41+
# If a non-ASCII key provider ID were supplied, the key provider ID as UTF-8 bytes written to the header
42+
# would be truncated, and attempting to decrypt the message would result in a deserialization error.
43+
# The message can be decrypted by replacing the truncated provider ID with the expected provider ID
44+
# in decryption code.
45+
# Contact AWS for any questions about this approach.
46+
# ESDK-Python >=4.0.1 corrects the serialization logic and writes the correct length and expected bytes
47+
# to the message header.
48+
key_provider_id_bytes = to_bytes(encrypted_data_key.key_provider.provider_id)
49+
len_key_provider_id_bytes = len(key_provider_id_bytes)
50+
key_info_bytes = to_bytes(encrypted_data_key.key_provider.key_info)
51+
len_key_info_bytes = len(key_info_bytes)
3852
return struct.pack(
3953
encrypted_data_key_format.format(
40-
provider_id_len=len(encrypted_data_key.key_provider.provider_id),
41-
provider_info_len=len(encrypted_data_key.key_provider.key_info),
54+
provider_id_len=len_key_provider_id_bytes,
55+
provider_info_len=len_key_info_bytes,
4256
enc_data_key_len=len(encrypted_data_key.encrypted_data_key),
4357
),
44-
len(encrypted_data_key.key_provider.provider_id),
45-
to_bytes(encrypted_data_key.key_provider.provider_id),
46-
len(encrypted_data_key.key_provider.key_info),
47-
to_bytes(encrypted_data_key.key_provider.key_info),
58+
len_key_provider_id_bytes,
59+
key_provider_id_bytes,
60+
len_key_info_bytes,
61+
key_info_bytes,
4862
len(encrypted_data_key.encrypted_data_key),
4963
encrypted_data_key.encrypted_data_key,
5064
)

test/unit/test_deserialize.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,16 @@ def test_deserialize_body_frame_final(self):
265265
assert test_body == VALUES["deserialized_body_final_frame_single"]
266266
assert test_final
267267

268+
def test_GIVEN_final_frame_content_length_equals_header_frame_length_WHEN_deserialize_header_THEN_no_error(self):
269+
"""Validate that the deserialize_body_frame function
270+
behaves as expected for a valid final body frame
271+
where the final frame length equals the header frame length.
272+
"""
273+
stream = io.BytesIO(VALUES["serialized_final_frame_512_length"])
274+
aws_encryption_sdk.internal.formatting.deserialize.deserialize_frame(
275+
stream=stream, header=VALUES["deserialized_header_frame_512_frame"]
276+
)
277+
268278
def test_deserialize_body_frame_final_invalid_final_frame_length(self):
269279
"""Validate that the deserialize_body_frame function
270280
behaves as expected for a valid final body frame.

test/unit/test_serialize.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
# Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
22
# SPDX-License-Identifier: Apache-2.0
33
"""Unit test suite for aws_encryption_sdk.internal.formatting.serialize"""
4+
import io
5+
import struct
6+
47
import pytest
58
from mock import MagicMock, patch, sentinel
69

10+
import aws_encryption_sdk.internal.formatting.deserialize
711
import aws_encryption_sdk.internal.formatting.serialize
812
from aws_encryption_sdk.exceptions import SerializationError
913
from aws_encryption_sdk.identifiers import ContentAADString, SerializationVersion
@@ -15,6 +19,8 @@
1519

1620
pytestmark = [pytest.mark.unit, pytest.mark.local]
1721

22+
provider_input_strings = ["", "abc", "𐀂", "abc𐀂", "𐀂abc", "秘密代码", "abc秘密代码", "秘密代码abc", "秘密代码abc𐀂", "𐀂abc秘密代码123𐀂"]
23+
1824

1925
@pytest.mark.parametrize(
2026
"sequence_number, error_message",
@@ -80,6 +86,146 @@ def apply_fixtures(self):
8086
self.mock_encrypt_patcher.stop()
8187
self.mock_valid_frame_length_patcher.stop()
8288

89+
@pytest.mark.parametrize("provider_id", provider_input_strings)
90+
@pytest.mark.parametrize("provider_info", provider_input_strings)
91+
def test_GIVEN_valid_encrypted_data_key_WHEN_serialize_encrypted_data_key_THEN_deserialize_equals_input(
92+
self,
93+
provider_id,
94+
provider_info,
95+
):
96+
# Given: Some valid encrypted data key
97+
key_provider = MasterKeyInfo(provider_id=provider_id, key_info=provider_info)
98+
encrypted_data_key = EncryptedDataKey(
99+
key_provider=key_provider, encrypted_data_key=VALUES["encrypted_data_key"]
100+
)
101+
102+
# When: serialize_encrypted_data_key
103+
serialized_edk = aws_encryption_sdk.internal.formatting.serialize.serialize_encrypted_data_key(
104+
encrypted_data_key=encrypted_data_key
105+
)
106+
107+
# Then: Can deserialize the value
108+
serialized_edks = bytes()
109+
# Hardcode to have only 1 EDK
110+
serialized_edks += struct.pack(">H", 1)
111+
serialized_edks += serialized_edk
112+
# Deserialization must not raise exception
113+
deserialized = aws_encryption_sdk.internal.formatting.deserialize.deserialize_encrypted_data_keys(
114+
stream=io.BytesIO(serialized_edks)
115+
)
116+
assert deserialized == {encrypted_data_key}
117+
assert len(deserialized) == 1
118+
deserialized_edk = list(deserialized)[0]
119+
assert deserialized_edk.key_provider == encrypted_data_key.key_provider
120+
assert deserialized_edk.key_provider.provider_id == encrypted_data_key.key_provider.provider_id
121+
assert deserialized_edk.key_provider.key_info == encrypted_data_key.key_provider.key_info
122+
assert deserialized_edk.encrypted_data_key == encrypted_data_key.encrypted_data_key
123+
124+
@pytest.mark.parametrize("edk_1_provider_id", provider_input_strings)
125+
@pytest.mark.parametrize("edk_1_provider_info", provider_input_strings)
126+
@pytest.mark.parametrize("edk_2_provider_id", provider_input_strings)
127+
@pytest.mark.parametrize("edk_2_provider_info", provider_input_strings)
128+
def test_GIVEN_two_distinct_valid_encrypted_data_keys_WHEN_serialize_encrypted_data_keys_THEN_deserialize_equals_inputs( # noqa pylint: disable=line-too-long
129+
self,
130+
edk_1_provider_id,
131+
edk_1_provider_info,
132+
edk_2_provider_id,
133+
edk_2_provider_info,
134+
):
135+
# pylint: disable=too-many-locals
136+
# Given: Two distinct valid encrypted data keys
137+
edk_1_key_provider = MasterKeyInfo(provider_id=edk_1_provider_id, key_info=edk_1_provider_info)
138+
encrypted_data_key_1 = EncryptedDataKey(
139+
key_provider=edk_1_key_provider, encrypted_data_key=VALUES["encrypted_data_key"]
140+
)
141+
142+
edk_2_key_provider = MasterKeyInfo(provider_id=edk_2_provider_id, key_info=edk_2_provider_info)
143+
encrypted_data_key_2 = EncryptedDataKey(
144+
key_provider=edk_2_key_provider, encrypted_data_key=VALUES["encrypted_data_key"]
145+
)
146+
147+
# Must be distinct
148+
if encrypted_data_key_1 == encrypted_data_key_2:
149+
return
150+
151+
# When: serialize_encrypted_data_key
152+
serialized_edk_1 = aws_encryption_sdk.internal.formatting.serialize.serialize_encrypted_data_key(
153+
encrypted_data_key=encrypted_data_key_1
154+
)
155+
serialized_edk_2 = aws_encryption_sdk.internal.formatting.serialize.serialize_encrypted_data_key(
156+
encrypted_data_key=encrypted_data_key_2
157+
)
158+
159+
# Then: Can deserialize the value
160+
serialized_edks = bytes()
161+
# Hardcode to have only 2 EDKs
162+
serialized_edks += struct.pack(">H", 2)
163+
serialized_edks += serialized_edk_1
164+
serialized_edks += serialized_edk_2
165+
# Deserialization must not raise exception
166+
deserialized = aws_encryption_sdk.internal.formatting.deserialize.deserialize_encrypted_data_keys(
167+
stream=io.BytesIO(serialized_edks)
168+
)
169+
assert deserialized == {encrypted_data_key_1, encrypted_data_key_2}
170+
assert len(deserialized) == 2
171+
deserialized_edk_list = list(deserialized)
172+
173+
deserialized_edk_some = deserialized_edk_list[0]
174+
deserialized_edk_other = deserialized_edk_list[1]
175+
176+
assert (
177+
(deserialized_edk_some == encrypted_data_key_1 and deserialized_edk_other == encrypted_data_key_2)
178+
or (deserialized_edk_some == encrypted_data_key_2 and deserialized_edk_other == encrypted_data_key_1)
179+
)
180+
181+
def test_GIVEN_invalid_encrypted_data_key_WHEN_serialize_THEN_raises_UnicodeEncodeError(
182+
self,
183+
):
184+
# Given: Some invalid encrypted data key
185+
186+
# This is invalid because "\ud800\udc02" cannot be encoded to UTF-8.
187+
# This value MUST be able to be encoded to UTF-8, or serialization will fail.
188+
invalid_provider_string = "\ud800\udc02"
189+
190+
# Then: raises UnicodeEncodeError
191+
with pytest.raises(UnicodeEncodeError):
192+
key_provider = MasterKeyInfo(provider_id=invalid_provider_string, key_info=invalid_provider_string)
193+
194+
encrypted_data_key = EncryptedDataKey(
195+
key_provider=key_provider, encrypted_data_key=VALUES["encrypted_data_key"]
196+
)
197+
198+
# When: serialize_encrypted_data_key
199+
aws_encryption_sdk.internal.formatting.serialize.serialize_encrypted_data_key(
200+
encrypted_data_key=encrypted_data_key
201+
)
202+
203+
# Then: raises UnicodeEncodeError
204+
with pytest.raises(UnicodeEncodeError):
205+
key_provider = MasterKeyInfo(provider_id=invalid_provider_string, key_info="abc")
206+
207+
encrypted_data_key = EncryptedDataKey(
208+
key_provider=key_provider, encrypted_data_key=VALUES["encrypted_data_key"]
209+
)
210+
211+
# When: serialize_encrypted_data_key
212+
aws_encryption_sdk.internal.formatting.serialize.serialize_encrypted_data_key(
213+
encrypted_data_key=encrypted_data_key
214+
)
215+
216+
# Then: raises UnicodeEncodeError
217+
with pytest.raises(UnicodeEncodeError):
218+
key_provider = MasterKeyInfo(provider_id="abc", key_info=invalid_provider_string)
219+
220+
encrypted_data_key = EncryptedDataKey(
221+
key_provider=key_provider, encrypted_data_key=VALUES["encrypted_data_key"]
222+
)
223+
224+
# When: serialize_encrypted_data_key
225+
aws_encryption_sdk.internal.formatting.serialize.serialize_encrypted_data_key(
226+
encrypted_data_key=encrypted_data_key
227+
)
228+
83229
def test_serialize_header_v1(self):
84230
"""Validate that the _serialize_header function
85231
behaves as expected.

0 commit comments

Comments
 (0)