Skip to content

Commit 59e3bcb

Browse files
committed
PYTHON-1883 Implement ClientEncryption class
1 parent a763461 commit 59e3bcb

File tree

4 files changed

+230
-13
lines changed

4 files changed

+230
-13
lines changed

doc/api/pymongo/encryption.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
:mod:`encryption` -- Client side encryption
2+
===========================================
3+
4+
.. automodule:: pymongo.encryption
5+
:members:

doc/api/pymongo/index.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,9 @@ Sub-modules:
4141
cursor_manager
4242
database
4343
driver_info
44-
errors
44+
encryption
4545
encryption_options
46+
errors
4647
message
4748
mongo_client
4849
mongo_replica_set_client

pymongo/encryption.py

Lines changed: 180 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,25 +12,33 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
"""Client side encryption implementation."""
15+
"""Client side encryption."""
1616

1717
import subprocess
18+
import uuid
1819
import weakref
1920

20-
from pymongocrypt.auto_encrypter import AutoEncrypter
21-
from pymongocrypt.errors import MongoCryptError
22-
from pymongocrypt.mongocrypt import MongoCryptOptions
23-
from pymongocrypt.state_machine import MongoCryptCallback
24-
25-
from bson import _bson_to_dict, _dict_to_bson
21+
try:
22+
from pymongocrypt.auto_encrypter import AutoEncrypter
23+
from pymongocrypt.errors import MongoCryptError
24+
from pymongocrypt.explicit_encrypter import ExplicitEncrypter
25+
from pymongocrypt.mongocrypt import MongoCryptOptions
26+
from pymongocrypt.state_machine import MongoCryptCallback
27+
_HAVE_PYMONGOCRYPT = True
28+
except ImportError:
29+
_HAVE_PYMONGOCRYPT = False
30+
MongoCryptCallback = object
31+
32+
from bson import _bson_to_dict, _dict_to_bson, decode, encode
2633
from bson.binary import STANDARD
2734
from bson.codec_options import CodecOptions
2835
from bson.raw_bson import (DEFAULT_RAW_BSON_OPTIONS,
2936
RawBSONDocument,
3037
_inflate_bson)
3138
from bson.son import SON
3239

33-
from pymongo.errors import (EncryptionError,
40+
from pymongo.errors import (ConfigurationError,
41+
EncryptionError,
3442
ServerSelectionTimeoutError)
3543
from pymongo.mongo_client import MongoClient
3644
from pymongo.pool import _configured_socket, PoolOptions
@@ -52,7 +60,10 @@ class _EncryptionIO(MongoCryptCallback):
5260
def __init__(self, client, key_vault_coll, mongocryptd_client, opts):
5361
"""Internal class to perform I/O on behalf of pymongocrypt."""
5462
# Use a weak ref to break reference cycle.
55-
self.client_ref = weakref.ref(client)
63+
if client is not None:
64+
self.client_ref = weakref.ref(client)
65+
else:
66+
self.client_ref = None
5667
self.key_vault_coll = key_vault_coll.with_options(
5768
codec_options=_KEY_VAULT_OPTS)
5869
self.mongocryptd_client = mongocryptd_client
@@ -167,15 +178,29 @@ def insert_data_key(self, data_key):
167178
res = self.key_vault_coll.insert_one(doc)
168179
return res.inserted_id
169180

181+
def bson_encode(self, doc):
182+
"""Encode a document to BSON.
183+
184+
A document can be any mapping type (like :class:`dict`).
185+
186+
:Parameters:
187+
- `doc`: mapping type representing a document
188+
189+
:Returns:
190+
The encoded BSON bytes.
191+
"""
192+
return encode(doc)
193+
170194
def close(self):
171195
"""Release resources.
172196
173197
Note it is not safe to call this method from __del__ or any GC hooks.
174198
"""
175199
self.client_ref = None
176200
self.key_vault_coll = None
177-
self.mongocryptd_client.close()
178-
self.mongocryptd_client = None
201+
if self.mongocryptd_client:
202+
self.mongocryptd_client.close()
203+
self.mongocryptd_client = None
179204

180205

181206
class _Encrypter(object):
@@ -262,3 +287,147 @@ def create(client, opts):
262287
io_callbacks = _EncryptionIO(
263288
client, key_vault_coll, mongocryptd_client, opts)
264289
return _Encrypter(io_callbacks, opts)
290+
291+
292+
class Algorithm(object):
293+
"""An enum that defines the supported encryption algorithms."""
294+
Deterministic = "AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic"
295+
Random = "AEAD_AES_256_CBC_HMAC_SHA_512-Random"
296+
297+
298+
class ClientEncryption(object):
299+
"""Explicit client side encryption."""
300+
301+
def __init__(self, kms_providers, key_vault_namespace, key_vault_client):
302+
"""Explicit client side encryption.
303+
304+
The ClientEncryption class encapsulates explicit operations on a key
305+
vault collection that cannot be done directly on a MongoClient. Similar
306+
to configuring auto encryption on a MongoClient, it is constructed with
307+
a MongoClient (to a MongoDB cluster containing the key vault
308+
collection), KMS provider configuration, and keyVaultNamespace. It
309+
provides an API for explicitly encrypting and decrypting values, and
310+
creating data keys. It does not provide an API to query keys from the
311+
key vault collection, as this can be done directly on the MongoClient.
312+
313+
:Parameters:
314+
- `kms_providers`: Map of KMS provider options. Two KMS providers
315+
are supported: "aws" and "local". The `kms_providers` map values
316+
differ by provider:
317+
318+
- `aws`: Map with "accessKeyId" and "secretAccessKey" as strings.
319+
These are the AWS access key ID and AWS secret access key used
320+
to generate KMS messages.
321+
- `local`: Map with "key" as a 96-byte array or string. "key"
322+
is the master key used to encrypt/decrypt data keys. This key
323+
should be generated and stored as securely as possible.
324+
325+
- `key_vault_namespace`: The namespace for the key vault collection.
326+
The key vault collection contains all data keys used for encryption
327+
and decryption. Data keys are stored as documents in this MongoDB
328+
collection. Data keys are protected with encryption by a KMS
329+
provider.
330+
- `key_vault_client`: A MongoClient connected to a MongoDB cluster
331+
containing the `key_vault_namespace` collection.
332+
333+
.. versionadded:: 3.9
334+
"""
335+
if not _HAVE_PYMONGOCRYPT:
336+
raise ConfigurationError(
337+
"client side encryption requires the pymongocrypt library: "
338+
"install a compatible version with: "
339+
"python -m pip install pymongo['encryption']")
340+
341+
self._kms_providers = kms_providers
342+
self._key_vault_namespace = key_vault_namespace
343+
self._key_vault_client = key_vault_client
344+
345+
db, coll = key_vault_namespace.split('.', 1)
346+
key_vault_coll = key_vault_client[db][coll]
347+
348+
self._io_callbacks = _EncryptionIO(None, key_vault_coll, None, None)
349+
self._encryption = ExplicitEncrypter(
350+
self._io_callbacks, MongoCryptOptions(kms_providers, None))
351+
352+
def create_data_key(self, kms_provider, master_key=None,
353+
key_alt_names=None):
354+
"""Create and insert a new data key into the key vault collection.
355+
356+
:Parameters:
357+
- `kms_provider`: The KMS provider to use. Supported values are
358+
"aws" and "local".
359+
- `master_key`: The `master_key` identifies a KMS-specific key used
360+
to encrypt the new data key. If the `kms_provider` is "local" the
361+
`master_key` is not applicable and may be omitted.
362+
If the `kms_provider` is "aws", `master_key` is required and must
363+
have the following fields:
364+
365+
- `region` (string): The AWS region as a string.
366+
- `key` (string): The Amazon Resource Name (ARN) to the AWS
367+
customer master key (CMK).
368+
369+
- `key_alt_names` (optional): An optional list of string alternate
370+
names used to reference a key. If a key is created with alternate
371+
names, then encryption may refer to the key by the unique alternate
372+
name instead of by ``key_id``. The following example shows creating
373+
and referring to a data key by alternate name::
374+
375+
client_encryption.create_data_key("local", key_alt_names=["name1"])
376+
# reference the key with the alternate name
377+
client_encryption.encrypt("457-55-5462", key_alt_name="name1",
378+
algorithm=Algorithm.Random)
379+
380+
:Returns:
381+
The ``_id`` of the created data key document.
382+
"""
383+
return self._encryption.create_data_key(
384+
kms_provider, master_key=master_key, key_alt_names=key_alt_names)
385+
386+
def encrypt(self, value, algorithm, key_id=None, key_alt_name=None):
387+
"""Encrypt a BSON value with a given key and algorithm.
388+
389+
Note that exactly one of ``key_id`` or ``key_alt_name`` must be
390+
provided.
391+
392+
:Parameters:
393+
- `value`: The BSON value to encrypt.
394+
- `algorithm` (string): The encryption algorithm to use. See
395+
:class:`Algorithm` for some valid options.
396+
- `key_id`: Identifies a data key by ``_id`` which must be a UUID
397+
or a :class:`~bson.binary.Binary` with subtype 4.
398+
- `key_alt_name`: Identifies a key vault document by 'keyAltName'.
399+
400+
:Returns:
401+
The encrypted value, a :class:`~bson.binary.Binary` with subtype 6.
402+
"""
403+
# TODO: Add a required codec_options argument for encoding?
404+
doc = encode({'v': value})
405+
if isinstance(key_id, uuid.UUID):
406+
raw_key_id = key_id.bytes
407+
else:
408+
raw_key_id = key_id
409+
encrypted_doc = self._encryption.encrypt(
410+
doc, algorithm, key_id=raw_key_id, key_alt_name=key_alt_name)
411+
return decode(encrypted_doc)['v']
412+
413+
def decrypt(self, value):
414+
"""Decrypt an encrypted value.
415+
416+
:Parameters:
417+
- `value` (Binary): The encrypted value, a
418+
:class:`~bson.binary.Binary` with subtype 6.
419+
420+
:Returns:
421+
The decrypted BSON value.
422+
"""
423+
doc = encode({'v': value})
424+
decrypted_doc = self._encryption.decrypt(doc)
425+
# TODO: Add a required codec_options argument for decoding?
426+
return decode(decrypted_doc)['v']
427+
428+
def close(self):
429+
"""Release resources."""
430+
self._io_callbacks.close()
431+
self._encryption.close()
432+
self._io_callbacks = None
433+
self._encryption = None

test/test_encryption.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import os
1818
import socket
1919
import sys
20+
import uuid
2021

2122
sys.path[0:0] = [""]
2223

@@ -28,8 +29,8 @@
2829
from bson.son import SON
2930

3031
from pymongo.errors import ConfigurationError
31-
from pymongo.mongo_client import MongoClient
3232
from pymongo.encryption_options import AutoEncryptionOpts, _HAVE_PYMONGOCRYPT
33+
from pymongo.mongo_client import MongoClient
3334
from pymongo.write_concern import WriteConcern
3435

3536
from test import unittest, IntegrationTest, PyMongoTestCase, client_context
@@ -42,6 +43,10 @@
4243
from pymongocrypt.binding import init
4344
init(os.environ.get('MONGOCRYPT_LIB', 'mongocrypt'))
4445

46+
# This has to be imported after calling init().
47+
from pymongo.encryption import (Algorithm,
48+
ClientEncryption)
49+
4550

4651
def get_client_opts(client):
4752
return client._MongoClient__options
@@ -234,6 +239,43 @@ def test_auto_encrypt_local_schema_map(self):
234239
self._test_auto_encrypt(opts)
235240

236241

242+
class TestExplicitSimple(EncryptionIntegrationTest):
243+
244+
def test_encrypt_decrypt(self):
245+
client_encryption = ClientEncryption(
246+
KMS_PROVIDERS, 'admin.datakeys', client_context.client)
247+
self.addCleanup(client_encryption.close)
248+
# Use standard UUID representation.
249+
key_vault = client_context.client.admin.get_collection(
250+
'datakeys', codec_options=OPTS)
251+
self.addCleanup(key_vault.drop)
252+
253+
# Create the encrypted field's data key.
254+
key_id = client_encryption.create_data_key(
255+
'local', key_alt_names=['name'])
256+
self.assertIsInstance(key_id, uuid.UUID)
257+
self.assertTrue(key_vault.find_one({'_id': key_id}))
258+
259+
# Create an unused data key to make sure filtering works.
260+
unused_key_id = client_encryption.create_data_key(
261+
'local', key_alt_names=['unused'])
262+
self.assertIsInstance(unused_key_id, uuid.UUID)
263+
self.assertTrue(key_vault.find_one({'_id': unused_key_id}))
264+
265+
doc = {'_id': 0, 'ssn': '000'}
266+
encrypted_ssn = client_encryption.encrypt(
267+
doc['ssn'], Algorithm.Deterministic, key_id=key_id)
268+
269+
# Ensure encryption via key_alt_name for the same key produces the
270+
# same output.
271+
encrypted_ssn2 = client_encryption.encrypt(
272+
doc['ssn'], Algorithm.Deterministic, key_alt_name='name')
273+
self.assertEqual(encrypted_ssn, encrypted_ssn2)
274+
275+
# Test decryption.
276+
decrypted_ssn = client_encryption.decrypt(encrypted_ssn)
277+
self.assertEqual(decrypted_ssn, doc['ssn'])
278+
237279
# Spec tests
238280

239281
AWS_CREDS = {

0 commit comments

Comments
 (0)