Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
36bd1e5
INTPYTHON-527 Add Queryable Encryption support
aclark4life Jun 25, 2025
70c946b
encrypted fields map != encrypted fields
aclark4life Aug 27, 2025
cb3512c
Use dot separator
aclark4life Sep 16, 2025
46ca9dc
Refactor tests
aclark4life Sep 19, 2025
87b1dc9
Add embedding
aclark4life Sep 19, 2025
d39f3b3
Review feedback
aclark4life Sep 19, 2025
3f8b5c2
remove EncryptedEmbeddedModel
timgraham Sep 23, 2025
441c584
PatientRecord shouldn't be encrypted
timgraham Sep 23, 2025
c9cc301
fix linting of kms_provider() docstring line length
timgraham Sep 23, 2025
332decb
Code review fixes
aclark4life Sep 30, 2025
0317fba
Use EncryptionTestCase instead of TestCase
aclark4life Oct 2, 2025
e3da2f6
make atlas tests use encryption settings
timgraham Oct 2, 2025
fe790c7
try shared library
timgraham Oct 3, 2025
2270058
try ubuntu 22.04 just to be sure
timgraham Oct 3, 2025
e38b496
Code review fixes
aclark4life Oct 3, 2025
36c6bfd
Update QE guide with complete Python tutorial
aclark4life Oct 3, 2025
86486eb
update version added to 5.2.2
aclark4life Oct 3, 2025
59865f7
Add tests for EncryptedFieldMixin
aclark4life Oct 3, 2025
fc87e9f
Remove confusing paragraph about crypt shared
aclark4life Oct 3, 2025
75873e9
Target 5.2.3 for release and require MongoDB 8
aclark4life Oct 3, 2025
324c959
try Mongo 8.0.15 on CI
timgraham Oct 3, 2025
13ed19a
Misc updates
aclark4life Oct 4, 2025
b23c4f2
Misc updates
aclark4life Oct 6, 2025
a0cd197
Kill the helper
aclark4life Oct 7, 2025
65b96b2
Misc updates
aclark4life Oct 8, 2025
80881fa
Add assertEncrypted to verify field data is binary
aclark4life Oct 9, 2025
25e7da1
Fails on CI only
aclark4life Oct 9, 2025
ff84902
Remove create_data_keys until use case manifests
aclark4life Oct 10, 2025
23f20a4
Add partial index on keyAltNames for uniqueness
aclark4life Oct 10, 2025
01ec095
Code review updates
aclark4life Oct 10, 2025
8a3ccf5
Code review updates
aclark4life Oct 10, 2025
7b0956d
Add EncryptedArrayField + test
aclark4life Oct 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 15 additions & 48 deletions django_mongodb_backend/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from django_mongodb_backend.indexes import SearchIndex

from .fields import EmbeddedModelArrayField, EmbeddedModelField
from .fields import EmbeddedModelField
from .gis.schema import GISSchemaEditor
from .query import wrap_database_errors
from .utils import OperationCollector, model_has_encrypted_fields
Expand Down Expand Up @@ -488,36 +488,6 @@ def _create_collection(self, model):
# Unencrypted path
db.create_collection(db_table)

def _get_data_key(
self,
client_encryption,
key_vault_collection,
create_data_keys,
kms_provider,
master_key,
key_alt_name,
):
"""Return an existing or newly-created data key ID for a field."""
if create_data_keys:
if not client_encryption:
raise ImproperlyConfigured("client_encryption is not configured.")
return client_encryption.create_data_key(
kms_provider=kms_provider,
master_key=master_key,
key_alt_names=[key_alt_name],
)
if key_vault_collection is None:
raise ImproperlyConfigured(
f"Encrypted field {key_alt_name} detected but no key vault configured"
)
key = key_vault_collection.find_one({"keyAltNames": key_alt_name})
if not key:
raise ValueError(
f"No key found in keyvault for keyAltName={key_alt_name}. "
"Run with '--create-data-keys' to create missing keys."
)
return key["_id"]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We lose this helpful message:

  File "/home/tim/code/django-mongodb/django_mongodb_backend/schema.py", line 541, in _get_encrypted_fields
    data_key = key["_id"]
               ~~~^^^^^^^
TypeError: 'NoneType' object is not subscriptable

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that logic was flawed. Updated to check for existing keys or create if not found.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then showencryptedfieldsmap isn't read-only and may have a side effect of creating keys?

What is still missing for me is the "why" of setting encrypted_fields_map in AutoEncryptionOpts. Based on past discussion in this PR, I recall it has something to do with security, but I don't think this is explained in this PR's documentation or in the design doc.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then showencryptedfieldsmap isn't read-only and may have a side effect of creating keys?

I've removed --create-data-keys so now we're just talking about showencryptedfieldsmap and yes, since it calls _get_encrypted_fields it will create the data keys if they aren't found in the key vault.

What is still missing for me is the "why" of setting encrypted_fields_map in AutoEncryptionOpts. Based on past discussion in this PR, I recall it has something to do with security, but I don't think this is explained in this PR's documentation or in the design doc.

That's in the spec:

Supplying an encryptedFieldsMap provides more security than relying on an encryptedFields obtained from the server. It protects against a malicious server advertising a false encryptedFields.

I don't fully understand that explanation so I haven't gone too far in documenting it other than to say "recommended".

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's necessary to understand how it works so we can document the proper workflow. For example, it feels to me like the sequence of actions might be: deploy your app, migrate, run showencryptedfieldsmap, put the result in your settings. (i.e. you should not deploy with encrypted_fields_map already set, because those keyIds won't be used as I described in #329 (comment)). (It's possible I'm wrong or that I made a mistake, but we cannot ship a feature with uncertainties like this.)

Copy link
Collaborator Author

@aclark4life aclark4life Oct 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's necessary to understand how it works so we can document the proper workflow. For example, it feels to me like the sequence of actions might be: deploy your app, migrate, run showencryptedfieldsmap, put the result in your settings. (i.e. you should not deploy with encrypted_fields_map already set, because those keyIds won't be used as I described in #329 (comment)). (It's possible I'm wrong or that I made a mistake, but we cannot ship a feature with uncertainties like this.)

I don't mean that we don't understand what we are building, I mean I'm not sure I understand how "a malicious server advertising a false encryptedFields" happens. In any event, mostly agree on the flow. I'm picturing:

  • Develop and test local
  • Deploy and run migrate
  • Run showencryptedfieldsmap and set encrypted_fields_map in auto_encryption_opts
  • Deploy again

At first I was picturing a scenario in which the second deployment wasn't needed, but I think client_encryption.create_data_key makes that very difficult if not impossible, thus invalidating the "single deployment" use case.


def _get_encrypted_fields(
self, model, create_data_keys=False, key_alt_name=None, path_prefix=None
):
Expand All @@ -532,26 +502,22 @@ def _get_encrypted_fields(
path_prefix = path_prefix or ""

options = client._options
auto_encryption_opts = getattr(options, "auto_encryption_opts", None)
auto_encryption_opts = options.auto_encryption_opts

This comment was marked as resolved.


key_vault_collection = None
if auto_encryption_opts:
key_vault_db, key_vault_coll = auto_encryption_opts._key_vault_namespace.split(".", 1)
key_vault_collection = client[key_vault_db][key_vault_coll]
key_vault_db, key_vault_coll = auto_encryption_opts._key_vault_namespace.split(".", 1)
key_vault_collection = client[key_vault_db][key_vault_coll]

kms_provider = router.kms_provider(model)
master_key = connection.settings_dict.get("KMS_CREDENTIALS", {}).get(kms_provider)
client_encryption = getattr(self.connection, "client_encryption", None)
client_encryption = self.connection.client_encryption

field_list = []

for field in fields:
new_key_alt_name = f"{key_alt_name}.{field.column}"
path = f"{path_prefix}.{field.column}" if path_prefix else field.column

if isinstance(field, (EmbeddedModelField, EmbeddedModelArrayField)) and not getattr(
field, "encrypted", False
):
if isinstance(field, EmbeddedModelField) and not getattr(field, "encrypted", False):
embedded_result = self._get_encrypted_fields(
field.embedded_model,
create_data_keys=create_data_keys,
Expand All @@ -564,14 +530,15 @@ def _get_encrypted_fields(

if getattr(field, "encrypted", False):
bson_type = field.db_type(connection)
data_key = self._get_data_key(
client_encryption,
key_vault_collection,
create_data_keys,
kms_provider,
master_key,
new_key_alt_name,
)
if create_data_keys:
data_key = client_encryption.create_data_key(
kms_provider=kms_provider,
master_key=master_key,
key_alt_names=[new_key_alt_name],
)
else:
key = key_vault_collection.find_one({"keyAltNames": new_key_alt_name})
data_key = key["_id"]
field_dict = {
"bsonType": bson_type,
"path": path,
Expand Down
12 changes: 6 additions & 6 deletions tests/encryption_/test_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,22 +32,22 @@
from .test_base import EncryptionTestCase


class EncryptedEmbeddedModelTests(EncryptionTestCase):
class EmbeddedModelTests(EncryptionTestCase):
def setUp(self):
self.billing = Billing(cc_type="Visa", cc_number="4111111111111111")
self.patient_record = PatientRecord(ssn="123-45-6789", billing=self.billing)
self.patient = Patient.objects.create(
patient_name="John Doe", patient_id=123456789, patient_record=self.patient_record
)

def test_patient(self):
def test_object(self):
patient = Patient.objects.get(id=self.patient.id)
self.assertEqual(patient.patient_record.ssn, "123-45-6789")
self.assertEqual(patient.patient_record.billing.cc_type, "Visa")
self.assertEqual(patient.patient_record.billing.cc_number, "4111111111111111")


class EncryptedEmbeddedModelArrayTests(EncryptionTestCase):
class EmbeddedModelArrayTests(EncryptionTestCase):
def setUp(self):
self.actor1 = Actor(name="Actor One")
self.actor2 = Actor(name="Actor Two")
Expand All @@ -56,13 +56,13 @@ def setUp(self):
cast=[self.actor1, self.actor2],
)

def test_movie_actors(self):
def test_array(self):
self.assertEqual(len(self.movie.cast), 2)
self.assertEqual(self.movie.cast[0].name, "Actor One")
self.assertEqual(self.movie.cast[1].name, "Actor Two")


class EncryptedFieldTests(EncryptionTestCase):
class FieldTests(EncryptionTestCase):
def assertEquality(self, model_cls, val):
model_cls.objects.create(value=val)
fetched = model_cls.objects.get(value=val)
Expand Down Expand Up @@ -162,7 +162,7 @@ def test_time(self):
)


class EncryptedFieldMixinTests(EncryptionTestCase):
class FieldMixinTests(EncryptionTestCase):
def test_null_true_raises_error(self):
with self.assertRaisesMessage(
ValueError, "'null=True' is not supported for encrypted fields."
Expand Down