From 38cf2b4bef7f32ed832e51493df52d70fa00f10d Mon Sep 17 00:00:00 2001 From: Tim Graham Date: Sun, 4 May 2025 19:52:47 -0400 Subject: [PATCH 1/2] Move assertAddRemoveIndex() to a mixin --- tests/indexes_/test_base.py | 24 ++++++++++++++++++++++++ tests/indexes_/test_condition.py | 14 ++------------ 2 files changed, 26 insertions(+), 12 deletions(-) create mode 100644 tests/indexes_/test_base.py diff --git a/tests/indexes_/test_base.py b/tests/indexes_/test_base.py new file mode 100644 index 000000000..d8bc12cf5 --- /dev/null +++ b/tests/indexes_/test_base.py @@ -0,0 +1,24 @@ +from django.db import connection + + +class SchemaAssertionMixin: + def assertAddRemoveIndex(self, editor, model, index): + with self.assertNumQueries(1): + editor.add_index(index=index, model=model) + try: + self.assertIn( + index.name, + connection.introspection.get_constraints( + cursor=None, + table_name=model._meta.db_table, + ), + ) + finally: + editor.remove_index(index=index, model=model) + self.assertNotIn( + index.name, + connection.introspection.get_constraints( + cursor=None, + table_name=model._meta.db_table, + ), + ) diff --git a/tests/indexes_/test_condition.py b/tests/indexes_/test_condition.py index 3c49f1ec3..b614c10cc 100644 --- a/tests/indexes_/test_condition.py +++ b/tests/indexes_/test_condition.py @@ -5,20 +5,10 @@ from django.test import TestCase from .models import Article +from .test_base import SchemaAssertionMixin -class PartialIndexTests(TestCase): - def assertAddRemoveIndex(self, editor, model, index): - editor.add_index(index=index, model=model) - self.assertIn( - index.name, - connection.introspection.get_constraints( - cursor=None, - table_name=model._meta.db_table, - ), - ) - editor.remove_index(index=index, model=model) - +class PartialIndexTests(SchemaAssertionMixin, TestCase): def test_not_supported(self): msg = "MongoDB does not support the 'isnull' lookup in indexes." 
with connection.schema_editor() as editor, self.assertRaisesMessage(NotSupportedError, msg): From 8804c873a22ed6cad0baab4db348b8fcdee8bb33 Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Sun, 4 May 2025 19:48:13 -0400 Subject: [PATCH 2/2] Add SearchIndex and VectorSearchIndex Co-authored-by: Tim Graham --- .github/workflows/mongodb_settings.py | 3 + .github/workflows/start_local_atlas.sh | 24 ++ .github/workflows/test-python-atlas.yml | 56 ++++ django_mongodb_backend/__init__.py | 2 + django_mongodb_backend/checks.py | 32 ++ django_mongodb_backend/features.py | 17 + .../fields/embedded_model.py | 3 + django_mongodb_backend/indexes.py | 183 ++++++++++- django_mongodb_backend/introspection.py | 35 ++- django_mongodb_backend/schema.py | 15 +- django_mongodb_backend/utils.py | 3 + docs/source/conf.py | 1 + docs/source/index.rst | 1 + docs/source/ref/models/index.rst | 1 + docs/source/ref/models/indexes.rst | 55 ++++ docs/source/releases/5.2.x.rst | 13 + requirements.txt | 2 +- tests/indexes_/models.py | 22 ++ tests/indexes_/test_checks.py | 222 +++++++++++++ tests/indexes_/test_search_indexes.py | 292 ++++++++++++++++++ 20 files changed, 976 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/start_local_atlas.sh create mode 100644 .github/workflows/test-python-atlas.yml create mode 100644 django_mongodb_backend/checks.py create mode 100644 docs/source/ref/models/indexes.rst create mode 100644 tests/indexes_/test_checks.py create mode 100644 tests/indexes_/test_search_indexes.py diff --git a/.github/workflows/mongodb_settings.py b/.github/workflows/mongodb_settings.py index bdcc696cb..49d44a5fc 100644 --- a/.github/workflows/mongodb_settings.py +++ b/.github/workflows/mongodb_settings.py @@ -17,10 +17,13 @@ "default": { "ENGINE": "django_mongodb_backend", "NAME": "djangotests", + # Required when connecting to the Atlas image in Docker. 
+ "OPTIONS": {"directConnection": True}, }, "other": { "ENGINE": "django_mongodb_backend", "NAME": "djangotests-other", + "OPTIONS": {"directConnection": True}, }, } diff --git a/.github/workflows/start_local_atlas.sh b/.github/workflows/start_local_atlas.sh new file mode 100644 index 000000000..95eb5d1f3 --- /dev/null +++ b/.github/workflows/start_local_atlas.sh @@ -0,0 +1,24 @@ +#!/bin/bash +set -eu + +echo "Starting the container" + +IMAGE=${1:-mongodb/mongodb-atlas-local:latest} +DOCKER=$(which docker || which podman) + +$DOCKER pull $IMAGE + +$DOCKER kill mongodb_atlas_local || true + +CONTAINER_ID=$($DOCKER run --rm -d --name mongodb_atlas_local -p 27017:27017 $IMAGE) + +function wait() { + CONTAINER_ID=$1 + echo "waiting for container to become healthy..." + $DOCKER logs mongodb_atlas_local +} + +wait "$CONTAINER_ID" + +# Sleep for a bit to let all services start. +sleep 5 diff --git a/.github/workflows/test-python-atlas.yml b/.github/workflows/test-python-atlas.yml new file mode 100644 index 000000000..175dfe183 --- /dev/null +++ b/.github/workflows/test-python-atlas.yml @@ -0,0 +1,56 @@ +name: Python Tests on Atlas + +on: + pull_request: + paths: + - '**.py' + - '!setup.py' + - '.github/workflows/test-python-atlas.yml' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +defaults: + run: + shell: bash -eux {0} + +jobs: + build: + name: Django Test Suite + runs-on: ubuntu-latest + steps: + - name: Checkout django-mongodb-backend + uses: actions/checkout@v4 + with: + persist-credentials: false + - name: install django-mongodb-backend + run: | + pip3 install --upgrade pip + pip3 install -e . 
+ - name: Checkout Django + uses: actions/checkout@v4 + with: + repository: 'mongodb-forks/django' + ref: 'mongodb-5.2.x' + path: 'django_repo' + persist-credentials: false + - name: Install system packages for Django's Python test dependencies + run: | + sudo apt-get update + sudo apt-get install libmemcached-dev + - name: Install Django and its Python test dependencies + run: | + cd django_repo/tests/ + pip3 install -e .. + pip3 install -r requirements/py3.txt + - name: Copy the test settings file + run: cp .github/workflows/mongodb_settings.py django_repo/tests/ + - name: Copy the test runner file + run: cp .github/workflows/runtests.py django_repo/tests/runtests_.py + - name: Start local Atlas + working-directory: . + run: bash .github/workflows/start_local_atlas.sh mongodb/mongodb-atlas-local:7 + - name: Run tests + run: python3 django_repo/tests/runtests_.py diff --git a/django_mongodb_backend/__init__.py b/django_mongodb_backend/__init__.py index 65b819b2a..6367e225b 100644 --- a/django_mongodb_backend/__init__.py +++ b/django_mongodb_backend/__init__.py @@ -7,6 +7,7 @@ check_django_compatability() from .aggregates import register_aggregates # noqa: E402 +from .checks import register_checks # noqa: E402 from .expressions import register_expressions # noqa: E402 from .fields import register_fields # noqa: E402 from .functions import register_functions # noqa: E402 @@ -17,6 +18,7 @@ __all__ = ["parse_uri"] register_aggregates() +register_checks() register_expressions() register_fields() register_functions() diff --git a/django_mongodb_backend/checks.py b/django_mongodb_backend/checks.py new file mode 100644 index 000000000..866afc4ba --- /dev/null +++ b/django_mongodb_backend/checks.py @@ -0,0 +1,32 @@ +from itertools import chain + +from django.apps import apps +from django.core.checks import Tags, register +from django.db import connections, router + + +def check_indexes(app_configs, databases=None, **kwargs): # noqa: ARG001 + """ + Call Index.check() on all 
model indexes. + + This function will be obsolete when Django calls Index.check() after + https://code.djangoproject.com/ticket/36273. + """ + errors = [] + if app_configs is None: + models = apps.get_models() + else: + models = chain.from_iterable(app_config.get_models() for app_config in app_configs) + for model in models: + for db in databases or (): + if not router.allow_migrate_model(db, model): + continue + connection = connections[db] + for model_index in model._meta.indexes: + if hasattr(model_index, "check"): + errors.extend(model_index.check(model, connection)) + return errors + + +def register_checks(): + register(check_indexes, Tags.models) diff --git a/django_mongodb_backend/features.py b/django_mongodb_backend/features.py index fda1e6b85..2670a5239 100644 --- a/django_mongodb_backend/features.py +++ b/django_mongodb_backend/features.py @@ -1,5 +1,6 @@ from django.db.backends.base.features import BaseDatabaseFeatures from django.utils.functional import cached_property +from pymongo.errors import OperationFailure class DatabaseFeatures(BaseDatabaseFeatures): @@ -548,3 +549,19 @@ def django_test_expected_failures(self): @cached_property def is_mongodb_6_3(self): return self.connection.get_database_version() >= (6, 3) + + @cached_property + def supports_atlas_search(self): + """Does the server support Atlas search queries and search indexes?""" + try: + # An existing collection must be used on MongoDB 6, otherwise + # the operation will not error when unsupported. + self.connection.get_collection("django_migrations").list_search_indexes() + except OperationFailure: + # It would be best to check the error message or error code to + # avoid hiding some other exception, but the message/code varies + # across MongoDB versions. Example error message: + # "$listSearchIndexes stage is only allowed on MongoDB Atlas". 
+ return False + else: + return True diff --git a/django_mongodb_backend/fields/embedded_model.py b/django_mongodb_backend/fields/embedded_model.py index cf45eac8a..57bbd3f50 100644 --- a/django_mongodb_backend/fields/embedded_model.py +++ b/django_mongodb_backend/fields/embedded_model.py @@ -20,6 +20,9 @@ def __init__(self, embedded_model, *args, **kwargs): self.embedded_model = embedded_model super().__init__(*args, **kwargs) + def db_type(self, connection): + return "embeddedDocuments" + def check(self, **kwargs): from ..models import EmbeddedModel diff --git a/django_mongodb_backend/indexes.py b/django_mongodb_backend/indexes.py index 8ddceda76..aeff8cf33 100644 --- a/django_mongodb_backend/indexes.py +++ b/django_mongodb_backend/indexes.py @@ -1,12 +1,16 @@ +import itertools from collections import defaultdict +from django.core.checks import Error, Warning from django.db import NotSupportedError -from django.db.models import Index +from django.db.models import FloatField, Index, IntegerField from django.db.models.lookups import BuiltinLookup from django.db.models.sql.query import Query from django.db.models.sql.where import AND, XOR, WhereNode from pymongo import ASCENDING, DESCENDING -from pymongo.operations import IndexModel +from pymongo.operations import IndexModel, SearchIndexModel + +from django_mongodb_backend.fields import ArrayField from .query_utils import process_rhs @@ -101,6 +105,181 @@ def where_node_idx(self, compiler, connection): return mql +class SearchIndex(Index): + suffix = "six" + _error_id_prefix = "django_mongodb_backend.indexes.SearchIndex" + + def __init__(self, *, fields=(), name=None): + super().__init__(fields=fields, name=name) + + def check(self, model, connection): + errors = [] + if not connection.features.supports_atlas_search: + errors.append( + Warning( + f"This MongoDB server does not support {self.__class__.__name__}.", + hint=( + "The index won't be created. 
Use an Atlas-enabled version of MongoDB, " + "or silence this warning if you don't care about it." + ), + obj=model, + id=f"{self._error_id_prefix}.W001", + ) + ) + return errors + + def search_index_data_types(self, db_type): + """ + Map a model field's type to search index type. + https://www.mongodb.com/docs/atlas/atlas-search/define-field-mappings/#data-types + """ + if db_type in {"double", "int", "long"}: + return "number" + if db_type == "binData": + return "string" + if db_type == "bool": + return "boolean" + if db_type == "object": + return "document" + if db_type == "array": + return "embeddedDocuments" + return db_type + + def get_pymongo_index_model( + self, model, schema_editor, field=None, unique=False, column_prefix="" + ): + if not schema_editor.connection.features.supports_atlas_search: + return None + fields = {} + for field_name, _ in self.fields_orders: + field = model._meta.get_field(field_name) + type_ = self.search_index_data_types(field.db_type(schema_editor.connection)) + field_path = column_prefix + model._meta.get_field(field_name).column + fields[field_path] = {"type": type_} + return SearchIndexModel( + definition={"mappings": {"dynamic": False, "fields": fields}}, name=self.name + ) + + +class VectorSearchIndex(SearchIndex): + suffix = "vsi" + _error_id_prefix = "django_mongodb_backend.indexes.VectorSearchIndex" + VALID_FIELD_TYPES = frozenset(("boolean", "date", "number", "objectId", "string", "uuid")) + VALID_SIMILARITIES = frozenset(("cosine", "dotProduct", "euclidean")) + + def __init__(self, *, fields=(), name=None, similarities): + super().__init__(fields=fields, name=name) + self.similarities = similarities + self._multiple_similarities = isinstance(similarities, tuple | list) + for func in similarities if self._multiple_similarities else (similarities,): + if func not in self.VALID_SIMILARITIES: + raise ValueError( + f"'{func}' isn't a valid similarity function " + f"({', '.join(sorted(self.VALID_SIMILARITIES))})." 
+ ) + seen_fields = set() + for field_name, _ in self.fields_orders: + if field_name in seen_fields: + raise ValueError(f"Field '{field_name}' is duplicated in fields.") + seen_fields.add(field_name) + + def check(self, model, connection): + errors = super().check(model, connection) + num_arrayfields = 0 + for field_name, _ in self.fields_orders: + field = model._meta.get_field(field_name) + if isinstance(field, ArrayField): + num_arrayfields += 1 + try: + int(field.size) + except (ValueError, TypeError): + errors.append( + Error( + f"VectorSearchIndex requires 'size' on field '{field_name}'.", + obj=model, + id=f"{self._error_id_prefix}.E002", + ) + ) + if not isinstance(field.base_field, FloatField | IntegerField): + errors.append( + Error( + "VectorSearchIndex requires the base field of " + f"ArrayField '{field.name}' to be FloatField or " + "IntegerField but is " + f"{field.base_field.get_internal_type()}.", + obj=model, + id=f"{self._error_id_prefix}.E003", + ) + ) + else: + search_type = self.search_index_data_types(field.db_type(connection)) + if search_type not in self.VALID_FIELD_TYPES: + errors.append( + Error( + "VectorSearchIndex does not support field " + f"'{field_name}' ({field.get_internal_type()}).", + obj=model, + id=f"{self._error_id_prefix}.E004", + hint=f"Allowed types are {', '.join(sorted(self.VALID_FIELD_TYPES))}.", + ) + ) + if self._multiple_similarities and num_arrayfields != len(self.similarities): + errors.append( + Error( + f"VectorSearchIndex requires the same number of similarities " + f"and vector fields; {model._meta.object_name} has " + f"{num_arrayfields} ArrayField(s) but similarities " + f"has {len(self.similarities)} element(s).", + obj=model, + id=f"{self._error_id_prefix}.E005", + ) + ) + if num_arrayfields == 0: + errors.append( + Error( + "VectorSearchIndex requires at least one ArrayField to " "store vector data.", + obj=model, + id=f"{self._error_id_prefix}.E006", + hint="If you want to perform search operations without 
vectors, " + "use SearchIndex instead.", + ) + ) + return errors + + def deconstruct(self): + path, args, kwargs = super().deconstruct() + kwargs["similarities"] = self.similarities + return path, args, kwargs + + def get_pymongo_index_model( + self, model, schema_editor, field=None, unique=False, column_prefix="" + ): + if not schema_editor.connection.features.supports_atlas_search: + return None + similarities = ( + itertools.cycle([self.similarities]) + if not self._multiple_similarities + else iter(self.similarities) + ) + fields = [] + for field_name, _ in self.fields_orders: + field_ = model._meta.get_field(field_name) + field_path = column_prefix + model._meta.get_field(field_name).column + mappings = {"path": field_path} + if isinstance(field_, ArrayField): + mappings.update( + { + "type": "vector", + "numDimensions": int(field_.size), + "similarity": next(similarities), + } + ) + else: + mappings["type"] = "filter" + fields.append(mappings) + return SearchIndexModel(definition={"fields": fields}, name=self.name, type="vectorSearch") + + def register_indexes(): BuiltinLookup.as_mql_idx = builtin_lookup_idx Index._get_condition_mql = _get_condition_mql diff --git a/django_mongodb_backend/introspection.py b/django_mongodb_backend/introspection.py index fee0f5fea..77e068072 100644 --- a/django_mongodb_backend/introspection.py +++ b/django_mongodb_backend/introspection.py @@ -2,6 +2,8 @@ from django.db.models import Index from pymongo import ASCENDING, DESCENDING +from django_mongodb_backend.indexes import SearchIndex, VectorSearchIndex + class DatabaseIntrospection(BaseDatabaseIntrospection): ORDER_DIR = {ASCENDING: "ASC", DESCENDING: "DESC"} @@ -9,7 +11,7 @@ class DatabaseIntrospection(BaseDatabaseIntrospection): def table_names(self, cursor=None, include_views=False): return sorted([x["name"] for x in self.connection.database.list_collections()]) - def get_constraints(self, cursor, table_name): + def _get_index_info(self, table_name): indexes = 
self.connection.get_collection(table_name).index_information() constraints = {} for name, details in indexes.items(): @@ -30,3 +32,34 @@ def get_constraints(self, cursor, table_name): "options": {}, } return constraints + + def _get_search_index_info(self, table_name): + if not self.connection.features.supports_atlas_search: + return {} + constraints = {} + indexes = self.connection.get_collection(table_name).list_search_indexes() + for details in indexes: + if details["type"] == "vectorSearch": + columns = [field["path"] for field in details["latestDefinition"]["fields"]] + type_ = VectorSearchIndex.suffix + options = details + else: + options = details["latestDefinition"]["mappings"] + columns = list(options.get("fields", {}).keys()) + type_ = SearchIndex.suffix + constraints[details["name"]] = { + "check": False, + "columns": columns, + "definition": None, + "foreign_key": None, + "index": True, + "orders": [], + "primary_key": False, + "type": type_, + "unique": False, + "options": options, + } + return constraints + + def get_constraints(self, cursor, table_name): + return {**self._get_index_info(table_name), **self._get_search_index_info(table_name)} diff --git a/django_mongodb_backend/schema.py b/django_mongodb_backend/schema.py index 8ae609186..da3ec9613 100644 --- a/django_mongodb_backend/schema.py +++ b/django_mongodb_backend/schema.py @@ -1,5 +1,8 @@ from django.db.backends.base.schema import BaseDatabaseSchemaEditor from django.db.models import Index, UniqueConstraint +from pymongo.operations import SearchIndexModel + +from django_mongodb_backend.indexes import SearchIndex from .fields import EmbeddedModelField from .query import wrap_database_errors @@ -265,7 +268,10 @@ def add_index( ) if idx: model = parent_model or model - self.get_collection(model._meta.db_table).create_indexes([idx]) + if isinstance(idx, SearchIndexModel): + self.get_collection(model._meta.db_table).create_search_index(idx) + else: + 
self.get_collection(model._meta.db_table).create_indexes([idx]) def _add_composed_index(self, model, field_names, column_prefix="", parent_model=None): """Add an index on the given list of field_names.""" @@ -283,7 +289,12 @@ def _add_field_index(self, model, field, *, column_prefix=""): def remove_index(self, model, index): if index.contains_expressions: return - self.get_collection(model._meta.db_table).drop_index(index.name) + if isinstance(index, SearchIndex): + # Drop the index if it's supported. + if self.connection.features.supports_atlas_search: + self.get_collection(model._meta.db_table).drop_search_index(index.name) + else: + self.get_collection(model._meta.db_table).drop_index(index.name) def _remove_composed_index( self, model, field_names, constraint_kwargs, column_prefix="", parent_model=None diff --git a/django_mongodb_backend/utils.py b/django_mongodb_backend/utils.py index 95e3b5a8e..194c4698c 100644 --- a/django_mongodb_backend/utils.py +++ b/django_mongodb_backend/utils.py @@ -107,11 +107,14 @@ class OperationDebugWrapper: "aggregate", "create_collection", "create_indexes", + "create_search_index", "drop", "index_information", "insert_many", "delete_many", "drop_index", + "drop_search_index", + "list_search_indexes", "rename", "update_many", } diff --git a/docs/source/conf.py b/docs/source/conf.py index d097e0df0..9b9f84e0a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -44,6 +44,7 @@ "mongodb": ("https://www.mongodb.com/docs/languages/python/django-mongodb/v5.1/", None), "pymongo": ("https://pymongo.readthedocs.io/en/stable/", None), "python": ("https://docs.python.org/3/", None), + "atlas": ("https://www.mongodb.com/docs/atlas/", None), } root_doc = "contents" diff --git a/docs/source/index.rst b/docs/source/index.rst index e81c6dde4..04097ab0b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -38,6 +38,7 @@ Models - :doc:`ref/models/fields` - :doc:`ref/models/querysets` - :doc:`ref/models/models` +- 
:doc:`ref/models/indexes` **Topic guides:** diff --git a/docs/source/ref/models/index.rst b/docs/source/ref/models/index.rst index d5cb63ce1..42f7a12bb 100644 --- a/docs/source/ref/models/index.rst +++ b/docs/source/ref/models/index.rst @@ -10,3 +10,4 @@ Model API reference. fields querysets models + indexes diff --git a/docs/source/ref/models/indexes.rst b/docs/source/ref/models/indexes.rst new file mode 100644 index 000000000..cae18fd1c --- /dev/null +++ b/docs/source/ref/models/indexes.rst @@ -0,0 +1,55 @@ +===================== +Model index reference +===================== + +.. module:: django_mongodb_backend.indexes + :synopsis: Database indexes for MongoDB. + +Some MongoDB-specific :doc:`indexes `, for use on a +model's :attr:`Meta.indexes ` option, are +available in ``django_mongodb_backend.indexes``. + +``SearchIndex`` +=============== + +.. class:: SearchIndex(*, fields=(), name=None) + +.. versionadded:: 5.2.0b0 + +Creates a basic :doc:`search index ` on +the given field(s). + +Some fields such as :class:`~django.db.models.DecimalField` aren't +supported. See the :ref:`Atlas documentation ` for a +complete list of unsupported data types. + +If ``name`` isn't provided, one will be generated automatically. If you need +to reference the name in your search query and don't provide your own name, +you can look up the generated one using ``Model._meta.indexes[0].name`` +(substituting the name of your model as well as a different list index if your +model has multiple indexes). + +``VectorSearchIndex`` +===================== + +.. class:: VectorSearchIndex(*, fields=(), name=None, similarities) + +.. versionadded:: 5.2.0b0 + +A subclass of :class:`SearchIndex` that creates a :doc:`vector search index +` on the given field(s). + +The index must reference at least one vector field: an :class:`.ArrayField` +with a :attr:`~.ArrayField.base_field` of :class:`~django.db.models.FloatField` +or :class:`~django.db.models.IntegerField` and a :attr:`~.ArrayField.size`. 
It +cannot reference an :class:`.ArrayField` of any other type. + +It may also have other fields to filter on, provided the field stores +``boolean``, ``date``, ``objectId``, ``numeric``, ``string``, or ``uuid``. + +Available values for the required ``similarities`` keyword argument are +``"cosine"``, ``"dotProduct"``, and ``"euclidean"`` (see +:ref:`atlas:avs-similarity-functions` for how to choose). You can provide this +value either as a string, in which case that value will be applied to all vector +fields, or as a list or tuple of values with a similarity corresponding to each +vector field. diff --git a/docs/source/releases/5.2.x.rst b/docs/source/releases/5.2.x.rst index 29870179a..d6bdc937d 100644 --- a/docs/source/releases/5.2.x.rst +++ b/docs/source/releases/5.2.x.rst @@ -13,6 +13,19 @@ Initial release from the state of :ref:`django-mongodb-backend 5.1.0 beta 2 Regarding new features in Django 5.2, :class:`~django.db.models.CompositePrimaryKey` isn't supported. +New features +------------ + +*These features won't appear in Django MongoDB Backend 5.1.x.* + +- Added :class:`.SearchIndex` and :class:`.VectorSearchIndex` for use on + a model's :attr:`Meta.indexes `. + +Backwards incompatible changes +------------------------------ + +- The minimum supported version of ``pymongo`` is increased from 4.6 to 4.7. 
+ Bug fixes --------- diff --git a/requirements.txt b/requirements.txt index 5f081ffd7..b69357196 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ django>=5.2,<6.0 -pymongo>=4.6,<5.0 +pymongo>=4.7,<5.0 diff --git a/tests/indexes_/models.py b/tests/indexes_/models.py index acfeb5816..6e50c083e 100644 --- a/tests/indexes_/models.py +++ b/tests/indexes_/models.py @@ -1,7 +1,29 @@ from django.db import models +from django_mongodb_backend.fields import ArrayField, EmbeddedModelField, ObjectIdField +from django_mongodb_backend.models import EmbeddedModel + class Article(models.Model): headline = models.CharField(max_length=100) number = models.IntegerField() body = models.TextField() + + +class Data(EmbeddedModel): + integer = models.IntegerField() + + +class SearchIndexTestModel(models.Model): + big_integer = models.BigIntegerField() + binary = models.BinaryField() + boolean = models.BooleanField() + char = models.CharField(max_length=100) + datetime = models.DateTimeField(auto_now=True) + embedded_model = EmbeddedModelField(Data) + float = models.FloatField() + integer = models.IntegerField() + json = models.JSONField() + object_id = ObjectIdField() + vector_float = ArrayField(models.FloatField(), size=10) + vector_integer = ArrayField(models.IntegerField(), size=10) diff --git a/tests/indexes_/test_checks.py b/tests/indexes_/test_checks.py new file mode 100644 index 000000000..c39d88c7d --- /dev/null +++ b/tests/indexes_/test_checks.py @@ -0,0 +1,222 @@ +from unittest import mock + +from django.core import checks +from django.db import connection, models +from django.test import TestCase +from django.test.utils import isolate_apps, override_system_checks + +from django_mongodb_backend.checks import check_indexes +from django_mongodb_backend.fields import ArrayField, ObjectIdField +from django_mongodb_backend.indexes import SearchIndex, VectorSearchIndex +from django_mongodb_backend.models import EmbeddedModel + + +@isolate_apps("indexes_", 
attr_name="apps") +@override_system_checks([check_indexes]) +@mock.patch.object(connection.features, "supports_atlas_search", False) +class UnsupportedSearchIndexesTests(TestCase): + def test_search_requires_atlas_search_support(self): + class Article(models.Model): + title = models.CharField(max_length=10) + + class Meta: + indexes = [SearchIndex(fields=["title"])] + + errors = checks.run_checks(app_configs=self.apps.get_app_configs(), databases={"default"}) + self.assertEqual( + errors, + [ + checks.Warning( + "This MongoDB server does not support SearchIndex.", + hint=( + "The index won't be created. Use an Atlas-enabled version of MongoDB, " + "or silence this warning if you don't care about it." + ), + obj=Article, + id="django_mongodb_backend.indexes.SearchIndex.W001", + ) + ], + ) + + def test_vector_search_requires_atlas_search_support(self): + class Article(models.Model): + title = models.CharField(max_length=10) + vector = ArrayField(models.FloatField(), size=10) + + class Meta: + indexes = [VectorSearchIndex(fields=["title", "vector"], similarities="cosine")] + + errors = checks.run_checks(app_configs=self.apps.get_app_configs(), databases={"default"}) + self.assertEqual( + errors, + [ + checks.Warning( + "This MongoDB server does not support VectorSearchIndex.", + hint=( + "The index won't be created. Use an Atlas-enabled version of MongoDB, " + "or silence this warning if you don't care about it." 
+ ), + obj=Article, + id="django_mongodb_backend.indexes.VectorSearchIndex.W001", + ) + ], + ) + + +@isolate_apps("indexes_", attr_name="apps") +@override_system_checks([check_indexes]) +@mock.patch.object(connection.features, "supports_atlas_search", True) +class InvalidVectorSearchIndexesTests(TestCase): + def test_requires_size(self): + class Article(models.Model): + title_embedded = ArrayField(models.FloatField()) + + class Meta: + indexes = [VectorSearchIndex(fields=["title_embedded"], similarities="cosine")] + + errors = checks.run_checks(app_configs=self.apps.get_app_configs(), databases={"default"}) + self.assertEqual( + errors, + [ + checks.Error( + "VectorSearchIndex requires 'size' on field 'title_embedded'.", + id="django_mongodb_backend.indexes.VectorSearchIndex.E002", + obj=Article, + ) + ], + ) + + def test_requires_float_inner_field(self): + class Article(models.Model): + title_embedded = ArrayField(models.CharField(), size=30) + + class Meta: + indexes = [VectorSearchIndex(fields=["title_embedded"], similarities="cosine")] + + errors = checks.run_checks(app_configs=self.apps.get_app_configs(), databases={"default"}) + self.assertEqual( + errors, + [ + checks.Error( + "VectorSearchIndex requires the base field of ArrayField " + "'title_embedded' to be FloatField or IntegerField but is CharField.", + id="django_mongodb_backend.indexes.VectorSearchIndex.E003", + obj=Article, + ) + ], + ) + + def test_unsupported_type(self): + class Article(models.Model): + data = models.JSONField() + vector = ArrayField(models.FloatField(), size=10) + + class Meta: + indexes = [VectorSearchIndex(fields=["data", "vector"], similarities="cosine")] + + errors = checks.run_checks(app_configs=self.apps.get_app_configs(), databases={"default"}) + self.assertEqual( + errors, + [ + checks.Error( + "VectorSearchIndex does not support field 'data' (JSONField).", + id="django_mongodb_backend.indexes.VectorSearchIndex.E004", + obj=Article, + hint="Allowed types are boolean, date, 
number, objectId, string, uuid.", + ) + ], + ) + + def test_fields_and_similarities_mismatch(self): + class Article(models.Model): + vector = ArrayField(models.FloatField(), size=10) + + class Meta: + indexes = [ + VectorSearchIndex( + fields=["vector"], + similarities=["dotProduct", "cosine"], + ) + ] + + errors = checks.run_checks(app_configs=self.apps.get_app_configs(), databases={"default"}) + self.assertEqual( + errors, + [ + checks.Error( + "VectorSearchIndex requires the same number of similarities " + "and vector fields; Article has 1 ArrayField(s) but similarities " + "has 2 element(s).", + id="django_mongodb_backend.indexes.VectorSearchIndex.E005", + obj=Article, + ), + ], + ) + + def test_simple(self): + class Article(models.Model): + vector = ArrayField(models.FloatField(), size=10) + + class Meta: + indexes = [VectorSearchIndex(fields=["vector"], similarities="cosine")] + + errors = checks.run_checks(app_configs=self.apps.get_app_configs(), databases={"default"}) + self.assertEqual(errors, []) + + def test_valid_fields(self): + class Data(EmbeddedModel): + integer = models.IntegerField() + + class SearchIndexTestModel(models.Model): + text = models.CharField(max_length=100) + object_id = ObjectIdField() + number = models.IntegerField() + vector_integer = ArrayField(models.IntegerField(), size=10) + vector_float = ArrayField(models.FloatField(), size=10) + boolean = models.BooleanField() + date = models.DateTimeField(auto_now=True) + + class Meta: + indexes = [ + VectorSearchIndex( + name="recent_test_idx", + fields=[ + "text", + "object_id", + "number", + "vector_integer", + "vector_float", + "boolean", + "date", + ], + similarities="cosine", + ) + ] + + errors = checks.run_checks(app_configs=self.apps.get_app_configs(), databases={"default"}) + self.assertEqual(errors, []) + + def test_requires_vector_field(self): + class NoSearchVectorModel(models.Model): + text = models.CharField(max_length=100) + + class Meta: + indexes = [ + VectorSearchIndex( + 
name="recent_test_idx", fields=["text"], similarities="cosine" + ) + ] + + errors = checks.run_checks(app_configs=self.apps.get_app_configs(), databases={"default"}) + self.assertEqual( + errors, + [ + checks.Error( + "VectorSearchIndex requires at least one ArrayField to " "store vector data.", + id="django_mongodb_backend.indexes.VectorSearchIndex.E006", + obj=NoSearchVectorModel, + hint="If you want to perform search operations without vectors, " + "use SearchIndex instead.", + ), + ], + ) diff --git a/tests/indexes_/test_search_indexes.py b/tests/indexes_/test_search_indexes.py new file mode 100644 index 000000000..d9a7348d6 --- /dev/null +++ b/tests/indexes_/test_search_indexes.py @@ -0,0 +1,292 @@ +from unittest import mock + +from django.db import connection +from django.test import SimpleTestCase, TestCase, skipUnlessDBFeature + +from django_mongodb_backend.indexes import SearchIndex, VectorSearchIndex + +from .models import SearchIndexTestModel +from .test_base import SchemaAssertionMixin + + +@mock.patch.object(connection.features, "supports_atlas_search", False) +class UnsupportedSearchIndexesTests(TestCase): + def test_search_index_not_created(self): + index = SearchIndex(name="recent_test_idx", fields=["number"]) + with connection.schema_editor() as editor, self.assertNumQueries(0): + editor.add_index(index=index, model=SearchIndexTestModel) + self.assertNotIn( + index.name, + connection.introspection.get_constraints( + cursor=None, + table_name=SearchIndexTestModel._meta.db_table, + ), + ) + with connection.schema_editor() as editor, self.assertNumQueries(0): + editor.remove_index(index=index, model=SearchIndexTestModel) + + def test_vector_index_not_created(self): + index = VectorSearchIndex(name="recent_test_idx", fields=["number"], similarities="cosine") + with connection.schema_editor() as editor, self.assertNumQueries(0): + editor.add_index(index=index, model=SearchIndexTestModel) + self.assertNotIn( + index.name, + 
connection.introspection.get_constraints( + cursor=None, + table_name=SearchIndexTestModel._meta.db_table, + ), + ) + with connection.schema_editor() as editor, self.assertNumQueries(0): + editor.remove_index(index=index, model=SearchIndexTestModel) + + +class SearchIndexTests(SimpleTestCase): + def test_no_init_args(self): + """All arguments must be kwargs.""" + msg = "SearchIndex.__init__() takes 1 positional argument but 2 were given" + with self.assertRaisesMessage(TypeError, msg): + SearchIndex("foo") + + def test_no_extra_kargs(self): + """Unused kwargs that appear on Index aren't accepted.""" + msg = "SearchIndex.__init__() got an unexpected keyword argument 'condition'" + with self.assertRaisesMessage(TypeError, msg): + SearchIndex(condition="") + + +class VectorSearchIndexTests(SimpleTestCase): + def test_no_init_args(self): + """All arguments must be kwargs.""" + msg = "VectorSearchIndex.__init__() takes 1 positional argument but 2 were given" + with self.assertRaisesMessage(TypeError, msg): + VectorSearchIndex("foo") + + def test_no_extra_kargs(self): + """Unused kwargs that appear on Index aren't accepted.""" + msg = "VectorSearchIndex.__init__() got an unexpected keyword argument 'condition'" + with self.assertRaisesMessage(TypeError, msg): + VectorSearchIndex(condition="") + + def test_similarities_required(self): + msg = ( + "VectorSearchIndex.__init__() missing 1 required keyword-only argument: 'similarities'" + ) + with self.assertRaisesMessage(TypeError, msg): + VectorSearchIndex(name="recent_test_idx", fields=["number"]) + + def test_deconstruct(self): + index = VectorSearchIndex(name="recent_test_idx", fields=["number"], similarities="cosine") + name, args, kwargs = index.deconstruct() + self.assertEqual( + kwargs, {"name": "recent_test_idx", "fields": ["number"], "similarities": "cosine"} + ) + new = VectorSearchIndex(*args, **kwargs) + self.assertEqual(new.similarities, index.similarities) + + def test_invalid_similarity(self): + msg = "'sum' 
isn't a valid similarity function (cosine, dotProduct, euclidean)." + with self.assertRaisesMessage(ValueError, msg): + VectorSearchIndex(fields=["vector_data"], similarities="sum") + + def test_invalid_similarity_in_list(self): + msg = "'sum' isn't a valid similarity function (cosine, dotProduct, euclidean)." + with self.assertRaisesMessage(ValueError, msg): + VectorSearchIndex(fields=["vector_data"], similarities=["cosine", "sum"]) + + def test_define_field_twice(self): + msg = "Field 'vector_data' is duplicated in fields." + with self.assertRaisesMessage(ValueError, msg): + VectorSearchIndex( + fields=["vector_data", "vector_data"], + similarities="dotProduct", + ) + + +@skipUnlessDBFeature("supports_atlas_search") +class SearchIndexSchemaTests(SchemaAssertionMixin, TestCase): + def test_simple(self): + index = SearchIndex(name="recent_test_idx", fields=["char"]) + with connection.schema_editor() as editor: + self.assertAddRemoveIndex(editor, index=index, model=SearchIndexTestModel) + + def test_valid_fields(self): + index = SearchIndex( + name="recent_test_idx", + fields=[ + "big_integer", + "binary", + "char", + "boolean", + "datetime", + "embedded_model", + "float", + "integer", + "json", + "object_id", + "vector_integer", + "vector_float", + ], + ) + with connection.schema_editor() as editor: + editor.add_index(index=index, model=SearchIndexTestModel) + try: + index_info = connection.introspection.get_constraints( + cursor=None, + table_name=SearchIndexTestModel._meta.db_table, + ) + expected_options = { + "dynamic": False, + "fields": { + "big_integer": { + "indexDoubles": True, + "indexIntegers": True, + "representation": "double", + "type": "number", + }, + "binary": { + "indexOptions": "offsets", + "norms": "include", + "store": True, + "type": "string", + }, + "boolean": {"type": "boolean"}, + "char": { + "indexOptions": "offsets", + "norms": "include", + "store": True, + "type": "string", + }, + "datetime": {"type": "date"}, + "embedded_model": 
{"dynamic": False, "fields": {}, "type": "embeddedDocuments"}, + "float": { + "indexDoubles": True, + "indexIntegers": True, + "representation": "double", + "type": "number", + }, + "integer": { + "indexDoubles": True, + "indexIntegers": True, + "representation": "double", + "type": "number", + }, + "json": {"dynamic": False, "fields": {}, "type": "document"}, + "object_id": {"type": "objectId"}, + "vector_float": {"dynamic": False, "fields": {}, "type": "embeddedDocuments"}, + "vector_integer": {"dynamic": False, "fields": {}, "type": "embeddedDocuments"}, + }, + } + self.assertCountEqual(index_info[index.name]["columns"], index.fields) + self.assertEqual(index_info[index.name]["options"], expected_options) + finally: + with connection.schema_editor() as editor: + editor.remove_index(index=index, model=SearchIndexTestModel) + + +@skipUnlessDBFeature("supports_atlas_search") +class VectorSearchIndexSchemaTests(SchemaAssertionMixin, TestCase): + def test_simple(self): + index = VectorSearchIndex(name="recent_test_idx", fields=["integer"], similarities="cosine") + with connection.schema_editor() as editor: + self.assertAddRemoveIndex(editor, index=index, model=SearchIndexTestModel) + + def test_multiple_fields(self): + index = VectorSearchIndex( + name="recent_test_idx", + fields=[ + "boolean", + "char", + "datetime", + "embedded_model", + "integer", + "object_id", + "vector_float", + "vector_integer", + ], + similarities="cosine", + ) + with connection.schema_editor() as editor: + editor.add_index(index=index, model=SearchIndexTestModel) + try: + index_info = connection.introspection.get_constraints( + cursor=None, + table_name=SearchIndexTestModel._meta.db_table, + ) + expected_options = { + "latestDefinition": { + "fields": [ + {"path": "boolean", "type": "filter"}, + {"path": "char", "type": "filter"}, + {"path": "datetime", "type": "filter"}, + {"path": "embedded_model", "type": "filter"}, + {"path": "integer", "type": "filter"}, + {"path": "object_id", "type": 
"filter"}, + { + "numDimensions": 10, + "path": "vector_float", + "similarity": "cosine", + "type": "vector", + }, + { + "numDimensions": 10, + "path": "vector_integer", + "similarity": "cosine", + "type": "vector", + }, + ] + }, + "latestVersion": 0, + "name": "recent_test_idx", + "queryable": False, + "type": "vectorSearch", + } + self.assertCountEqual(index_info[index.name]["columns"], index.fields) + index_info[index.name]["options"].pop("id") + index_info[index.name]["options"].pop("status") + self.assertEqual(index_info[index.name]["options"], expected_options) + finally: + with connection.schema_editor() as editor: + editor.remove_index(index=index, model=SearchIndexTestModel) + + def test_similarities_list(self): + index = VectorSearchIndex( + name="recent_test_idx", + fields=["vector_float", "vector_integer"], + similarities=["cosine", "euclidean"], + ) + with connection.schema_editor() as editor: + editor.add_index(index=index, model=SearchIndexTestModel) + try: + index_info = connection.introspection.get_constraints( + cursor=None, + table_name=SearchIndexTestModel._meta.db_table, + ) + expected_options = { + "latestDefinition": { + "fields": [ + { + "numDimensions": 10, + "path": "vector_float", + "similarity": "cosine", + "type": "vector", + }, + { + "numDimensions": 10, + "path": "vector_integer", + "similarity": "euclidean", + "type": "vector", + }, + ] + }, + "latestVersion": 0, + "name": "recent_test_idx", + "queryable": False, + "type": "vectorSearch", + } + self.assertCountEqual(index_info[index.name]["columns"], index.fields) + index_info[index.name]["options"].pop("id") + index_info[index.name]["options"].pop("status") + self.assertEqual(index_info[index.name]["options"], expected_options) + finally: + with connection.schema_editor() as editor: + editor.remove_index(index=index, model=SearchIndexTestModel)