INTPYTHON-348 aggregate via raw_mql

aclark4life · aclark4life · commit dc5238a2218f · 2024-10-23T21:35:02.000-04:00
diff --git a/django_mongodb/manager.py b/django_mongodb/manager.py
@@ -0,0 +1,6 @@
+from django.db.models.manager import BaseManager
+from .query import MongoQuerySet
+
+
+class MongoManager(BaseManager.from_queryset(MongoQuerySet)):
+    """Manager created from MongoQuerySet via BaseManager.from_queryset()."""
diff --git a/django_mongodb/query.py b/django_mongodb/query.py
@@ -1,14 +1,18 @@
 from functools import reduce, wraps
 from operator import add as add_operator
+from collections.abc import Mapping
 
 from django.core.exceptions import EmptyResultSet, FullResultSet
-from django.db import DatabaseError, IntegrityError, NotSupportedError
+from django.db import DatabaseError, IntegrityError, NotSupportedError, connections, router
+from django.db.models import QuerySet, prefetch_related_objects
 from django.db.models.expressions import Case, Col, When
 from django.db.models.functions import Mod
 from django.db.models.lookups import Exact
-from django.db.models.sql.constants import INNER
+from django.db.models.query import BaseIterable
+from django.db.models.sql.constants import INNER, GET_ITERATOR_CHUNK_SIZE
 from django.db.models.sql.datastructures import Join
 from django.db.models.sql.where import AND, OR, XOR, ExtraWhere, NothingNode, WhereNode
+from django.utils.functional import cached_property
 from pymongo.errors import BulkWriteError, DuplicateKeyError, PyMongoError
 
 
@@ -307,3 +311,283 @@ def register_nodes():
     Join.as_mql = join
     NothingNode.as_mql = NothingNode.as_sql
     WhereNode.as_mql = where_node
+
+
+class MongoQuerySet(QuerySet):
+    """QuerySet that can also run raw queries through raw_mql()."""
+
+    def raw_mql(self, raw_query, params=(), translations=None, using=None):
+        """Return a RawQuerySet for raw_query, defaulting to this queryset's db."""
+        return RawQuerySet(
+            raw_query,
+            model=self.model,
+            params=params,
+            translations=translations,
+            using=self.db if using is None else using,
+        )
+
+
+class RawQuerySet:
+    """
+    Provide an iterator which converts the results of raw_mql() queries into
+    annotated model instances.
+    """
+
+    def __init__(
+        self,
+        raw_query,
+        model=None,
+        query=None,
+        params=(),
+        translations=None,
+        using=None,
+        hints=None,
+    ):
+        self.raw_query = raw_query
+        self.model = model
+        self._db = using
+        self._hints = hints or {}
+        self.query = query or RawQuery(sql=raw_query, using=self.db, params=params)
+        self.params = params
+        self.translations = translations or {}
+        self._result_cache = None
+        self._prefetch_related_lookups = ()
+        self._prefetch_done = False
+
+    def resolve_model_init_order(self):
+        """Resolve the init field names and value positions."""
+        converter = connections[self.db].introspection.identifier_converter
+        model_init_fields = [
+            f for f in self.model._meta.fields if converter(f.column) in self.columns
+        ]
+        annotation_fields = [
+            (column, pos)
+            for pos, column in enumerate(self.columns)
+            if column not in self.model_fields
+        ]
+        model_init_order = [self.columns.index(converter(f.column)) for f in model_init_fields]
+        model_init_names = [f.attname for f in model_init_fields]
+        return model_init_names, model_init_order, annotation_fields
+
+    def prefetch_related(self, *lookups):
+        """Same as QuerySet.prefetch_related()"""
+        clone = self._clone()
+        if lookups == (None,):
+            clone._prefetch_related_lookups = ()
+        else:
+            clone._prefetch_related_lookups = clone._prefetch_related_lookups + lookups
+        return clone
+
+    def _prefetch_related_objects(self):
+        prefetch_related_objects(self._result_cache, *self._prefetch_related_lookups)
+        self._prefetch_done = True
+
+    def _clone(self):
+        """Same as QuerySet._clone()"""
+        c = self.__class__(
+            self.raw_query,
+            model=self.model,
+            query=self.query,
+            params=self.params,
+            translations=self.translations,
+            using=self._db,
+            hints=self._hints,
+        )
+        c._prefetch_related_lookups = self._prefetch_related_lookups[:]
+        return c
+
+    def _fetch_all(self):
+        if self._result_cache is None:
+            self._result_cache = list(self.iterator())
+        if self._prefetch_related_lookups and not self._prefetch_done:
+            self._prefetch_related_objects()
+
+    def __len__(self):
+        self._fetch_all()
+        return len(self._result_cache)
+
+    def __bool__(self):
+        self._fetch_all()
+        return bool(self._result_cache)
+
+    def __iter__(self):
+        self._fetch_all()
+        return iter(self._result_cache)
+
+    def __aiter__(self):
+        # __aiter__ itself is synchronous; only the generator it returns is
+        # async. sync_to_async is imported here as the module doesn't import it.
+        from asgiref.sync import sync_to_async
+
+        async def generator():
+            await sync_to_async(self._fetch_all)()
+            for item in self._result_cache:
+                yield item
+
+        return generator()
+
+    def iterator(self):
+        yield from RawModelIterable(self)
+
+    def __repr__(self):
+        return "<%s: %s>" % (self.__class__.__name__, self.query)
+
+    def __getitem__(self, k):
+        return list(self)[k]
+
+    @property
+    def db(self):
+        """Return the database used if this query is executed now."""
+        return self._db or router.db_for_read(self.model, **self._hints)
+
+    def using(self, alias):
+        """Select the database this RawQuerySet should execute against."""
+        return RawQuerySet(
+            self.raw_query,
+            model=self.model,
+            query=self.query.chain(using=alias),
+            params=self.params,
+            translations=self.translations,
+            using=alias,
+        )
+
+    @cached_property
+    def columns(self):
+        """
+        A list of model field names in the order they'll appear in the
+        query results.
+        """
+        columns = self.query.get_columns()
+        # Adjust any column names which don't match field names
+        for query_name, model_name in self.translations.items():
+            # Ignore translations for nonexistent column names
+            try:
+                index = columns.index(query_name)
+            except ValueError:
+                pass
+            else:
+                columns[index] = model_name
+        return columns
+
+    @cached_property
+    def model_fields(self):
+        """A dict mapping converted column names to model fields."""
+        converter = connections[self.db].introspection.identifier_converter
+        return {
+            converter(field.get_attname_column()[1]): field for field in self.model._meta.fields
+        }
+
+
+class RawQuery:
+    """A single raw query, executed through a database cursor."""
+
+    def __init__(self, sql, using, params=()):
+        self.params = params
+        self.sql = sql
+        self.using = using
+        self.cursor = None
+
+        # The compiler post-processes results, so expose the attributes of a
+        # regular query that it reads.
+        self.low_mark = 0  # Offset.
+        self.high_mark = None  # Limit.
+        self.extra_select = {}
+        self.annotation_select = {}
+
+    def chain(self, using):
+        return self.clone(using)
+
+    def clone(self, using):
+        return RawQuery(self.sql, using, params=self.params)
+
+    def get_columns(self):
+        if self.cursor is None:
+            self._execute_query()
+        convert = connections[self.using].introspection.identifier_converter
+        return [convert(meta[0]) for meta in self.cursor.description]
+
+    def __iter__(self):
+        # Each new iterator runs the query again; caching the rows would
+        # trade RAM for speed.
+        self._execute_query()
+        if connections[self.using].features.can_use_chunked_reads:
+            return iter(self.cursor)
+        # Without chunked reads, the whole result set must be evaluated
+        # up front.
+        return iter(list(self.cursor))
+
+    def __repr__(self):
+        return "<%s: %s>" % (self.__class__.__name__, self)
+
+    @property
+    def params_type(self):
+        if self.params is None:
+            return None
+        if isinstance(self.params, Mapping):
+            return dict
+        return tuple
+
+    def __str__(self):
+        params_type = self.params_type
+        return self.sql if params_type is None else self.sql % params_type(self.params)
+
+    def _execute_query(self):
+        connection = connections[self.using]
+        params_type = self.params_type
+        adapt = connection.ops.adapt_unknown_value
+
+        # The target column types are unknown here, so adapt each parameter
+        # to the database on a best-effort basis. See #17755.
+        if params_type is None:
+            params = None
+        elif params_type is tuple:
+            params = tuple(map(adapt, self.params))
+        elif params_type is dict:
+            params = {name: adapt(value) for name, value in self.params.items()}
+        else:
+            raise RuntimeError("Unexpected params type: %s" % params_type)
+
+        self.cursor = connection.cursor()
+        self.cursor.execute(self.sql, params)
+
+
+class RawModelIterable(BaseIterable):
+    """
+    Iterable that yields a model instance for each row from a raw queryset.
+    """
+
+    def __iter__(self):
+        # Imported locally because this module has no FieldDoesNotExist import.
+        from django.core.exceptions import FieldDoesNotExist
+
+        # Cache some things for performance reasons outside the loop.
+        db = self.queryset.db
+        query = self.queryset.query
+        connection = connections[db]
+        compiler = connection.ops.compiler("SQLCompiler")(query, connection, db)
+        query_iterator = iter(query)
+
+        try:
+            init_order = self.queryset.resolve_model_init_order()
+            model_init_names, model_init_pos, annotation_fields = init_order
+            model_cls = self.queryset.model
+            if model_cls._meta.pk.attname not in model_init_names:
+                raise FieldDoesNotExist("Raw query must include the primary key")
+            fields = [self.queryset.model_fields.get(c) for c in self.queryset.columns]
+            converters = compiler.get_converters(
+                [f.get_col(f.model._meta.db_table) if f else None for f in fields]
+            )
+            if converters:
+                query_iterator = compiler.apply_converters(query_iterator, converters)
+            for values in query_iterator:
+                # Associate fields to values
+                model_init_values = [values[pos] for pos in model_init_pos]
+                instance = model_cls.from_db(db, model_init_names, model_init_values)
+                if annotation_fields:
+                    for column, pos in annotation_fields:
+                        setattr(instance, column, values[pos])
+                yield instance
+        finally:
+            # Done iterating the Query. If it has its own cursor, close it.
+            if hasattr(query, "cursor") and query.cursor:
+                query.cursor.close()