Skip to content

add support for ordering by expressions #94

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,6 @@ Migrations for 'admin':
- `Subquery`, `Exists`, and using a `QuerySet` in `QuerySet.annotate()` aren't
supported.

* Ordering a `QuerySet` by `nulls_first` or `nulls_last` isn't supported.
Neither is randomized ordering.

- `DateTimeField` doesn't support microsecond precision, and correspondingly,
`DurationField` stores milliseconds rather than microseconds.

Expand Down
126 changes: 67 additions & 59 deletions django_mongodb/compiler.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
import itertools
from collections import defaultdict

from bson import SON
from django.core.exceptions import EmptyResultSet, FullResultSet
from django.db import DatabaseError, IntegrityError, NotSupportedError
from django.db.models import Count, Expression
from django.db.models.aggregates import Aggregate, Variance
from django.db.models.expressions import Col, OrderBy, Value
from django.db.models.expressions import Case, Col, Ref, Value, When
from django.db.models.functions.comparison import Coalesce
from django.db.models.functions.math import Power
from django.db.models.lookups import IsNull
from django.db.models.sql import compiler
from django.db.models.sql.constants import GET_ITERATOR_CHUNK_SIZE, MULTI, ORDER_DIR, SINGLE
from django.db.models.sql.constants import GET_ITERATOR_CHUNK_SIZE, MULTI, SINGLE
from django.utils.functional import cached_property
from pymongo import ASCENDING, DESCENDING

from .base import Cursor
from .query import MongoQuery, wrap_database_errors
Expand All @@ -25,6 +28,8 @@ class SQLCompiler(compiler.SQLCompiler):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.aggregation_pipeline = None
# A list of OrderBy objects for this query.
self.order_by_objs = None

def _get_group_alias_column(self, expr, annotation_group_idx):
"""Generate a dummy field for use in the ids fields in $group."""
Expand Down Expand Up @@ -98,7 +103,7 @@ def _prepare_expressions_for_pipeline(self, expression, target, annotation_group
replacements[sub_expr] = replacing_expr
return replacements, group

def _prepare_annotations_for_aggregation_pipeline(self):
def _prepare_annotations_for_aggregation_pipeline(self, order_by):
"""Prepare annotations for the aggregation pipeline."""
replacements = {}
group = {}
Expand All @@ -110,6 +115,13 @@ def _prepare_annotations_for_aggregation_pipeline(self):
)
replacements.update(new_replacements)
group.update(expr_group)
for expr, _ in order_by:
if expr.contains_aggregate:
new_replacements, expr_group = self._prepare_expressions_for_pipeline(
expr, None, annotation_group_idx
)
replacements.update(new_replacements)
group.update(expr_group)
having_replacements, having_group = self._prepare_expressions_for_pipeline(
self.having, None, annotation_group_idx
)
Expand All @@ -121,9 +133,10 @@ def _get_group_id_expressions(self, order_by):
"""Generate group ID expressions for the aggregation pipeline."""
group_expressions = set()
replacements = {}
for expr, (_, _, is_ref) in order_by:
if not is_ref:
group_expressions |= set(expr.get_group_by_cols())
if not self._meta_ordering:
for expr, (_, _, is_ref) in order_by:
if not is_ref:
group_expressions |= set(expr.get_group_by_cols())
for expr, *_ in self.select:
group_expressions |= set(expr.get_group_by_cols())
having_group_by = self.having.get_group_by_cols() if self.having else ()
Expand Down Expand Up @@ -187,7 +200,7 @@ def _build_aggregation_pipeline(self, ids, group):

def pre_sql_setup(self, with_col_aliases=False):
extra_select, order_by, group_by = super().pre_sql_setup(with_col_aliases=with_col_aliases)
group, all_replacements = self._prepare_annotations_for_aggregation_pipeline()
group, all_replacements = self._prepare_annotations_for_aggregation_pipeline(order_by)
# query.group_by is either:
# - None: no GROUP BY
# - True: group by select fields
Expand All @@ -211,6 +224,7 @@ def pre_sql_setup(self, with_col_aliases=False):
target: expr.replace_expressions(all_replacements)
for target, expr in self.query.annotation_select.items()
}
self.order_by_objs = [expr.replace_expressions(all_replacements) for expr, _ in order_by]
return extra_select, order_by, group_by

def execute_sql(
Expand Down Expand Up @@ -333,8 +347,17 @@ def build_query(self, columns=None):
self.check_query()
query = self.query_class(self)
query.lookup_pipeline = self.get_lookup_pipeline()
query.order_by(self._get_ordering())
query.project_fields = self.get_project_fields(columns, ordering=query.ordering)
ordering_fields, sort_ordering, extra_fields = self._get_ordering()
query.project_fields = self.get_project_fields(columns, ordering_fields)
query.ordering = sort_ordering
# If columns is None, then get_project_fields() won't add
# ordering_fields to $project. Use $addFields (extra_fields) instead.
if columns is None:
extra_fields += ordering_fields
if extra_fields:
query.extra_fields = {
field_name: expr.as_mql(self, self.connection) for field_name, expr in extra_fields
}
where = self.get_where()
try:
expr = where.as_mql(self, self.connection) if where else {}
Expand Down Expand Up @@ -380,52 +403,6 @@ def project_field(column):
+ tuple(map(project_field, related_columns))
)

def _get_ordering(self):
"""
Return a list of (field, ascending) tuples that the query results
should be ordered by. If there is no field ordering defined, return
the standard_ordering (a boolean, needed for MongoDB "$natural"
ordering).
"""
opts = self.query.get_meta()
ordering = (
self.query.order_by or opts.ordering
if self.query.default_ordering
else self.query.order_by
)
if not ordering:
return self.query.standard_ordering
default_order, _ = ORDER_DIR["ASC" if self.query.standard_ordering else "DESC"]
column_ordering = []
columns_seen = set()
for order in ordering:
if order == "?":
raise NotSupportedError("Randomized ordering isn't supported by MongoDB.")
if hasattr(order, "resolve_expression"):
# order is an expression like OrderBy, F, or database function.
orderby = order if isinstance(order, OrderBy) else order.asc()
orderby = orderby.resolve_expression(self.query, allow_joins=True, reuse=None)
ascending = not orderby.descending
# If the query is reversed, ascending and descending are inverted.
if not self.query.standard_ordering:
ascending = not ascending
else:
# order is a string like "field" or "field__other_field".
orderby, _ = self.find_ordering_name(
order, self.query.get_meta(), default_order=default_order
)[0]
ascending = not orderby.descending
column = orderby.expression.as_mql(self, self.connection)
if isinstance(column, dict):
raise NotSupportedError("order_by() expression not supported.")
# $sort references must not include the dollar sign.
column = column.removeprefix("$")
# Don't add the same column twice.
if column not in columns_seen:
columns_seen.add(column)
column_ordering.append((column, ascending))
return column_ordering

@cached_property
def collection_name(self):
return self.query.get_meta().db_table
Expand Down Expand Up @@ -473,12 +450,43 @@ def get_project_fields(self, columns=None, ordering=None):
if self.query.alias_refcount[alias] and self.collection_name != alias:
fields[alias] = 1
# Add order_by() fields.
for column, _ in ordering or []:
foreign_table = column.split(".", 1)[0] if "." in column else None
if column not in fields and foreign_table not in fields:
fields[column] = 1
for alias, expression in ordering or []:
nested_entity = alias.split(".", 1)[0] if "." in alias else None
if alias not in fields and nested_entity not in fields:
fields[alias] = expression.as_mql(self, self.connection)
return fields

def _get_ordering(self):
"""
Process the query's OrderBy objects and return:
- A tuple of ('field_name': Col/Expression, ...)
- A bson.SON mapping to pass to $sort.
- A tuple of ('field_name': Expression, ...) for expressions that need
to be added to extra_fields.
"""
fields = {}
sort_ordering = SON()
extra_fields = {}
idx = itertools.count(start=1)
for order in self.order_by_objs or []:
if isinstance(order.expression, Col):
field_name = order.expression.as_mql(self, self.connection).removeprefix("$")
fields[field_name] = order.expression
elif isinstance(order.expression, Ref):
field_name = order.expression.as_mql(self, self.connection).removeprefix("$")
else:
field_name = f"__order{next(idx)}"
fields[field_name] = order.expression
Copy link
Collaborator

@timgraham timgraham Aug 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

todo: understand why this changes from extra_fields to fields in the second commit and update comment two lines above.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the reason is:
The refs are actually references to specific fields selected in a query. These references compile to a position number within the as_sql method. To address this, I decided to move the refs to the post-project fields. Additionally, some of these refs are calculated during the project stage (such as annotations), so the refs are processed after the project stage. But the other projected fields that aren't refs have all the info to make the full expression.

Btw, 'refs' also has all the info to be compiled well, like an annotation with name (something that is not __annotation but isn't a column), it has the reference but also the full expression with it. So if we want to delete the extra field we could, the downside will be a field will be calculated twice or more.

Copy link
Collaborator

@timgraham timgraham Aug 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even with the explanation, it's still difficult to understand. Now in this method, extra_fields is only used for nulls first/last, but then outside this method, all ordering_fields are added to extra_fields when columns is None (I guess because they won't be added to $project in that case). edit: I tried to clarify this with a comment.

# If the expression is ordered by NULLS FIRST or NULLS LAST,
# add a field for sorting that's 1 if null or 0 if not.
if order.nulls_first or order.nulls_last:
null_fieldname = f"__order{next(idx)}"
condition = When(IsNull(order.expression, True), then=Value(1))
extra_fields[null_fieldname] = Case(condition, default=Value(0))
sort_ordering[null_fieldname] = DESCENDING if order.nulls_first else ASCENDING
sort_ordering[field_name] = DESCENDING if order.descending else ASCENDING
return tuple(fields.items()), sort_ordering, tuple(extra_fields.items())

def get_where(self):
return self.where

Expand Down
55 changes: 3 additions & 52 deletions django_mongodb/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,36 +32,9 @@ class DatabaseFeatures(BaseDatabaseFeatures):
"lookup.tests.LookupTests.test_exact_none_transform",
# "Save with update_fields did not affect any rows."
"basic.tests.SelectOnSaveTests.test_select_on_save_lying_update",
# Order by constant not supported:
# AttributeError: 'Field' object has no attribute 'model'
"aggregation.tests.AggregateTestCase.test_annotate_values_list",
"aggregation.tests.AggregateTestCase.test_grouped_annotation_in_group_by",
"aggregation.tests.AggregateTestCase.test_non_grouped_annotation_not_in_group_by",
"aggregation.tests.AggregateTestCase.test_values_annotation_with_expression",
"annotations.tests.NonAggregateAnnotationTestCase.test_order_by_aggregate",
"model_fields.test_jsonfield.TestQuerying.test_ordering_grouping_by_count",
"ordering.tests.OrderingTests.test_default_ordering_does_not_affect_group_by",
"ordering.tests.OrderingTests.test_order_by_constant_value",
"expressions.tests.NegatedExpressionTests.test_filter",
"expressions_case.tests.CaseExpressionTests.test_order_by_conditional_implicit",
# BaseExpression.convert_value() crashes with Decimal128.
"aggregation.tests.AggregateTestCase.test_combine_different_types",
"annotations.tests.NonAggregateAnnotationTestCase.test_combined_f_expression_annotation_with_aggregation",
# NotSupportedError: order_by() expression not supported.
"aggregation.tests.AggregateTestCase.test_aggregation_order_by_not_selected_annotation_values",
"db_functions.comparison.test_coalesce.CoalesceTests.test_ordering",
"db_functions.tests.FunctionTests.test_nested_function_ordering",
"db_functions.text.test_length.LengthTests.test_ordering",
"db_functions.text.test_strindex.StrIndexTests.test_order_by",
"expressions_case.tests.CaseExpressionTests.test_order_by_conditional_explicit",
"lookup.tests.LookupQueryingTests.test_lookup_in_order_by",
"ordering.tests.OrderingTests.test_order_by_expr_query_reuse",
"ordering.tests.OrderingTests.test_order_by_expression_ref",
"ordering.tests.OrderingTests.test_ordering_select_related_collision",
"queries.tests.Queries1Tests.test_order_by_related_field_transform",
"update.tests.AdvancedTests.test_update_ordered_by_inline_m2m_annotation",
"update.tests.AdvancedTests.test_update_ordered_by_m2m_annotation",
"update.tests.AdvancedTests.test_update_ordered_by_m2m_annotation_desc",
# Pattern lookups that use regexMatch don't work on JSONField:
# Unsupported conversion from array to string in $convert
"model_fields.test_jsonfield.TestQuerying.test_icontains",
Expand All @@ -76,11 +49,6 @@ class DatabaseFeatures(BaseDatabaseFeatures):
"db_functions.datetime.test_extract_trunc.DateFunctionWithTimeZoneTests.test_trunc_timezone_applied_before_truncation",
# Length of null considered zero rather than null.
"db_functions.text.test_length.LengthTests.test_basic",
# Key transforms are incorrectly treated as joins:
# Ordering can't span tables on MongoDB (value_custom__a).
"model_fields.test_jsonfield.TestQuerying.test_order_grouping_custom_decoder",
"model_fields.test_jsonfield.TestQuerying.test_ordering_by_transform",
"model_fields.test_jsonfield.TestQuerying.test_ordering_grouping_by_key_transform",
# Wrong results in queries with multiple tables.
"annotations.tests.NonAggregateAnnotationTestCase.test_annotation_reverse_m2m",
"annotations.tests.NonAggregateAnnotationTestCase.test_annotation_with_m2m",
Expand All @@ -89,17 +57,9 @@ class DatabaseFeatures(BaseDatabaseFeatures):
"expressions.test_queryset_values.ValuesExpressionsTests.test_values_list_expression",
"expressions.test_queryset_values.ValuesExpressionsTests.test_values_list_expression_flat",
"expressions.tests.IterableLookupInnerExpressionsTests.test_expressions_in_lookups_join_choice",
"ordering.tests.OrderingTests.test_order_by_grandparent_fk_with_expression_in_default_ordering",
"ordering.tests.OrderingTests.test_order_by_parent_fk_with_expression_in_default_ordering",
"ordering.tests.OrderingTests.test_order_by_ptr_field_with_default_ordering_by_expression",
"queries.tests.Queries1Tests.test_order_by_tables",
"queries.tests.TestTicket24605.test_ticket_24605",
"queries.tests.TestInvalidValuesRelation.test_invalid_values",
# alias().order_by() doesn't work.
"annotations.tests.AliasTests.test_order_by_alias",
"annotations.tests.AliasTests.test_order_by_alias_aggregate",
# annotate() + values_list() + order_by() loses annotated value.
"expressions_case.tests.CaseExpressionTests.test_annotate_values_not_in_order_by",
# QuerySet.explain() not implemented:
# https://github.com/mongodb-labs/django-mongodb/issues/28
"queries.test_explain.ExplainUnsupportedTests.test_message",
Expand All @@ -108,9 +68,6 @@ class DatabaseFeatures(BaseDatabaseFeatures):
"aggregation.tests.AggregateTestCase.test_aggregation_default_passed_another_aggregate",
"aggregation.tests.AggregateTestCase.test_annotation_expressions",
"aggregation.tests.AggregateTestCase.test_reverse_fkey_annotate",
# Incorrect order: pipeline does not order by the correct fields.
"aggregation.tests.AggregateTestCase.test_annotate_ordering",
"aggregation.tests.AggregateTestCase.test_even_more_aggregate",
}
# $bitAnd, #bitOr, and $bitXor are new in MongoDB 6.3.
_django_test_expected_failures_bitwise = {
Expand Down Expand Up @@ -395,6 +352,8 @@ def django_test_expected_failures(self):
"Cannot use QuerySet.update() when querying across multiple collections on MongoDB.": {
"queries.tests.Queries4Tests.test_ticket7095",
"queries.tests.Queries5Tests.test_ticket9848",
"update.tests.AdvancedTests.test_update_ordered_by_m2m_annotation",
"update.tests.AdvancedTests.test_update_ordered_by_m2m_annotation_desc",
},
"QuerySet.dates() is not supported on MongoDB.": {
"aggregation.tests.AggregateTestCase.test_dates_with_aggregation",
Expand All @@ -417,6 +376,7 @@ def django_test_expected_failures(self):
"QuerySet.distinct() is not supported.": {
"aggregation.tests.AggregateTestCase.test_sum_distinct_aggregate",
"lookup.tests.LookupTests.test_lookup_collision_distinct",
"ordering.tests.OrderingTests.test_orders_nulls_first_on_filtered_subquery",
"queries.tests.ExcludeTest17600.test_exclude_plain_distinct",
"queries.tests.ExcludeTest17600.test_exclude_with_q_is_equal_to_plain_exclude",
"queries.tests.ExcludeTest17600.test_exclude_with_q_object_distinct",
Expand Down Expand Up @@ -460,11 +420,6 @@ def django_test_expected_failures(self):
"queries.tests.ValuesQuerysetTests.test_named_values_list_without_fields",
"select_related.tests.SelectRelatedTests.test_select_related_with_extra",
},
"Ordering a QuerySet by null_first/nulls_last is not supported on MongoDB.": {
"ordering.tests.OrderingTests.test_order_by_nulls_first",
"ordering.tests.OrderingTests.test_order_by_nulls_last",
"ordering.tests.OrderingTests.test_orders_nulls_first_on_filtered_subquery",
},
"QuerySet.update() crash: Unrecognized expression '$count'": {
"update.tests.AdvancedTests.test_update_annotated_multi_table_queryset",
},
Expand Down Expand Up @@ -577,10 +532,6 @@ def django_test_expected_failures(self):
"model_fields.test_jsonfield.TestQuerying.test_none_key",
"model_fields.test_jsonfield.TestQuerying.test_none_key_exclude",
},
"Randomized ordering isn't supported by MongoDB.": {
"aggregation.tests.AggregateTestCase.test_aggregation_random_ordering",
"ordering.tests.OrderingTests.test_random_ordering",
},
"Queries without a collection aren't supported on MongoDB.": {
"queries.test_q.QCheckTests",
"queries.test_query.TestQueryNoModel",
Expand Down
2 changes: 1 addition & 1 deletion django_mongodb/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def wrapped(self, compiler, connection):
lhs_mql = process_lhs(self, compiler, connection)
return {
"$cond": {
"if": {"$eq": [lhs_mql, None]},
"if": connection.mongo_operators["isnull"](lhs_mql, True),
"then": None,
"else": {f"${operator}": lhs_mql},
}
Expand Down
Loading