From be439630ac03d1369680d518f65ef8a20ccc1687 Mon Sep 17 00:00:00 2001 From: Jib Date: Thu, 21 Aug 2025 16:11:15 -0400 Subject: [PATCH 1/9] PoC Expression Conversion Abstraction --- query_conversion/__init__.py | 0 query_conversion/expression_converters.py | 126 ++++++++++ query_conversion/query_optimizer.py | 267 ++++++++++++++++++++++ tests/expression_converter_/__init__.py | 0 tests/expression_converter_/models.py | 9 + tests/expression_converter_/tests.py | 0 6 files changed, 402 insertions(+) create mode 100644 query_conversion/__init__.py create mode 100644 query_conversion/expression_converters.py create mode 100644 query_conversion/query_optimizer.py create mode 100644 tests/expression_converter_/__init__.py create mode 100644 tests/expression_converter_/models.py create mode 100644 tests/expression_converter_/tests.py diff --git a/query_conversion/__init__.py b/query_conversion/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/query_conversion/expression_converters.py b/query_conversion/expression_converters.py new file mode 100644 index 000000000..b9a546661 --- /dev/null +++ b/query_conversion/expression_converters.py @@ -0,0 +1,126 @@ +"""Expression To Match Converters""" + + +class _BaseExpressionConverter: + """ + Base class for optimizers that handle specific operations in MQL queries. + This class can be extended to implement optimizations for other operations. + """ + + @classmethod + def optimize(cls, expr): + raise NotImplementedError("Subclasses should implement this method.") + + @classmethod + def is_simple_value(cls, value): + """ + Check if the value is a simple type (not a list or dict). + """ + return isinstance(value, (str, int, float, bool)) or value is None + + @classmethod + def is_convertable_field_name(cls, field_name): + """Validate a field_name is one that can be represented in $match""" + # This needs work and re-evaluation + if ( + isinstance(field_name, str) + and field_name.startswith("$") + and not field_name[:1].isalnum() + ): + return True + return False + + +class _EqExpressionConverter(_BaseExpressionConverter): + """Convert $eq operation to a $match compatible query.""" + + @classmethod + def optimize(cls, eq_args): + if isinstance(eq_args, list) and len(eq_args) == 2: + field_expr, value = eq_args + + # Check if first argument is a simple field reference + if ( + isinstance(field_expr, str) + and field_expr.startswith("$") + and cls.is_simple_value(value) + ): + field_name = field_expr[1:] # Remove the $ prefix + return {field_name: value} + + return None + + +class _InExpressionConverter(_BaseExpressionConverter): + """Convert $in operation to a $match compatible query.""" + + @classmethod + def optimize(cls, in_args): + if isinstance(in_args, list) and len(in_args) == 2: + field_expr, values = in_args + + # Check if first argument is a simple field reference + if isinstance(field_expr, str) and field_expr.startswith("$"): + field_name = field_expr[1:] # Remove the $ prefix + if isinstance(values, list) and all( + cls.is_simple_value(v) for v in values + ): + return {field_name: {"$in": values}} + + return None + + +class _LogicalExpressionConverter(_BaseExpressionConverter): + """Generic for converting logical operations to a $match compatible query.""" + + @classmethod + def optimize(cls, combined_conditions): + if isinstance(combined_conditions, list): + optimized_conditions = [] + for condition in combined_conditions: + if isinstance(condition, dict) and len(condition) == 1: + if optimized_condition := convert_expression(condition): + optimized_conditions.append(optimized_condition) + else: + # Any failure should stop optimization + return None + if optimized_conditions: + return {cls._logical_op: optimized_conditions} + return None + + +class _OrExpressionConverter(_LogicalExpressionConverter): + """Convert $or operation to a $match compatible query.""" + + _logical_op = "$or" + + +class _AndExpressionConverter(_LogicalExpressionConverter): + """Convert $and operation to a $match compatible query.""" + + _logical_op = "$and" + + +OPTIMIZABLE_OPS = { + "$eq": _EqExpressionConverter, + "$in": _InExpressionConverter, + "$and": _AndExpressionConverter, + "$or": _OrExpressionConverter, +} + + +def convert_expression(expr): + """ + Optimize an MQL expression by extracting optimizable conditions. + + Args: + expr: Dictionary containing the MQL expression + + Returns: + Optimized match condition or None if not optimizable + """ + if isinstance(expr, dict) and len(expr) == 1: + op = list(expr.keys())[0] + if op in OPTIMIZABLE_OPS: + return OPTIMIZABLE_OPS[op].optimize(expr[op]) + return None diff --git a/query_conversion/query_optimizer.py b/query_conversion/query_optimizer.py new file mode 100644 index 000000000..c0140164b --- /dev/null +++ b/query_conversion/query_optimizer.py @@ -0,0 +1,267 @@ +from copy import deepcopy + +from expression_converters import convert_expression + + +class QueryOptimizer: + def optimize(self, expr): + """ + Takes an MQL query with $expr and optimizes it by extracting + optimizable conditions into separate $match stages. + + Args: + expr_query: Dictionary containing the $expr query + + Returns: + List of optimized match conditions + """ + expr_query = deepcopy(expr) + + if "$expr" not in expr_query: + return [expr_query] + + if expr_query["$expr"] == {}: + return [{"$match": {}}] + + expr_content = expr_query["$expr"] + match_conditions = [] + remaining_expr_conditions = [] + + # Handle the expression content + self._process_expression( + expr_content, match_conditions, remaining_expr_conditions + ) + + # If there are remaining conditions that couldn't be optimized, + # keep them in an $expr + if remaining_expr_conditions: + if len(remaining_expr_conditions) == 1: + expr_conditions = {"$expr": remaining_expr_conditions[0]} + else: + expr_conditions = {"$expr": {"$and": remaining_expr_conditions}} + + if match_conditions: + # This assumes match_conditions is a list of dicts with $match + match_conditions[0]["$match"].update(expr_conditions) + + return match_conditions + + def _process_expression(self, expr, match_conditions, remaining_conditions): + """ + Process an expression and extract optimizable conditions. + + Args: + expr: The expression to process + match_conditions: List to append optimized match conditions + remaining_conditions: List to append non-optimizable conditions + """ + if isinstance(expr, dict): + # Check if this is an $and operation + has_and = "$and" in expr + has_or = "$or" in expr + # Do a top-level check for $and or $or because these should inform + # If they fail, they should failover to a remaining conditions list + # There's probably a better way to do this, but this is a start + if has_and: + self._process_logical_conditions( + "$and", expr["$and"], match_conditions, remaining_conditions + ) + if has_or: + self._process_logical_conditions( + "$or", expr["$or"], match_conditions, remaining_conditions + ) + if not has_and and not has_or: + # Process single condition + optimized = convert_expression(expr) + if optimized: + match_conditions.append({"$match": optimized}) + else: + remaining_conditions.append(expr) + else: + # Can't optimize + remaining_conditions.append(expr) + + def _process_logical_conditions( + self, logical_op, logical_conditions, match_conditions, remaining_conditions + ): + """ + Process conditions within a logical array. + + Args: + logical_conditions: List of conditions within logical operator + match_conditions: List to append optimized match conditions + remaining_conditions: List to append non-optimizable conditions + """ + optimized_conditions = [] + for condition in logical_conditions: + if isinstance(condition, dict): + if optimized := convert_expression(condition): + optimized_conditions.append(optimized) + else: + remaining_conditions.append(condition) + else: + remaining_conditions.append(condition) + match_conditions.append({"$match": {logical_op: optimized_conditions}}) + + +def test_optimizer(optimizer, query, idx): + """ + Test the QueryOptimizer with various conditions. + """ + print("Before optimization:") + pprint(query) + print("After optimization:") + pprint(optimizer.optimize(query)) + print() + + +# Example usage and test cases +if __name__ == "__main__": + optimizer = QueryOptimizer() + from pprint import pprint + + # Test case 1: Simple $eq + query1 = {"$expr": {"$eq": ["$status", "active"]}}, ("Test 1 - Simple $eq:") + + # Test case 2: Simple $in + query2 = ( + {"$expr": {"$in": ["$category", ["electronics", "books", "clothing"]]}}, + ("Test 2 - Simple $in:"), + ) + + # Test case 3: $and with multiple optimizable conditions + query3 = ( + { + "$expr": { + "$and": [ + {"$eq": ["$status", "active"]}, + {"$in": ["$category", ["electronics", "books"]]}, + {"$eq": ["$verified", True]}, + ] + } + }, + ("Test 3 - $and with optimizable conditions:"), + ) + + # Test case 4: Mixed optimizable and non-optimizable conditions + query4 = ( + { + "$expr": { + "$and": [ + {"$eq": ["$status", "active"]}, + {"$gt": ["$price", 100]}, # Not optimizable + {"$in": ["$category", ["electronics"]]}, + ] + } + }, + ("Test 4 - Mixed conditions:"), + ) + + # Test case 5: Non-optimizable condition + query5 = ( + {"$expr": {"$gt": ["$price", 100]}}, + ("Test 5 - Non-optimizable condition:"), + ) + + # Test case 6: Nested $or conditions + query6 = ( + { + "$expr": { + "$or": [ + {"$eq": ["$status", "active"]}, + {"$in": ["$category", ["electronics", "books"]]}, + {"$and": [{"$eq": ["$verified", True]}, {"$gt": ["$price", 50]}]}, + ] + } + }, + ("Test 6 - Nested $or conditions:"), + ) + + # Test case 7: Complex nested conditions with non-optimizable parts + query7 = ( + { + "$expr": { + "$and": [ + { + "$or": [ + {"$eq": ["$status", "active"]}, + {"$gt": ["$views", 1000]}, + ] + }, + {"$in": ["$category", ["electronics", "books"]]}, + {"$eq": ["$verified", True]}, + {"$gt": ["$price", 50]}, # Not optimizable + ] + } + }, + ("Test 7 - Complex nested conditions:"), + ) + + # Test case 8: London $in test case + query8 = ( + {"$expr": {"$in": ["$author_city", ["London"]]}}, + ("Test 8 - London $in test case:"), + ) + + # Test case 9: Deeply nested logical operations + query9 = ( + { + "$expr": { + "$and": [ + { + "$or": [ + {"$eq": ["$type", "premium"]}, + { + "$and": [ + {"$eq": ["$type", "standard"]}, + {"$in": ["$region", ["US", "CA"]]}, + ] + }, + ] + }, + {"$eq": ["$active", True]}, + ] + } + }, + ("Test 9 - Deeply nested logical operations:"), + ) + + # Test case 10: Deeply nested logical operations, with Variable! + query10 = ( + { + "$expr": { + "$and": [ + { + "$or": [ + {"$eq": ["$type", "premium"]}, + { + "$and": [ + {"$eq": ["$type", "standard"]}, + {"$in": ["$region", ["US", "CA"]]}, + ] + }, + ] + }, + {"$eq": ["$active", True]}, + ] + } + }, + ("Test 10 - Deeply nested logical operations, with Variables!"), + ) + + queries = [ + query1, + query2, + query3, + query4, + query5, + query6, + query7, + query8, + query9, + query10, + ] + + for idx, (query, description) in enumerate(queries, start=1): + print(description) + test_optimizer(optimizer, query, idx) diff --git a/tests/expression_converter_/__init__.py b/tests/expression_converter_/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/expression_converter_/models.py b/tests/expression_converter_/models.py new file mode 100644 index 000000000..dad8d8853 --- /dev/null +++ b/tests/expression_converter_/models.py @@ -0,0 +1,9 @@ +from django.db import models + + +class Person(models.Model): + name = models.CharField(max_length=100) + age = models.IntegerField() + + def __str__(self): + return self.name diff --git a/tests/expression_converter_/tests.py b/tests/expression_converter_/tests.py new file mode 100644 index 000000000..e69de29bb From deac776d13077b63cb5f855e88a664abc927f0dc Mon Sep 17 00:00:00 2001 From: Jib Date: Mon, 25 Aug 2025 02:53:07 +0300 Subject: [PATCH 2/9] Improvements and Testing --- .../query_conversion}/__init__.py | 0 .../expression_converters.py | 17 +- .../query_conversion/query_optimizer.py | 107 +++++++ query_conversion/query_optimizer.py | 267 ------------------ tests/expression_converter_/models.py | 11 +- .../test_match_conversion.py | 232 +++++++++++++++ .../test_op_expressions.py | 195 +++++++++++++ tests/expression_converter_/tests.py | 0 8 files changed, 554 insertions(+), 275 deletions(-) rename {query_conversion => django_mongodb_backend/query_conversion}/__init__.py (100%) rename {query_conversion => django_mongodb_backend/query_conversion}/expression_converters.py (88%) create mode 100644 django_mongodb_backend/query_conversion/query_optimizer.py delete mode 100644 query_conversion/query_optimizer.py create mode 100644 tests/expression_converter_/test_match_conversion.py create mode 100644 tests/expression_converter_/test_op_expressions.py delete mode 100644 tests/expression_converter_/tests.py diff --git a/query_conversion/__init__.py b/django_mongodb_backend/query_conversion/__init__.py similarity index 100% rename from query_conversion/__init__.py rename to django_mongodb_backend/query_conversion/__init__.py diff --git a/query_conversion/expression_converters.py b/django_mongodb_backend/query_conversion/expression_converters.py similarity index 88% rename from query_conversion/expression_converters.py rename to django_mongodb_backend/query_conversion/expression_converters.py index b9a546661..af40b6b03 100644 --- a/query_conversion/expression_converters.py +++ b/django_mongodb_backend/query_conversion/expression_converters.py @@ -14,21 +14,24 @@ def optimize(cls, expr): @classmethod def is_simple_value(cls, value): """ - Check if the value is a simple type (not a list or dict). + Check if the value is a simple type (not a dict). """ - return isinstance(value, (str, int, float, bool)) or value is None + if isinstance(value, str) and value.startswith("$"): + return False + if isinstance(value, list | tuple | set): + return all(cls.is_simple_value(v) for v in value) + # TODO: Expand functionality to support `$getField` conversion + return not isinstance(value, dict) or value is None @classmethod def is_convertable_field_name(cls, field_name): """Validate a field_name is one that can be represented in $match""" # This needs work and re-evaluation - if ( + return ( isinstance(field_name, str) and field_name.startswith("$") and not field_name[:1].isalnum() - ): - return True - return False + ) class _EqExpressionConverter(_BaseExpressionConverter): @@ -62,7 +65,7 @@ def optimize(cls, in_args): # Check if first argument is a simple field reference if isinstance(field_expr, str) and field_expr.startswith("$"): field_name = field_expr[1:] # Remove the $ prefix - if isinstance(values, list) and all( + if isinstance(values, list | tuple | set) and all( cls.is_simple_value(v) for v in values ): return {field_name: {"$in": values}} diff --git a/django_mongodb_backend/query_conversion/query_optimizer.py b/django_mongodb_backend/query_conversion/query_optimizer.py new file mode 100644 index 000000000..5eb4f1bc1 --- /dev/null +++ b/django_mongodb_backend/query_conversion/query_optimizer.py @@ -0,0 +1,107 @@ +from copy import deepcopy + +from django_mongodb_backend.query_conversion.expression_converters import convert_expression + + +class QueryOptimizer: + def convert_expr_to_match(self, expr): + """ + Takes an MQL query with $expr and optimizes it by extracting + optimizable conditions into separate $match stages. + + Args: + expr_query: Dictionary containing the $expr query + + Returns: + List of optimized match conditions + """ + expr_query = deepcopy(expr) + + if "$expr" not in expr_query: + return [expr_query] + + if expr_query["$expr"] == {}: + return [{"$match": {}}] + + expr_content = expr_query["$expr"] + match_conditions = [] + remaining_expr_conditions = [] + + # Handle the expression content + self._process_expression(expr_content, match_conditions, remaining_expr_conditions) + + # If there are remaining conditions that couldn't be optimized, + # keep them in an $expr + if remaining_expr_conditions: + if len(remaining_expr_conditions) == 1: + expr_conditions = {"$expr": remaining_expr_conditions[0]} + else: + expr_conditions = {"$expr": {"$and": remaining_expr_conditions}} + + if match_conditions: + # This assumes match_conditions is a list of dicts with $match + match_conditions[0]["$match"].update(expr_conditions) + else: + match_conditions.append({"$match": expr_conditions}) + + return match_conditions + + def _process_expression(self, expr, match_conditions, remaining_conditions): + """ + Process an expression and extract optimizable conditions. + + Args: + expr: The expression to process + match_conditions: List to append optimized match conditions + remaining_conditions: List to append non-optimizable conditions + """ + if isinstance(expr, dict): + # Check if this is an $and operation + has_and = "$and" in expr + has_or = "$or" in expr + # Do a top-level check for $and or $or because these should inform + # If they fail, they should failover to a remaining conditions list + # There's probably a better way to do this, but this is a start + if has_and: + self._process_logical_conditions( + "$and", expr["$and"], match_conditions, remaining_conditions + ) + if has_or: + self._process_logical_conditions( + "$or", expr["$or"], match_conditions, remaining_conditions + ) + if not has_and and not has_or: + # Process single condition + optimized = convert_expression(expr) + print(f"{expr=}") + if optimized: + match_conditions.append({"$match": optimized}) + else: + remaining_conditions.append(expr) + print(f"{match_conditions=}") + print(f"{remaining_conditions=}") + else: + # Can't optimize + remaining_conditions.append(expr) + + def _process_logical_conditions( + self, logical_op, logical_conditions, match_conditions, remaining_conditions + ): + """ + Process conditions within a logical array. + + Args: + logical_conditions: List of conditions within logical operator + match_conditions: List to append optimized match conditions + remaining_conditions: List to append non-optimizable conditions + """ + optimized_conditions = [] + for condition in logical_conditions: + if isinstance(condition, dict): + if optimized := convert_expression(condition): + optimized_conditions.append(optimized) + else: + remaining_conditions.append(condition) + else: + remaining_conditions.append(condition) + match_conditions.append({"$match": {logical_op: optimized_conditions}}) diff --git a/query_conversion/query_optimizer.py b/query_conversion/query_optimizer.py deleted file mode 100644 index c0140164b..000000000 --- a/query_conversion/query_optimizer.py +++ /dev/null @@ -1,267 +0,0 @@ -from copy import deepcopy - -from expression_converters import convert_expression - - -class QueryOptimizer: - def optimize(self, expr): - """ - Takes an MQL query with $expr and optimizes it by extracting - optimizable conditions into separate $match stages. - - Args: - expr_query: Dictionary containing the $expr query - - Returns: - List of optimized match conditions - """ - expr_query = deepcopy(expr) - - if "$expr" not in expr_query: - return [expr_query] - - if expr_query["$expr"] == {}: - return [{"$match": {}}] - - expr_content = expr_query["$expr"] - match_conditions = [] - remaining_expr_conditions = [] - - # Handle the expression content - self._process_expression( - expr_content, match_conditions, remaining_expr_conditions - ) - - # If there are remaining conditions that couldn't be optimized, - # keep them in an $expr - if remaining_expr_conditions: - if len(remaining_expr_conditions) == 1: - expr_conditions = {"$expr": remaining_expr_conditions[0]} - else: - expr_conditions = {"$expr": {"$and": remaining_expr_conditions}} - - if match_conditions: - # This assumes match_conditions is a list of dicts with $match - match_conditions[0]["$match"].update(expr_conditions) - - return match_conditions - - def _process_expression(self, expr, match_conditions, remaining_conditions): - """ - Process an expression and extract optimizable conditions. - - Args: - expr: The expression to process - match_conditions: List to append optimized match conditions - remaining_conditions: List to append non-optimizable conditions - """ - if isinstance(expr, dict): - # Check if this is an $and operation - has_and = "$and" in expr - has_or = "$or" in expr - # Do a top-level check for $and or $or because these should inform - # If they fail, they should failover to a remaining conditions list - # There's probably a better way to do this, but this is a start - if has_and: - self._process_logical_conditions( - "$and", expr["$and"], match_conditions, remaining_conditions - ) - if has_or: - self._process_logical_conditions( - "$or", expr["$or"], match_conditions, remaining_conditions - ) - if not has_and and not has_or: - # Process single condition - optimized = convert_expression(expr) - if optimized: - match_conditions.append({"$match": optimized}) - else: - remaining_conditions.append(expr) - else: - # Can't optimize - remaining_conditions.append(expr) - - def _process_logical_conditions( - self, logical_op, logical_conditions, match_conditions, remaining_conditions - ): - """ - Process conditions within a logical array. - - Args: - logical_conditions: List of conditions within logical operator - match_conditions: List to append optimized match conditions - remaining_conditions: List to append non-optimizable conditions - """ - optimized_conditions = [] - for condition in logical_conditions: - if isinstance(condition, dict): - if optimized := convert_expression(condition): - optimized_conditions.append(optimized) - else: - remaining_conditions.append(condition) - else: - remaining_conditions.append(condition) - match_conditions.append({"$match": {logical_op: optimized_conditions}}) - - -def test_optimizer(optimizer, query, idx): - """ - Test the QueryOptimizer with various conditions. - """ - print("Before optimization:") - pprint(query) - print("After optimization:") - pprint(optimizer.optimize(query)) - print() - - -# Example usage and test cases -if __name__ == "__main__": - optimizer = QueryOptimizer() - from pprint import pprint - - # Test case 1: Simple $eq - query1 = {"$expr": {"$eq": ["$status", "active"]}}, ("Test 1 - Simple $eq:") - - # Test case 2: Simple $in - query2 = ( - {"$expr": {"$in": ["$category", ["electronics", "books", "clothing"]]}}, - ("Test 2 - Simple $in:"), - ) - - # Test case 3: $and with multiple optimizable conditions - query3 = ( - { - "$expr": { - "$and": [ - {"$eq": ["$status", "active"]}, - {"$in": ["$category", ["electronics", "books"]]}, - {"$eq": ["$verified", True]}, - ] - } - }, - ("Test 3 - $and with optimizable conditions:"), - ) - - # Test case 4: Mixed optimizable and non-optimizable conditions - query4 = ( - { - "$expr": { - "$and": [ - {"$eq": ["$status", "active"]}, - {"$gt": ["$price", 100]}, # Not optimizable - {"$in": ["$category", ["electronics"]]}, - ] - } - }, - ("Test 4 - Mixed conditions:"), - ) - - # Test case 5: Non-optimizable condition - query5 = ( - {"$expr": {"$gt": ["$price", 100]}}, - ("Test 5 - Non-optimizable condition:"), - ) - - # Test case 6: Nested $or conditions - query6 = ( - { - "$expr": { - "$or": [ - {"$eq": ["$status", "active"]}, - {"$in": ["$category", ["electronics", "books"]]}, - {"$and": [{"$eq": ["$verified", True]}, {"$gt": ["$price", 50]}]}, - ] - } - }, - ("Test 6 - Nested $or conditions:"), - ) - - # Test case 7: Complex nested conditions with non-optimizable parts - query7 = ( - { - "$expr": { - "$and": [ - { - "$or": [ - {"$eq": ["$status", "active"]}, - {"$gt": ["$views", 1000]}, - ] - }, - {"$in": ["$category", ["electronics", "books"]]}, - {"$eq": ["$verified", True]}, - {"$gt": ["$price", 50]}, # Not optimizable - ] - } - }, - ("Test 7 - Complex nested conditions:"), - ) - - # Test case 8: London $in test case - query8 = ( - {"$expr": {"$in": ["$author_city", ["London"]]}}, - ("Test 8 - London $in test case:"), - ) - - # Test case 9: Deeply nested logical operations - query9 = ( - { - "$expr": { - "$and": [ - { - "$or": [ - {"$eq": ["$type", "premium"]}, - { - "$and": [ - {"$eq": ["$type", "standard"]}, - {"$in": ["$region", ["US", "CA"]]}, - ] - }, - ] - }, - {"$eq": ["$active", True]}, - ] - } - }, - ("Test 9 - Deeply nested logical operations:"), - ) - - # Test case 10: Deeply nested logical operations, with Variable! - query10 = ( - { - "$expr": { - "$and": [ - { - "$or": [ - {"$eq": ["$type", "premium"]}, - { - "$and": [ - {"$eq": ["$type", "standard"]}, - {"$in": ["$region", ["US", "CA"]]}, - ] - }, - ] - }, - {"$eq": ["$active", True]}, - ] - } - }, - ("Test 10 - Deeply nested logical operations, with Variables!"), - ) - - queries = [ - query1, - query2, - query3, - query4, - query5, - query6, - query7, - query8, - query9, - query10, - ] - - for idx, (query, description) in enumerate(queries, start=1): - print(description) - test_optimizer(optimizer, query, idx) diff --git a/tests/expression_converter_/models.py b/tests/expression_converter_/models.py index dad8d8853..234c2cec2 100644 --- a/tests/expression_converter_/models.py +++ b/tests/expression_converter_/models.py @@ -1,9 +1,18 @@ from django.db import models -class Person(models.Model): +class Author(models.Model): name = models.CharField(max_length=100) age = models.IntegerField() + author_city = models.CharField(max_length=100) def __str__(self): return self.name + + +class Book(models.Model): + title = models.CharField(max_length=10) + author = models.ForeignKey(Author, models.CASCADE) + + def __str__(self): + return self.title diff --git a/tests/expression_converter_/test_match_conversion.py b/tests/expression_converter_/test_match_conversion.py new file mode 100644 index 000000000..f3db7cd2c --- /dev/null +++ b/tests/expression_converter_/test_match_conversion.py @@ -0,0 +1,232 @@ +from django.test import SimpleTestCase, TestCase + +from django_mongodb_backend.query_conversion.query_optimizer import QueryOptimizer + +from .models import Author + +optimizer = QueryOptimizer() + + +class QueryOptimizerTests(SimpleTestCase): + def assertOptimizerEqual(self, input, expected): + result = QueryOptimizer().optimize(input) + self.assertEqual(result, expected) + + def test_multiple_optimizable_conditions(self): + expr = { + "$expr": { + "$and": [ + {"$eq": ["$status", "active"]}, + {"$in": ["$category", ["electronics", "books"]]}, + {"$eq": ["$verified", True]}, + ] + } + } + expected = { + "$match": { + "$and": [ + {"status": "active"}, + {"category": {"$in": ["electronics", "books"]}}, + {"verified": True}, + ] + } + } + self.assertOptimizerEqual(expr, expected) + + def test_mixed_optimizable_and_non_optimizable_conditions(self): + expr = { + "$expr": { + "$and": [ + {"$eq": ["$status", "active"]}, + {"$gt": ["$price", 100]}, # Not optimizable + {"$in": ["$category", ["electronics"]]}, + ] + } + } + expected = { + "$match": { + "$and": [{"status": "active"}, {"category": {"$in": ["electronics"]}}], + "$expr": {"$gt": ["$price", 100]}, + } + } + self.assertOptimizerEqual(expr, expected) + + def test_non_optimizable_condition(self): + expr = {"$expr": {"$gt": ["$price", 100]}} + expected = { + "$match": { + "$expr": {"$gt": ["$price", 100]}, + } + } + self.assertOptimizerEqual(expr, expected) + + def test_nested_logical_conditions(self): + expr = { + "$expr": { + "$or": [ + {"$eq": ["$status", "active"]}, + {"$in": ["$category", ["electronics", "books"]]}, + {"$and": [{"$eq": ["$verified", True]}, {"$gt": ["$price", 50]}]}, + ] + } + } + expected = { + "$match": { + "$or": [ + {"status": "active"}, + {"category": {"$in": ["electronics", "books"]}}, + ], + "$and": [ + {"verified": True}, + {"price": {"$gt": 50}}, + ], + } + } + self.assertOptimizerEqual(expr, expected) + + def test_complex_nested_with_non_optimizable_parts(self): + expr = { + "$expr": { + "$and": [ + { + "$or": [ # Not optimizable because of $gt + {"$eq": ["$status", "active"]}, + {"$gt": ["$views", 1000]}, + ] + }, + {"$in": ["$category", ["electronics", "books"]]}, + {"$eq": ["$verified", True]}, + {"$gt": ["$price", 50]}, # Not optimizable + ] + } + } + expected = { + "$match": { + "$and": [ + {"category": {"$in": ["electronics", "books"]}}, + {"verified": True}, + ], + "$expr": { + "$and": [ + { + "$or": [ + {"$eq": ["$status", "active"]}, + {"$gt": ["$views", 1000]}, + ] + }, + {"$gt": ["$price", 50]}, + ] + }, + } + } + self.assertOptimizerEqual(expr, expected) + + def test_london_in_case(self): + expr = {"$expr": {"$in": ["$author_city", ["London"]]}} + expected = {"$match": {"author_city": {"$in": ["London"]}}} + self.assertOptimizerEqual(expr, expected) + + def test_deeply_nested_logical_operators(self): + expr = { + "$expr": { + "$and": [ + { + "$or": [ + {"$eq": ["$type", "premium"]}, + { + "$and": [ + {"$eq": ["$type", "standard"]}, + {"$in": ["$region", ["US", "CA"]]}, + ] + }, + ] + }, + {"$eq": ["$active", True]}, + ] + } + } + expected = { + "$match": { + "$and": [ + { + "$or": [ + {"type": "premium"}, + { + "$and": [ + {"type": "standard"}, + {"region": {"$in": ["US", "CA"]}}, + ] + }, + ] + }, + {"active": True}, + ] + } + } + self.assertOptimizerEqual(expr, expected) + + def test_deeply_nested_logical_operator_with_variable(self): + expr = { + "$expr": { + "$and": [ + { + "$or": [ + {"$eq": ["$type", "premium"]}, + { + "$and": [ + # Not optimizable because of Variable + {"$eq": ["$type", "$$standard"]}, + {"$in": ["$region", ["US", "CA"]]}, + ] + }, + ] + }, + {"$eq": ["$active", True]}, + ] + } + } + expected = { + "$match": { + "$expr": { + "$and": [ + {"$eq": ["$type", "premium"]}, + { + "$and": [ + {"$eq": ["$type", "$$standard"]}, + {"$in": ["$region", ["US", "CA"]]}, + ] + }, + ] + }, + "$and": [{"active": True}], + } + } + self.assertOptimizerEqual(expr, expected) + + +class OptimizedMatchMQLTests(TestCase): + def test_in_query(self): + with self.assertNumQueries(1) as ctx: + list(Author.objects.filter(author_city__in=["London"])) + query = ctx.captured_queries[0]["sql"] + expected = ( + "db.queries__author.aggregate([{'$match': {'author_city': {'$in': ['London']}}}])" + ) + self.assertEqual(query, expected) + + def test_eq_query(self): + with self.assertNumQueries(1) as ctx: + list(Author.objects.filter(name="Alice")) + query = ctx.captured_queries[0]["sql"] + expected = "db.queries__author.aggregate([{'$match': {'name': 'Alice'}}])" + self.assertEqual(query, expected) + + def test_eq_and_in_query(self): + with self.assertNumQueries(1) as ctx: + list(Author.objects.filter(name="Alice", author_city__in=["London", "New York"])) + query = ctx.captured_queries[0]["sql"] + expected = ( + "db.queries__author.aggregate([{'$match': {'$and': [{'name': 'Alice'}, " + "{'author_city': {'$in': ['London', 'New York']}}]}}])" + ) + self.assertEqual(query, expected) diff --git a/tests/expression_converter_/test_op_expressions.py b/tests/expression_converter_/test_op_expressions.py new file mode 100644 index 000000000..545522026 --- /dev/null +++ b/tests/expression_converter_/test_op_expressions.py @@ -0,0 +1,195 @@ +from datetime import datetime +from uuid import UUID + +from bson import Decimal128 +from django.test import SimpleTestCase + +from django_mongodb_backend.query_conversion.expression_converters import convert_expression + + +class TestBaseExpressionConversionCase(SimpleTestCase): + CONVERTIBLE_TYPES = { + "int": 42, + "float": 3.14, + "decimal128": Decimal128(3.14), + "boolean": True, + "NoneType": None, + "string": "string", + "datetime": datetime.datetime.utcnow(), + "duration": datetime.timedelta(days=5, hours=3), + "uuid": UUID("12345678123456781234567812345678"), + } + + def assertConversionEqual(self, input, expected): + result = convert_expression(input) + self.assertEqual(result, expected) + + def test_non_dict_expression(self): + expr = ["$status", "active"] + expected = expr # Should remain unchanged + self.assertConversionEqual(expr, expected) + + def test_empty_dict_expression(self): + expr = {} + expected = expr # Should remain unchanged + self.assertConversionEqual(expr, expected) + + def test_non_convertible(self): + expr = {"$gt": ["$price", 100]} + expected = expr # Should remain unchanged + self.assertConversionEqual(expr, expected) + + def _test_conversion_various_types(self, conversion_test): + for _type, val in self.CONVERTIBLE_TYPES.items(): + with self.subTest(_type=_type, val=val): + self.conversion_test(val) + + +class TestEqExprConversionCase(TestBaseExpressionConversionCase): + def test_eq_conversion(self): + expr = {"$eq": ["$status", "active"]} + expected = {"status": "active"} + self.assertConversionEqual(expr, expected) + + def test_eq_conversion_non_string_field(self): + expr = {"$eq": [123, "active"]} + expected = expr + self.assertConversionEqual(expr, expected) + + def test_eq_no_conversion_dict_value(self): + expr = {"$eq": ["$status", {"$gt": 5}]} + expected = expr + self.assertConversionEqual(expr, expected) + + def _test_eq_conversion_valid_type(self, _type): + expr = {"$eq": ["$age", _type]} + expected = {"age": _type} + self.assertConversionEqual(expr, expected) + + def _test_eq_conversion_valid_array_type(self, _type): + expr = {"$eq": ["$age", _type]} + expected = {"age": _type} + self.assertConversionEqual(expr, expected) + + def test_eq_conversion_various_types(self): + self._test_conversion_various_types(self._test_eq_conversion_valid_type) + + def test_eq_conversion_various_array_types(self): + self._test_conversion_various_types(self._test_eq_conversion_valid_array_type) + + +class TestInExprConversionCase(TestBaseExpressionConversionCase): + def test_in_conversion(self): + expr = {"$in": ["$category", ["electronics", "books", "clothing"]]} + expected = {"category": {"$in": ["electronics", "books", "clothing"]}} + self.assertConversionEqual(expr, expected) + + def test_in_conversion_non_string_field(self): + expr = {"$in": [123, ["electronics", "books"]]} + expected = expr + self.assertConversionEqual(expr, expected) + + def test_in_no_conversion_dict_value(self): + expr = {"$in": ["$status", [{"bad": "val"}]]} + expected = expr + self.assertConversionEqual(expr, expected) + + def _test_in_conversion_valid_type(self, _type): + expr = {"$in": ["$age", (_type,)]} + expected = {"age": _type} + self.assertConversionEqual(expr, expected) + + def test_in_conversion_various_types(self): + for _type, val in self.CONVERTIBLE_TYPES.items(): + with self.subTest(_type=_type, val=val): + self._test_in_conversion_valid_type(val) + + +class TestLogicalExpressionConversionCase(TestBaseExpressionConversionCase): + def test_logical_and_conversion(self): + expr = { + "$and": [ + {"$eq": ["$status", "active"]}, + {"$in": ["$category", ["electronics", "books"]]}, + {"$eq": ["$verified", True]}, + ] + } + expected = { + "$and": [ + {"status": "active"}, + {"category": {"$in": ["electronics", "books"]}}, + {"verified": True}, + ] + } + self.assertConversionEqual(expr, expected) + + def test_logical_or_conversion(self): + expr = { + "$or": [ + {"$eq": ["$status", "active"]}, + {"$in": ["$category", ["electronics", "books"]]}, + ] + } + expected = { + "$or": [ + {"status": "active"}, + {"category": {"$in": ["electronics", "books"]}}, + ] + } + self.assertConversionEqual(expr, expected) + + def test_logical_or_conversion_failure(self): + expr = { + "$or": [ + {"$eq": ["$status", "active"]}, + {"$in": ["$category", ["electronics", "books"]]}, + { + "$and": [ + {"verified": True}, + {"$gt": ["$price", 50]}, # Not optimizable + ] + }, + ] + } + self.assertConversionEqual(expr, expr) + + def test_logical_mixed_conversion(self): + expr = { + "$and": [ + { + "$or": [ + {"$eq": ["$status", "active"]}, + ] + }, + {"$in": ["$category", ["electronics", "books"]]}, + {"$eq": ["$verified", True]}, + ] + } + expected = { + "$and": [ + { + "$or": [ + {"status": "active"}, + ] + }, + {"category": {"$in": ["electronics", "books"]}}, + {"verified": True}, + ] + } + self.assertConversionEqual(expr, expected) + + def test_logical_mixed_conversion_failure(self): + expr = { + "$and": [ + { + "$or": [ + {"$eq": ["$status", "active"]}, + {"$gt": ["$views", 1000]}, + ] + }, + {"$in": ["$category", ["electronics", "books"]]}, + {"$eq": ["$verified", True]}, + {"$gt": ["$price", 50]}, # Not optimizable + ] + } + self.assertConversionEqual(expr, expr) diff --git a/tests/expression_converter_/tests.py b/tests/expression_converter_/tests.py deleted file mode 100644 index e69de29bb..000000000 From cc806656e2b449dcfbd912670d5fb34013a88c03 Mon Sep 17 00:00:00 2001 From: Jib Date: Mon, 25 Aug 2025 02:56:15 +0300 Subject: [PATCH 3/9] execute conversion query on self.match_mql stage --- django_mongodb_backend/query.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/django_mongodb_backend/query.py b/django_mongodb_backend/query.py index 8bad93463..b41338251 100644 --- a/django_mongodb_backend/query.py +++ b/django_mongodb_backend/query.py @@ -11,6 +11,8 @@ from django.db.models.sql.where import AND, OR, XOR, ExtraWhere, NothingNode, WhereNode from pymongo.errors import BulkWriteError, DuplicateKeyError, PyMongoError +from django_mongodb_backend.query_conversion.query_optimizer import QueryOptimizer + def wrap_database_errors(func): @wraps(func) @@ -87,7 +89,7 @@ def get_pipeline(self): for query in self.subqueries or (): pipeline.extend(query.get_pipeline()) if self.match_mql: - pipeline.append({"$match": self.match_mql}) + pipeline.append(QueryOptimizer().optimize(self.match_mql)) if self.aggregation_pipeline: pipeline.extend(self.aggregation_pipeline) if self.project_fields: From 23631f64a339b78185b4299becf455e85c99b772 Mon Sep 17 00:00:00 2001 From: Jib Date: Mon, 25 Aug 2025 03:01:06 +0300 Subject: [PATCH 4/9] Fix pre-commit and loose print statements --- .../query_conversion/expression_converters.py | 2 +- django_mongodb_backend/query_conversion/query_optimizer.py | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/django_mongodb_backend/query_conversion/expression_converters.py b/django_mongodb_backend/query_conversion/expression_converters.py index af40b6b03..42c9d0ffb 100644 --- a/django_mongodb_backend/query_conversion/expression_converters.py +++ b/django_mongodb_backend/query_conversion/expression_converters.py @@ -123,7 +123,7 @@ def convert_expression(expr): Optimized match condition or None if not optimizable """ if isinstance(expr, dict) and len(expr) == 1: - op = list(expr.keys())[0] + op = next(iter(expr.keys())) if op in OPTIMIZABLE_OPS: return OPTIMIZABLE_OPS[op].optimize(expr[op]) return None diff --git a/django_mongodb_backend/query_conversion/query_optimizer.py b/django_mongodb_backend/query_conversion/query_optimizer.py index 5eb4f1bc1..2d7020828 100644 --- a/django_mongodb_backend/query_conversion/query_optimizer.py +++ b/django_mongodb_backend/query_conversion/query_optimizer.py @@ -73,13 +73,10 @@ def _process_expression(self, expr, match_conditions, remaining_conditions): if not has_and and not has_or: # Process single condition optimized = convert_expression(expr) - print(f"{expr=}") if optimized: match_conditions.append({"$match": optimized}) else: remaining_conditions.append(expr) - print(f"{match_conditions=}") - print(f"{remaining_conditions=}") else: # Can't optimize remaining_conditions.append(expr) From f2ea829f774c90020c48a3ffd96d5dc7e86128ad Mon Sep 17 00:00:00 2001 From: Jib Date: Mon, 25 Aug 2025 03:12:01 +0300 Subject: [PATCH 5/9] extend rather than append --- django_mongodb_backend/query.py | 2 +- .../query_conversion/expression_converters.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/django_mongodb_backend/query.py b/django_mongodb_backend/query.py index b41338251..2f862932b 100644 --- a/django_mongodb_backend/query.py +++ b/django_mongodb_backend/query.py @@ -89,7 +89,7 @@ def get_pipeline(self): for query in self.subqueries or (): pipeline.extend(query.get_pipeline()) if self.match_mql: - pipeline.append(QueryOptimizer().optimize(self.match_mql)) + pipeline.extend(QueryOptimizer().convert_expr_to_match(self.match_mql)) if self.aggregation_pipeline: pipeline.extend(self.aggregation_pipeline) if self.project_fields: diff --git a/django_mongodb_backend/query_conversion/expression_converters.py b/django_mongodb_backend/query_conversion/expression_converters.py index 42c9d0ffb..2564058f6 100644 --- a/django_mongodb_backend/query_conversion/expression_converters.py +++ b/django_mongodb_backend/query_conversion/expression_converters.py @@ -8,7 +8,7 @@ class _BaseExpressionConverter: """ @classmethod - def optimize(cls, expr): + def convert(cls, expr): raise NotImplementedError("Subclasses should implement this method.") @classmethod @@ -38,7 +38,7 @@ class _EqExpressionConverter(_BaseExpressionConverter): """Convert $eq operation to a $match compatible query.""" @classmethod - def optimize(cls, eq_args): + def convert(cls, eq_args): if isinstance(eq_args, list) and len(eq_args) == 2: field_expr, value = eq_args @@ -58,7 +58,7 @@ class _InExpressionConverter(_BaseExpressionConverter): """Convert $in operation to a $match compatible query.""" @classmethod - def optimize(cls, in_args): + def convert(cls, in_args): if isinstance(in_args, list) and len(in_args) == 2: field_expr, values = in_args @@ -77,7 +77,7 @@ class _LogicalExpressionConverter(_BaseExpressionConverter): """Generic for converting logical operations to a $match compatible query.""" @classmethod - def optimize(cls, combined_conditions): + def convert(cls, combined_conditions): if isinstance(combined_conditions, list): optimized_conditions = [] for condition in combined_conditions: @@ -125,5 +125,5 @@ def convert_expression(expr): if isinstance(expr, dict) and len(expr) == 1: op = next(iter(expr.keys())) if op in OPTIMIZABLE_OPS: - return OPTIMIZABLE_OPS[op].optimize(expr[op]) + return OPTIMIZABLE_OPS[op].convert(expr[op]) return None From 4c3e891d3bdaf5a8d8486826b351d4f5f84b4c81 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Wed, 27 Aug 2025 14:14:42 -0700 Subject: [PATCH 6/9] Fix expression_converter tests --- django_mongodb_backend/query.py | 3 +- .../test_match_conversion.py | 175 +++++++++--------- .../test_op_expressions.py | 58 +++--- 3 files changed, 128 insertions(+), 108 deletions(-) diff --git a/django_mongodb_backend/query.py b/django_mongodb_backend/query.py index 2f862932b..865b4ae1f 100644 --- a/django_mongodb_backend/query.py +++ b/django_mongodb_backend/query.py @@ -57,6 +57,7 @@ def __init__(self, compiler): # $lookup stage that encapsulates the pipeline for performing a nested # subquery. self.subquery_lookup = None + self.query_optimizer = QueryOptimizer() def __repr__(self): return f"" @@ -89,7 +90,7 @@ def get_pipeline(self): for query in self.subqueries or (): pipeline.extend(query.get_pipeline()) if self.match_mql: - pipeline.extend(QueryOptimizer().convert_expr_to_match(self.match_mql)) + pipeline.extend(self.query_optimizer.convert_expr_to_match(self.match_mql)) if self.aggregation_pipeline: pipeline.extend(self.aggregation_pipeline) if self.project_fields: diff --git a/tests/expression_converter_/test_match_conversion.py b/tests/expression_converter_/test_match_conversion.py index f3db7cd2c..4bf222b98 100644 --- a/tests/expression_converter_/test_match_conversion.py +++ b/tests/expression_converter_/test_match_conversion.py @@ -9,7 +9,7 @@ class QueryOptimizerTests(SimpleTestCase): def assertOptimizerEqual(self, input, expected): - result = QueryOptimizer().optimize(input) + result = QueryOptimizer().convert_expr_to_match(input) self.assertEqual(result, expected) def test_multiple_optimizable_conditions(self): @@ -22,15 +22,17 @@ def test_multiple_optimizable_conditions(self): ] } } - expected = { - "$match": { - "$and": [ - {"status": "active"}, - {"category": {"$in": ["electronics", "books"]}}, - {"verified": True}, - ] + expected = [ + { + "$match": { + "$and": [ + {"status": "active"}, + {"category": {"$in": ["electronics", "books"]}}, + {"verified": True}, + ] + } } - } + ] self.assertOptimizerEqual(expr, expected) def test_mixed_optimizable_and_non_optimizable_conditions(self): @@ -43,21 +45,25 @@ def test_mixed_optimizable_and_non_optimizable_conditions(self): ] } } - expected = { - "$match": { - "$and": [{"status": "active"}, {"category": {"$in": ["electronics"]}}], - "$expr": {"$gt": ["$price", 100]}, + expected = [ + { + "$match": { + "$and": [{"status": "active"}, {"category": {"$in": ["electronics"]}}], + "$expr": {"$gt": ["$price", 100]}, + } } - } + ] self.assertOptimizerEqual(expr, expected) def test_non_optimizable_condition(self): expr = {"$expr": {"$gt": ["$price", 100]}} - expected = { - "$match": { - "$expr": {"$gt": ["$price", 100]}, + expected = [ + { + "$match": { + "$expr": {"$gt": ["$price", 100]}, + } } - } + ] self.assertOptimizerEqual(expr, expected) def test_nested_logical_conditions(self): @@ -70,18 +76,14 @@ def test_nested_logical_conditions(self): ] } } - expected = { - "$match": { - "$or": [ - {"status": "active"}, - {"category": {"$in": ["electronics", "books"]}}, - ], - "$and": [ - {"verified": True}, - {"price": {"$gt": 50}}, - ], + expected = [ + { + "$match": { + "$expr": {"$and": [{"$eq": ["$verified", True]}, {"$gt": ["$price", 50]}]}, + "$or": [{"status": "active"}, {"category": {"$in": ["electronics", "books"]}}], + } } - } + ] self.assertOptimizerEqual(expr, expected) def test_complex_nested_with_non_optimizable_parts(self): @@ -100,30 +102,32 @@ def test_complex_nested_with_non_optimizable_parts(self): ] } } - expected = { - "$match": { - "$and": [ - {"category": {"$in": ["electronics", "books"]}}, - {"verified": True}, - ], - "$expr": { + expected = [ + { + "$match": { "$and": [ - { - "$or": [ - {"$eq": ["$status", "active"]}, - {"$gt": ["$views", 1000]}, - ] - }, - {"$gt": ["$price", 50]}, - ] - }, + {"category": {"$in": ["electronics", "books"]}}, + {"verified": True}, + ], + "$expr": { + "$and": [ + { + "$or": [ + {"$eq": ["$status", "active"]}, + {"$gt": ["$views", 1000]}, + ] + }, + {"$gt": ["$price", 50]}, + ] + }, + } } - } + ] self.assertOptimizerEqual(expr, expected) def test_london_in_case(self): expr = {"$expr": {"$in": ["$author_city", ["London"]]}} - expected = {"$match": {"author_city": {"$in": ["London"]}}} + expected = [{"$match": {"author_city": {"$in": ["London"]}}}] self.assertOptimizerEqual(expr, expected) def test_deeply_nested_logical_operators(self): @@ -145,24 +149,26 @@ def test_deeply_nested_logical_operators(self): ] } } - expected = { - "$match": { - "$and": [ - { - "$or": [ - {"type": "premium"}, - { - "$and": [ - {"type": "standard"}, - {"region": {"$in": ["US", "CA"]}}, - ] - }, - ] - }, - {"active": True}, - ] + expected = [ + { + "$match": { + "$and": [ + { + "$or": [ + {"type": "premium"}, + { + "$and": [ + {"type": "standard"}, + {"region": {"$in": ["US", "CA"]}}, + ] + }, + ] + }, + {"active": True}, + ] + } } - } + ] self.assertOptimizerEqual(expr, expected) def test_deeply_nested_logical_operator_with_variable(self): @@ -185,22 +191,24 @@ def test_deeply_nested_logical_operator_with_variable(self): ] } } - expected = { - "$match": { - "$expr": { - "$and": [ - {"$eq": ["$type", "premium"]}, - { - "$and": [ - {"$eq": ["$type", "$$standard"]}, - {"$in": ["$region", ["US", "CA"]]}, - ] - }, - ] - }, - "$and": [{"active": True}], + expected = [ + { + "$match": { + "$expr": { + "$or": [ + {"$eq": ["$type", "premium"]}, + { + "$and": [ + {"$eq": ["$type", "$$standard"]}, + {"$in": ["$region", ["US", "CA"]]}, + ] + }, + ] + }, + "$and": [{"active": True}], + } } - } + ] self.assertOptimizerEqual(expr, expected) @@ -210,7 +218,8 @@ def test_in_query(self): list(Author.objects.filter(author_city__in=["London"])) query = ctx.captured_queries[0]["sql"] expected = ( - "db.queries__author.aggregate([{'$match': {'author_city': {'$in': ['London']}}}])" + "db.expression_converter__author.aggregate([{'$match': " + + "{'author_city': {'$in': ('London',)}}}])" ) self.assertEqual(query, expected) @@ -218,7 +227,7 @@ def test_eq_query(self): with self.assertNumQueries(1) as ctx: list(Author.objects.filter(name="Alice")) query = ctx.captured_queries[0]["sql"] - expected = "db.queries__author.aggregate([{'$match': {'name': 'Alice'}}])" + expected = "db.expression_converter__author.aggregate([{'$match': {'name': 'Alice'}}])" self.assertEqual(query, expected) def test_eq_and_in_query(self): @@ -226,7 +235,7 @@ def test_eq_and_in_query(self): list(Author.objects.filter(name="Alice", author_city__in=["London", "New York"])) query = ctx.captured_queries[0]["sql"] expected = ( - "db.queries__author.aggregate([{'$match': {'$and': [{'name': 'Alice'}, " - "{'author_city': {'$in': ['London', 'New York']}}]}}])" + "db.expression_converter__author.aggregate([{'$match': {'$and': " + + "[{'author_city': {'$in': ('London', 'New York')}}, {'name': 'Alice'}]}}])" ) self.assertEqual(query, expected) diff --git a/tests/expression_converter_/test_op_expressions.py b/tests/expression_converter_/test_op_expressions.py index 545522026..cd4c0e922 100644 --- a/tests/expression_converter_/test_op_expressions.py +++ b/tests/expression_converter_/test_op_expressions.py @@ -1,4 +1,4 @@ -from datetime import datetime +import datetime from uuid import UUID from bson import Decimal128 @@ -11,11 +11,11 @@ class TestBaseExpressionConversionCase(SimpleTestCase): CONVERTIBLE_TYPES = { "int": 42, "float": 3.14, - "decimal128": Decimal128(3.14), + "decimal128": Decimal128("3.14"), "boolean": True, "NoneType": None, "string": "string", - "datetime": datetime.datetime.utcnow(), + "datetime": datetime.datetime.now(datetime.UTC), "duration": datetime.timedelta(days=5, hours=3), "uuid": UUID("12345678123456781234567812345678"), } @@ -24,25 +24,26 @@ def assertConversionEqual(self, input, expected): result = convert_expression(input) self.assertEqual(result, expected) + def assertNotOptimizable(self, input): + result = convert_expression(input) + self.assertIsNone(result) + def test_non_dict_expression(self): expr = ["$status", "active"] - expected = expr # Should remain unchanged - self.assertConversionEqual(expr, expected) + self.assertNotOptimizable(expr) def test_empty_dict_expression(self): expr = {} - expected = expr # Should remain unchanged - self.assertConversionEqual(expr, expected) + self.assertNotOptimizable(expr) def test_non_convertible(self): expr = {"$gt": ["$price", 100]} - expected = expr # Should remain unchanged - self.assertConversionEqual(expr, expected) + self.assertNotOptimizable(expr) def _test_conversion_various_types(self, conversion_test): for _type, val in self.CONVERTIBLE_TYPES.items(): with self.subTest(_type=_type, val=val): - self.conversion_test(val) + conversion_test(val) class TestEqExprConversionCase(TestBaseExpressionConversionCase): @@ -51,15 +52,13 @@ def test_eq_conversion(self): expected = {"status": "active"} self.assertConversionEqual(expr, expected) - def test_eq_conversion_non_string_field(self): + def test_eq_no_conversion_non_string_field(self): expr = {"$eq": [123, "active"]} - expected = expr - self.assertConversionEqual(expr, expected) + self.assertNotOptimizable(expr) def test_eq_no_conversion_dict_value(self): expr = {"$eq": ["$status", {"$gt": 5}]} - expected = expr - self.assertConversionEqual(expr, expected) + self.assertNotOptimizable(expr) def _test_eq_conversion_valid_type(self, _type): expr = {"$eq": ["$age", _type]} @@ -84,19 +83,30 @@ def test_in_conversion(self): expected = {"category": {"$in": ["electronics", "books", "clothing"]}} self.assertConversionEqual(expr, expected) - def test_in_conversion_non_string_field(self): + def test_in_no_conversion_non_string_field(self): expr = {"$in": [123, ["electronics", "books"]]} - expected = expr - self.assertConversionEqual(expr, expected) + self.assertNotOptimizable(expr) def test_in_no_conversion_dict_value(self): expr = {"$in": ["$status", [{"bad": "val"}]]} - expected = expr - self.assertConversionEqual(expr, expected) + self.assertNotOptimizable(expr) def _test_in_conversion_valid_type(self, _type): - expr = {"$in": ["$age", (_type,)]} - expected = {"age": _type} + expr = { + "$in": [ + "$age", + [ + _type, + ], + ] + } + expected = { + "age": { + "$in": [ + _type, + ] + } + } self.assertConversionEqual(expr, expected) def test_in_conversion_various_types(self): @@ -151,7 +161,7 @@ def test_logical_or_conversion_failure(self): }, ] } - self.assertConversionEqual(expr, expr) + self.assertNotOptimizable(expr) def test_logical_mixed_conversion(self): expr = { @@ -192,4 +202,4 @@ def test_logical_mixed_conversion_failure(self): {"$gt": ["$price", 50]}, # Not optimizable ] } - self.assertConversionEqual(expr, expr) + self.assertNotOptimizable(expr) From f9a69cc9e642e73e2a8d1140fb09717d540cc0df Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Wed, 27 Aug 2025 17:11:48 -0700 Subject: [PATCH 7/9] WIP --- django_mongodb_backend/query.py | 7 ++++ .../query_conversion/query_optimizer.py | 37 ++++++++++++------- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/django_mongodb_backend/query.py b/django_mongodb_backend/query.py index 865b4ae1f..29bd14851 100644 --- a/django_mongodb_backend/query.py +++ b/django_mongodb_backend/query.py @@ -1,3 +1,4 @@ +import pprint from functools import reduce, wraps from operator import add as add_operator @@ -90,7 +91,13 @@ def get_pipeline(self): for query in self.subqueries or (): pipeline.extend(query.get_pipeline()) if self.match_mql: + optimized = self.query_optimizer.convert_expr_to_match(self.match_mql) + if any("$and" in str(cond) for cond in optimized) and not "$and" in str(self.match_mql["$expr"]): + print(f"$AND MISMATCH MQL\nOptimized:\n\t{pprint.pformat(optimized)}\nOriginal:\n\t{self.match_mql}") + if any("$or" in str(cond) for cond in optimized) and not "$or" in str(self.match_mql["$expr"]): + print(f"$OR MISMATCH MQL\nOptimized:\n\t{pprint.pformat(optimized)}\nOriginal:\n\t{self.match_mql}") pipeline.extend(self.query_optimizer.convert_expr_to_match(self.match_mql)) + # pipeline.append({"$match": self.match_mql}) if self.aggregation_pipeline: pipeline.extend(self.aggregation_pipeline) if self.project_fields: diff --git a/django_mongodb_backend/query_conversion/query_optimizer.py b/django_mongodb_backend/query_conversion/query_optimizer.py index 2d7020828..2d6c03a88 100644 --- a/django_mongodb_backend/query_conversion/query_optimizer.py +++ b/django_mongodb_backend/query_conversion/query_optimizer.py @@ -24,15 +24,14 @@ def convert_expr_to_match(self, expr): return [{"$match": {}}] expr_content = expr_query["$expr"] - match_conditions = [] - remaining_expr_conditions = [] # Handle the expression content - self._process_expression(expr_content, match_conditions, remaining_expr_conditions) + match_conditions, remaining_expr_conditions = self._process_expression(expr_content) # If there are remaining conditions that couldn't be optimized, # keep them in an $expr if remaining_expr_conditions: + print(f"Remaining conditions: {remaining_expr_conditions}, match_conditions: {match_conditions}") if len(remaining_expr_conditions) == 1: expr_conditions = {"$expr": remaining_expr_conditions[0]} else: @@ -44,17 +43,18 @@ def convert_expr_to_match(self, expr): else: match_conditions.append({"$match": expr_conditions}) + print(f"Original expr: {expr_query}, optimized expr: {match_conditions}") return match_conditions - def _process_expression(self, expr, match_conditions, remaining_conditions): + def _process_expression(self, expr): """ Process an expression and extract optimizable conditions. Args: expr: The expression to process - match_conditions: List to append optimized match conditions - remaining_conditions: List to append non-optimizable conditions """ + match_conditions = [] + remaining_conditions = [] if isinstance(expr, dict): # Check if this is an $and operation has_and = "$and" in expr @@ -63,13 +63,17 @@ def _process_expression(self, expr, match_conditions, remaining_conditions): # If they fail, they should failover to a remaining conditions list # There's probably a better way to do this, but this is a start if has_and: - self._process_logical_conditions( - "$and", expr["$and"], match_conditions, remaining_conditions + and_match_conditions, and_remaining_conditions = self._process_logical_conditions( + "$and", expr["$and"] ) + match_conditions.extend(and_match_conditions) + remaining_conditions.extend(and_remaining_conditions) if has_or: - self._process_logical_conditions( - "$or", expr["$or"], match_conditions, remaining_conditions + or_match_conditions, or_remaining_conditions = self._process_logical_conditions( + "$or", expr["$or"] ) + match_conditions.extend(or_match_conditions) + remaining_conditions.extend(or_remaining_conditions) if not has_and and not has_or: # Process single condition optimized = convert_expression(expr) @@ -80,19 +84,20 @@ def _process_expression(self, expr, match_conditions, remaining_conditions): else: # Can't optimize remaining_conditions.append(expr) + return match_conditions, remaining_conditions def _process_logical_conditions( - self, logical_op, logical_conditions, match_conditions, remaining_conditions + self, logical_op, logical_conditions ): """ Process conditions within a logical array. Args: logical_conditions: List of conditions within logical operator - match_conditions: List to append optimized match conditions - remaining_conditions: List to append non-optimizable conditions """ optimized_conditions = [] + match_conditions = [] + remaining_conditions = [] for condition in logical_conditions: if isinstance(condition, dict): if optimized := convert_expression(condition): @@ -101,4 +106,8 @@ def _process_logical_conditions( remaining_conditions.append(condition) else: remaining_conditions.append(condition) - match_conditions.append({"$match": {logical_op: optimized_conditions}}) + if optimized_conditions: + match_conditions.append({"$match": {logical_op: optimized_conditions}}) + else: + remaining_conditions = [{logical_op: logical_conditions}] + return match_conditions, remaining_conditions \ No newline at end of file From b16c7214b01355db4b4a980d0f127829b0c5f08a Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Thu, 28 Aug 2025 11:11:56 -0400 Subject: [PATCH 8/9] Preserve logical operators when optimizing --- django_mongodb_backend/query.py | 11 ++-- .../query_conversion/query_optimizer.py | 63 ++++++++----------- .../test_match_conversion.py | 58 ++++++++++------- 3 files changed, 66 insertions(+), 66 deletions(-) diff --git a/django_mongodb_backend/query.py b/django_mongodb_backend/query.py index 29bd14851..c77944bfa 100644 --- a/django_mongodb_backend/query.py +++ b/django_mongodb_backend/query.py @@ -1,4 +1,3 @@ -import pprint from functools import reduce, wraps from operator import add as add_operator @@ -91,11 +90,11 @@ def get_pipeline(self): for query in self.subqueries or (): pipeline.extend(query.get_pipeline()) if self.match_mql: - optimized = self.query_optimizer.convert_expr_to_match(self.match_mql) - if any("$and" in str(cond) for cond in optimized) and not "$and" in str(self.match_mql["$expr"]): - print(f"$AND MISMATCH MQL\nOptimized:\n\t{pprint.pformat(optimized)}\nOriginal:\n\t{self.match_mql}") - if any("$or" in str(cond) for cond in optimized) and not "$or" in str(self.match_mql["$expr"]): - print(f"$OR MISMATCH MQL\nOptimized:\n\t{pprint.pformat(optimized)}\nOriginal:\n\t{self.match_mql}") + # optimized = self.query_optimizer.convert_expr_to_match(self.match_mql) + # if any("$and" in str(cond) for cond in optimized) and not "$and" in str(self.match_mql["$expr"]): + # print(f"$AND MISMATCH MQL\nOptimized:\n\t{pprint.pformat(optimized)}\nOriginal:\n\t{self.match_mql}") + # if any("$or" in str(cond) for cond in optimized) and not "$or" in str(self.match_mql["$expr"]): + # print(f"$OR MISMATCH MQL\nOptimized:\n\t{pprint.pformat(optimized)}\nOriginal:\n\t{self.match_mql}") pipeline.extend(self.query_optimizer.convert_expr_to_match(self.match_mql)) # pipeline.append({"$match": self.match_mql}) if self.aggregation_pipeline: diff --git a/django_mongodb_backend/query_conversion/query_optimizer.py b/django_mongodb_backend/query_conversion/query_optimizer.py index 2d6c03a88..6140b827c 100644 --- a/django_mongodb_backend/query_conversion/query_optimizer.py +++ b/django_mongodb_backend/query_conversion/query_optimizer.py @@ -26,25 +26,10 @@ def convert_expr_to_match(self, expr): expr_content = expr_query["$expr"] # Handle the expression content - match_conditions, remaining_expr_conditions = self._process_expression(expr_content) + optimized_query = self._process_expression(expr_content) - # If there are remaining conditions that couldn't be optimized, - # keep them in an $expr - if remaining_expr_conditions: - print(f"Remaining conditions: {remaining_expr_conditions}, match_conditions: {match_conditions}") - if len(remaining_expr_conditions) == 1: - expr_conditions = {"$expr": remaining_expr_conditions[0]} - else: - expr_conditions = {"$expr": {"$and": remaining_expr_conditions}} - - if match_conditions: - # This assumes match_conditions is a list of dicts with $match - match_conditions[0]["$match"].update(expr_conditions) - else: - match_conditions.append({"$match": expr_conditions}) - - print(f"Original expr: {expr_query}, optimized expr: {match_conditions}") - return match_conditions + # print(f"Original expr:\n{json_util.dumps(expr_query)}\nOptimized expr:\n{json_util.dumps(optimized_query)}") + return optimized_query def _process_expression(self, expr): """ @@ -63,32 +48,24 @@ def _process_expression(self, expr): # If they fail, they should failover to a remaining conditions list # There's probably a better way to do this, but this is a start if has_and: - and_match_conditions, and_remaining_conditions = self._process_logical_conditions( - "$and", expr["$and"] - ) + and_match_conditions = self._process_logical_conditions("$and", expr["$and"]) match_conditions.extend(and_match_conditions) - remaining_conditions.extend(and_remaining_conditions) if has_or: - or_match_conditions, or_remaining_conditions = self._process_logical_conditions( - "$or", expr["$or"] - ) + or_match_conditions = self._process_logical_conditions("$or", expr["$or"]) match_conditions.extend(or_match_conditions) - remaining_conditions.extend(or_remaining_conditions) if not has_and and not has_or: # Process single condition optimized = convert_expression(expr) if optimized: match_conditions.append({"$match": optimized}) else: - remaining_conditions.append(expr) + remaining_conditions.append({"$match": {"$expr": expr}}) else: # Can't optimize - remaining_conditions.append(expr) - return match_conditions, remaining_conditions + remaining_conditions.append({"$expr": expr}) + return match_conditions + remaining_conditions - def _process_logical_conditions( - self, logical_op, logical_conditions - ): + def _process_logical_conditions(self, logical_op, logical_conditions): """ Process conditions within a logical array. @@ -99,15 +76,27 @@ def _process_logical_conditions( match_conditions = [] remaining_conditions = [] for condition in logical_conditions: + _remaining_conditions = [] if isinstance(condition, dict): if optimized := convert_expression(condition): optimized_conditions.append(optimized) else: - remaining_conditions.append(condition) + # print(f"Can't optimize condition: {condition}") + _remaining_conditions.append(condition) else: - remaining_conditions.append(condition) + _remaining_conditions.append(condition) + if _remaining_conditions: + # Any expressions we can't optimize must remain in an $expr that preserves the logical operator + if len(_remaining_conditions) > 1: + remaining_conditions.append({"$expr": {logical_op: _remaining_conditions}}) + else: + remaining_conditions.append({"$expr": _remaining_conditions[0]}) if optimized_conditions: - match_conditions.append({"$match": {logical_op: optimized_conditions}}) + optimized_conditions.extend(remaining_conditions) + if len(optimized_conditions) > 1: + match_conditions.append({"$match": {logical_op: optimized_conditions}}) + else: + match_conditions.append({"$match": optimized_conditions[0]}) else: - remaining_conditions = [{logical_op: logical_conditions}] - return match_conditions, remaining_conditions \ No newline at end of file + match_conditions.append({"$match": {logical_op: remaining_conditions}}) + return match_conditions diff --git a/tests/expression_converter_/test_match_conversion.py b/tests/expression_converter_/test_match_conversion.py index 4bf222b98..bc390c3be 100644 --- a/tests/expression_converter_/test_match_conversion.py +++ b/tests/expression_converter_/test_match_conversion.py @@ -48,8 +48,11 @@ def test_mixed_optimizable_and_non_optimizable_conditions(self): expected = [ { "$match": { - "$and": [{"status": "active"}, {"category": {"$in": ["electronics"]}}], - "$expr": {"$gt": ["$price", 100]}, + "$and": [ + {"status": "active"}, + {"category": {"$in": ["electronics"]}}, + {"$expr": {"$gt": ["$price", 100]}}, + ], } } ] @@ -79,8 +82,15 @@ def test_nested_logical_conditions(self): expected = [ { "$match": { - "$expr": {"$and": [{"$eq": ["$verified", True]}, {"$gt": ["$price", 50]}]}, - "$or": [{"status": "active"}, {"category": {"$in": ["electronics", "books"]}}], + "$or": [ + {"status": "active"}, + {"category": {"$in": ["electronics", "books"]}}, + { + "$expr": { + "$and": [{"$eq": ["$verified", True]}, {"$gt": ["$price", 50]}] + } + }, + ] } } ] @@ -108,18 +118,16 @@ def test_complex_nested_with_non_optimizable_parts(self): "$and": [ {"category": {"$in": ["electronics", "books"]}}, {"verified": True}, - ], - "$expr": { - "$and": [ - { + { + "$expr": { "$or": [ {"$eq": ["$status", "active"]}, {"$gt": ["$views", 1000]}, ] - }, - {"$gt": ["$price", 50]}, - ] - }, + } + }, + {"$expr": {"$gt": ["$price", 50]}}, + ] } } ] @@ -194,18 +202,22 @@ def test_deeply_nested_logical_operator_with_variable(self): expected = [ { "$match": { - "$expr": { - "$or": [ - {"$eq": ["$type", "premium"]}, - { - "$and": [ - {"$eq": ["$type", "$$standard"]}, - {"$in": ["$region", ["US", "CA"]]}, + "$and": [ + {"active": True}, + { + "$expr": { + "$or": [ + {"$eq": ["$type", "premium"]}, + { + "$and": [ + {"$eq": ["$type", "$$standard"]}, + {"$in": ["$region", ["US", "CA"]]}, + ] + }, ] - }, - ] - }, - "$and": [{"active": True}], + } + }, + ] } } ] From f1b0878dbc256959f9c28d1ccacb9e709b5e6423 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Fri, 29 Aug 2025 10:59:29 -0400 Subject: [PATCH 9/9] Update queries_ tests --- django_mongodb_backend/query.py | 6 -- .../query_conversion/query_optimizer.py | 9 +-- tests/queries_/test_explain.py | 8 +- tests/queries_/test_mql.py | 81 +++++++++---------- 4 files changed, 45 insertions(+), 59 deletions(-) diff --git a/django_mongodb_backend/query.py b/django_mongodb_backend/query.py index c77944bfa..865b4ae1f 100644 --- a/django_mongodb_backend/query.py +++ b/django_mongodb_backend/query.py @@ -90,13 +90,7 @@ def get_pipeline(self): for query in self.subqueries or (): pipeline.extend(query.get_pipeline()) if self.match_mql: - # optimized = self.query_optimizer.convert_expr_to_match(self.match_mql) - # if any("$and" in str(cond) for cond in optimized) and not "$and" in str(self.match_mql["$expr"]): - # print(f"$AND MISMATCH MQL\nOptimized:\n\t{pprint.pformat(optimized)}\nOriginal:\n\t{self.match_mql}") - # if any("$or" in str(cond) for cond in optimized) and not "$or" in str(self.match_mql["$expr"]): - # print(f"$OR MISMATCH MQL\nOptimized:\n\t{pprint.pformat(optimized)}\nOriginal:\n\t{self.match_mql}") pipeline.extend(self.query_optimizer.convert_expr_to_match(self.match_mql)) - # pipeline.append({"$match": self.match_mql}) if self.aggregation_pipeline: pipeline.extend(self.aggregation_pipeline) if self.project_fields: diff --git a/django_mongodb_backend/query_conversion/query_optimizer.py b/django_mongodb_backend/query_conversion/query_optimizer.py index 6140b827c..6c0af11be 100644 --- a/django_mongodb_backend/query_conversion/query_optimizer.py +++ b/django_mongodb_backend/query_conversion/query_optimizer.py @@ -26,10 +26,7 @@ def convert_expr_to_match(self, expr): expr_content = expr_query["$expr"] # Handle the expression content - optimized_query = self._process_expression(expr_content) - - # print(f"Original expr:\n{json_util.dumps(expr_query)}\nOptimized expr:\n{json_util.dumps(optimized_query)}") - return optimized_query + return self._process_expression(expr_content) def _process_expression(self, expr): """ @@ -81,12 +78,12 @@ def _process_logical_conditions(self, logical_op, logical_conditions): if optimized := convert_expression(condition): optimized_conditions.append(optimized) else: - # print(f"Can't optimize condition: {condition}") _remaining_conditions.append(condition) else: _remaining_conditions.append(condition) if _remaining_conditions: - # Any expressions we can't optimize must remain in an $expr that preserves the logical operator + # Any expressions we can't optimize must remain + # in an $expr that preserves the logical operator if len(_remaining_conditions) > 1: remaining_conditions.append({"$expr": {logical_op: _remaining_conditions}}) else: diff --git a/tests/queries_/test_explain.py b/tests/queries_/test_explain.py index d0e964150..6b74379b7 100644 --- a/tests/queries_/test_explain.py +++ b/tests/queries_/test_explain.py @@ -20,9 +20,7 @@ def test_object_id(self): id = ObjectId() result = Author.objects.filter(id=id).explain() parsed = json_util.loads(result) - self.assertEqual( - parsed["command"]["pipeline"], [{"$match": {"$expr": {"$eq": ["$_id", id]}}}] - ) + self.assertEqual(parsed["command"]["pipeline"], [{"$match": {"_id": id}}]) def test_non_ascii(self): """The json is dumped with ensure_ascii=False.""" @@ -32,6 +30,4 @@ def test_non_ascii(self): # non-ASCII characters. self.assertIn(name, result) parsed = json.loads(result) - self.assertEqual( - parsed["command"]["pipeline"], [{"$match": {"$expr": {"$eq": ["$name", name]}}}] - ) + self.assertEqual(parsed["command"]["pipeline"], [{"$match": {"name": name}}]) diff --git a/tests/queries_/test_mql.py b/tests/queries_/test_mql.py index fed955a9c..9b7c7bcc7 100644 --- a/tests/queries_/test_mql.py +++ b/tests/queries_/test_mql.py @@ -25,7 +25,7 @@ def test_all(self): with self.assertNumQueries(1) as ctx: list(Author.objects.all()) query = ctx.captured_queries[0]["sql"] - self.assertEqual(query, "db.queries__author.aggregate([{'$match': {'$expr': {}}}])") + self.assertEqual(query, "db.queries__author.aggregate([{'$match': {}}])") def test_join(self): with self.assertNumQueries(1) as ctx: @@ -40,12 +40,13 @@ def test_join(self): "{'$and': [{'$eq': ['$$parent__field__0', '$_id']}, " "{'$eq': ['$name', 'Bob']}]}}}], 'as': 'queries__author'}}, " "{'$unwind': '$queries__author'}, " - "{'$match': {'$expr': {'$eq': ['$queries__author.name', 'Bob']}}}])", + "{'$match': {'queries__author.name': 'Bob'}}])", ) class FKLookupConditionPushdownTests(TestCase): def test_filter_on_local_and_related_fields(self): + self.maxDiff = None with self.assertNumQueries(1) as ctx: list(Book.objects.filter(title="Don", author__name="John")) query = ctx.captured_queries[0]["sql"] @@ -57,8 +58,8 @@ def test_filter_on_local_and_related_fields(self): "{'$match': {'$expr': {'$and': [{'$eq': ['$$parent__field__0', " "'$_id']}, {'$eq': ['$name', 'John']}]}}}], 'as': " "'queries__author'}}, {'$unwind': '$queries__author'}, {'$match': " - "{'$expr': {'$and': [{'$eq': ['$queries__author.name', 'John']}, " - "{'$eq': ['$title', 'Don']}]}}}])", + "{'$and': [{'queries__author.name': 'John'}, " + "{'title': 'Don'}]}}])", ) def test_or_mixing_local_and_related_fields_is_not_pushable(self): @@ -71,9 +72,9 @@ def test_or_mixing_local_and_related_fields_is_not_pushable(self): "'queries__author', 'let': {'parent__field__0': '$author_id'}, " "'pipeline': [{'$match': {'$expr': {'$and': [{'$eq': " "['$$parent__field__0', '$_id']}]}}}], 'as': 'queries__author'}}, " - "{'$unwind': '$queries__author'}, {'$match': {'$expr': {'$or': " - "[{'$eq': ['$title', 'Don']}, {'$eq': ['$queries__author.name', " - "'John']}]}}}])", + "{'$unwind': '$queries__author'}, {'$match': {'$or': " + "[{'title': 'Don'}, {'queries__author.name': " + "'John'}]}}])", ) def test_filter_on_self_join_fields(self): @@ -90,9 +91,9 @@ def test_filter_on_self_join_fields(self): "{'parent__field__0': '$parent_id'}, 'pipeline': [{'$match': {'$expr': " "{'$and': [{'$eq': ['$$parent__field__0', '$_id']}, {'$and': [{'$eq': " "['$group_id', ObjectId('6891ff7822e475eddc20f159')]}, {'$eq': ['$name', " - "'parent']}]}]}}}], 'as': 'T2'}}, {'$unwind': '$T2'}, {'$match': {'$expr': " - "{'$and': [{'$eq': ['$T2.group_id', ObjectId('6891ff7822e475eddc20f159')]}, " - "{'$eq': ['$T2.name', 'parent']}]}}}])", + "'parent']}]}]}}}], 'as': 'T2'}}, {'$unwind': '$T2'}, {'$match': " + "{'$and': [{'T2.group_id': ObjectId('6891ff7822e475eddc20f159')}, " + "{'T2.name': 'parent'}]}}])", ) def test_filter_on_reverse_foreignkey_relation(self): @@ -107,12 +108,13 @@ def test_filter_on_reverse_foreignkey_relation(self): "['$$parent__field__0', '$order_id']}, {'$eq': ['$status', " "ObjectId('6891ff7822e475eddc20f159')]}]}}}], 'as': " "'queries__orderitem'}}, {'$unwind': '$queries__orderitem'}, " - "{'$match': {'$expr': {'$eq': ['$queries__orderitem.status', " - "ObjectId('6891ff7822e475eddc20f159')]}}}, " + "{'$match': {'queries__orderitem.status': " + "ObjectId('6891ff7822e475eddc20f159')}}, " "{'$addFields': {'_id': '$_id'}}, {'$sort': SON([('_id', 1)])}])", ) def test_filter_on_local_and_nested_join_fields(self): + self.maxDiff = None with self.assertNumQueries(1) as ctx: list( Order.objects.filter( @@ -134,11 +136,11 @@ def test_filter_on_local_and_nested_join_fields(self): "{'parent__field__0': '$queries__orderitem.order_id'}, " "'pipeline': [{'$match': {'$expr': {'$and': [{'$eq': " "['$$parent__field__0', '$_id']}, {'$eq': ['$name', 'My Order']}]}" - "}}], 'as': 'T3'}}, {'$unwind': '$T3'}, {'$match': {'$expr': " - "{'$and': [{'$eq': ['$T3.name', 'My Order']}, {'$eq': " - "['$queries__orderitem.status', " - "ObjectId('6891ff7822e475eddc20f159')]}, {'$eq': ['$name', " - "'My Order']}]}}}, {'$addFields': {'_id': '$_id'}}, " + "}}], 'as': 'T3'}}, {'$unwind': '$T3'}, {'$match': " + "{'$and': [{'T3.name': 'My Order'}, " + "{'queries__orderitem.status': ObjectId('6891ff7822e475eddc20f159')}, " + "{'name': 'My Order'}]}}, " + "{'$addFields': {'_id': '$_id'}}, " "{'$sort': SON([('_id', 1)])}])", ) @@ -157,13 +159,14 @@ def test_negated_related_filter_is_not_pushable(self): ) def test_or_on_local_fields_only(self): + self.maxDiff = None with self.assertNumQueries(1) as ctx: list(Order.objects.filter(models.Q(name="A") | models.Q(name="B"))) query = ctx.captured_queries[0]["sql"] self.assertEqual( query, - "db.queries__order.aggregate([{'$match': {'$expr': {'$or': " - "[{'$eq': ['$name', 'A']}, {'$eq': ['$name', 'B']}]}}}, " + "db.queries__order.aggregate([{'$match': {'$or': " + "[{'name': 'A'}, {'name': 'B'}]}}, " "{'$addFields': {'_id': '$_id'}}, {'$sort': SON([('_id', 1)])}])", ) @@ -177,9 +180,8 @@ def test_or_with_mixed_pushable_and_non_pushable_fields(self): "'queries__author', 'let': {'parent__field__0': '$author_id'}, " "'pipeline': [{'$match': {'$expr': {'$and': [{'$eq': " "['$$parent__field__0', '$_id']}]}}}], 'as': 'queries__author'}}, " - "{'$unwind': '$queries__author'}, {'$match': {'$expr': {'$or': " - "[{'$eq': ['$queries__author.name', 'John']}, {'$eq': ['$title', " - "'Don']}]}}}])", + "{'$unwind': '$queries__author'}, {'$match': {'$or': " + "[{'queries__author.name': 'John'}, {'title': 'Don'}]}}])", ) def test_push_equality_between_parent_and_child_fields(self): @@ -201,6 +203,7 @@ def test_push_equality_between_parent_and_child_fields(self): class M2MLookupConditionPushdownTests(TestCase): def test_simple_related_filter_is_pushed(self): + self.maxDiff = None with self.assertNumQueries(1) as ctx: list(Library.objects.filter(readers__name="Alice")) query = ctx.captured_queries[0]["sql"] @@ -246,7 +249,7 @@ def test_simple_related_filter_is_pushed(self): ], "as": "queries__reader" }}, {"$unwind": "$queries__reader"}, - {"$match": {"$expr": {"$eq": ["$queries__reader.name", "Alice"]}}} + {"$match": {"queries__reader.name": "Alice"}} ]) """ self.assertEqual(query, uglify_mongo_aggregate(expected_query)) @@ -285,12 +288,12 @@ def test_subquery_join_is_pushed(self): {"$unwind": "$U2"}, { "$match": { - "$expr": { - "$and": [ - {"$eq": ["$U2.name", "Alice"]}, + "$and": [ + {"U2.name": "Alice"}, + {"$expr": {"$eq": ["$library_id","$$parent__field__0"]} - ] - } + } + ] } }, {"$project": {"a": {"$literal": 1}}}, @@ -385,12 +388,10 @@ def test_filter_on_local_and_related_fields(self): {"$unwind": "$queries__reader"}, { "$match": { - "$expr": { - "$and": [ - {"$eq": ["$name", "Central"]}, - {"$eq": ["$queries__reader.name", "Alice"]} - ] - } + "$and": [ + {"name": "Central"}, + {"queries__reader.name": "Alice"} + ] } } ] @@ -473,7 +474,7 @@ def test_or_on_local_fields_only(self): } }, {"$unwind": "$queries__reader"}, - {"$match": {"$expr": {"$eq": ["$name", "Ateneo"]}}}, + {"$match": {"name": "Ateneo"}}, { "$project": { "queries__reader": {"foreing_field": "$queries__reader.name"}, @@ -556,12 +557,10 @@ def test_or_with_mixed_pushable_and_non_pushable_fields(self): {"$unwind": "$queries__reader"}, { "$match": { - "$expr": { - "$or": [ - {"$eq": ["$queries__reader.name", "Alice"]}, - {"$eq": ["$name", "Central"]} - ] - } + "$or": [ + {"queries__reader.name": "Alice"}, + {"name": "Central"} + ] } } ])