Skip to content

Commit be43963

Browse files
committed
PoC Expression Conversion Abstraction
1 parent 0b46a50 commit be43963

File tree

6 files changed

+402
-0
lines changed

6 files changed

+402
-0
lines changed

query_conversion/__init__.py

Whitespace-only changes.
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
"""Expression To Match Converters"""
2+
3+
4+
class _BaseExpressionConverter:
5+
"""
6+
Base class for optimizers that handle specific operations in MQL queries.
7+
This class can be extended to implement optimizations for other operations.
8+
"""
9+
10+
@classmethod
11+
def optimize(cls, expr):
12+
raise NotImplementedError("Subclasses should implement this method.")
13+
14+
@classmethod
15+
def is_simple_value(cls, value):
16+
"""
17+
Check if the value is a simple type (not a list or dict).
18+
"""
19+
return isinstance(value, (str, int, float, bool)) or value is None
20+
21+
@classmethod
22+
def is_convertable_field_name(cls, field_name):
23+
"""Validate a field_name is one that can be represented in $match"""
24+
# This needs work and re-evaluation
25+
if (
26+
isinstance(field_name, str)
27+
and field_name.startswith("$")
28+
and not field_name[:1].isalnum()
29+
):
30+
return True
31+
return False
32+
33+
34+
class _EqExpressionConverter(_BaseExpressionConverter):
    """Convert $eq operation to a $match compatible query."""

    @classmethod
    def optimize(cls, eq_args):
        """
        Convert ``["$field", value]`` into ``{"field": value}``.

        Args:
            eq_args: The argument list of an ``$eq`` expression.

        Returns:
            A single-key ``$match`` condition, or None when the arguments
            are not a two-element list, the first element is not a plain
            field path, or the value fails ``is_simple_value``.
        """
        if not (isinstance(eq_args, list) and len(eq_args) == 2):
            return None

        field_expr, value = eq_args
        if not isinstance(field_expr, str) or not field_expr.startswith("$"):
            return None

        field_name = field_expr[1:]  # Remove the $ prefix
        # "$" alone leaves an empty name and "$$var" is a variable, not a
        # document field; neither can be used as a $match key.
        if not field_name or field_name.startswith("$"):
            return None

        if not cls.is_simple_value(value):
            return None
        return {field_name: value}
52+
53+
54+
class _InExpressionConverter(_BaseExpressionConverter):
    """Convert $in operation to a $match compatible query."""

    @classmethod
    def optimize(cls, in_args):
        """
        Convert ``["$field", [v1, v2]]`` into ``{"field": {"$in": [v1, v2]}}``.

        Args:
            in_args: The argument list of an ``$in`` expression.

        Returns:
            A single-key ``$match`` condition, or None when the arguments
            are not a two-element list, the first element is not a plain
            field path, or the second is not a list whose items all pass
            ``is_simple_value``.
        """
        if not (isinstance(in_args, list) and len(in_args) == 2):
            return None

        field_expr, values = in_args
        if not isinstance(field_expr, str) or not field_expr.startswith("$"):
            return None

        field_name = field_expr[1:]  # Remove the $ prefix
        # "$" alone leaves an empty name and "$$var" is a variable, not a
        # document field; neither can be used as a $match key.
        if not field_name or field_name.startswith("$"):
            return None

        if not isinstance(values, list):
            return None
        if not all(cls.is_simple_value(v) for v in values):
            return None
        return {field_name: {"$in": values}}
71+
72+
73+
class _LogicalExpressionConverter(_BaseExpressionConverter):
    """
    Generic for converting logical operations to a $match compatible query.

    Subclasses must define ``_logical_op`` (the operator key used in the
    result); this class does not define it itself.
    """

    @classmethod
    def optimize(cls, combined_conditions):
        """
        Convert every operand of a logical operator, all or nothing.

        Args:
            combined_conditions: The operand list of the logical expression.

        Returns:
            ``{cls._logical_op: [converted, ...]}``, or None when the
            operands are not a list, the list is empty, or any operand
            cannot be converted.
        """
        if not isinstance(combined_conditions, list):
            return None

        optimized_conditions = []
        for condition in combined_conditions:
            # convert_expression returns None for anything that is not a
            # single-key dict with a supported operator. Any failure should
            # stop optimization: silently dropping an operand would change
            # the meaning of the query.
            optimized_condition = convert_expression(condition)
            if not optimized_condition:
                return None
            optimized_conditions.append(optimized_condition)

        if not optimized_conditions:
            return None
        return {cls._logical_op: optimized_conditions}
90+
91+
92+
class _OrExpressionConverter(_LogicalExpressionConverter):
    """Convert $or operation to a $match compatible query."""

    # Operator key the inherited optimize() uses for the converted result.
    _logical_op = "$or"
96+
97+
98+
class _AndExpressionConverter(_LogicalExpressionConverter):
    """Convert $and operation to a $match compatible query."""

    # Operator key the inherited optimize() uses for the converted result.
    _logical_op = "$and"
102+
103+
104+
# Dispatch table: aggregation operator name -> converter class whose
# optimize() classmethod turns that operator's arguments into a $match query.
OPTIMIZABLE_OPS = {
    # Comparison operators
    "$eq": _EqExpressionConverter,
    "$in": _InExpressionConverter,
    # Logical operators
    "$and": _AndExpressionConverter,
    "$or": _OrExpressionConverter,
}
110+
111+
112+
def convert_expression(expr):
    """
    Convert a single-operator MQL expression into a $match condition.

    Args:
        expr: Dictionary containing the MQL expression

    Returns:
        Optimized match condition or None if not optimizable (``expr`` is
        not a one-key dict, or its operator has no registered converter)
    """
    # Only expressions of the form {operator: arguments} are candidates.
    if not isinstance(expr, dict) or len(expr) != 1:
        return None

    ((operator, arguments),) = expr.items()
    converter = OPTIMIZABLE_OPS.get(operator)
    if converter is None:
        return None
    return converter.optimize(arguments)

query_conversion/query_optimizer.py

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
from copy import deepcopy
2+
3+
from expression_converters import convert_expression
4+
5+
6+
class QueryOptimizer:
    """
    Rewrite an MQL ``$expr`` query as ``$match`` stages.

    Conditions that ``convert_expression`` can translate are lifted into
    plain ``$match`` conditions; everything else is kept inside ``$expr``.
    """

    def optimize(self, expr):
        """
        Takes an MQL query with $expr and optimizes it by extracting
        optimizable conditions into separate $match stages.

        Args:
            expr: Dictionary containing the $expr query. It is deep-copied,
                so the caller's object is never modified.

        Returns:
            List of optimized match conditions. A query without ``$expr``
            is returned unchanged as the only list element.
        """
        expr_query = deepcopy(expr)

        if "$expr" not in expr_query:
            return [expr_query]

        if expr_query["$expr"] == {}:
            return [{"$match": {}}]

        match_conditions = []
        remaining_expr_conditions = []

        # Handle the expression content
        self._process_expression(
            expr_query["$expr"], match_conditions, remaining_expr_conditions
        )

        # If there are remaining conditions that couldn't be optimized,
        # keep them in an $expr
        if remaining_expr_conditions:
            if len(remaining_expr_conditions) == 1:
                expr_conditions = {"$expr": remaining_expr_conditions[0]}
            else:
                expr_conditions = {"$expr": {"$and": remaining_expr_conditions}}

            if match_conditions:
                # Every entry in match_conditions is a {"$match": {...}} dict
                # built by the helpers below.
                match_conditions[0]["$match"].update(expr_conditions)
            else:
                # Nothing was lifted out: the leftover $expr must still be
                # emitted, otherwise the filter would be silently discarded.
                match_conditions.append({"$match": expr_conditions})

        return match_conditions

    def _process_expression(self, expr, match_conditions, remaining_conditions):
        """
        Process an expression and extract optimizable conditions.

        Args:
            expr: The expression to process
            match_conditions: List to append optimized match conditions
            remaining_conditions: List to append non-optimizable conditions
        """
        if not isinstance(expr, dict):
            # Can't optimize
            remaining_conditions.append(expr)
            return

        # Top-level $and / $or are handled operand by operand so that a
        # partially convertible expression can still be partially lifted.
        has_and = "$and" in expr
        has_or = "$or" in expr
        if has_and:
            self._process_logical_conditions(
                "$and", expr["$and"], match_conditions, remaining_conditions
            )
        if has_or:
            self._process_logical_conditions(
                "$or", expr["$or"], match_conditions, remaining_conditions
            )
        if not has_and and not has_or:
            # Process single condition
            optimized = convert_expression(expr)
            if optimized:
                match_conditions.append({"$match": optimized})
            else:
                remaining_conditions.append(expr)

    def _process_logical_conditions(
        self, logical_op, logical_conditions, match_conditions, remaining_conditions
    ):
        """
        Process conditions within a logical array.

        Args:
            logical_op: The logical operator key ("$and" or "$or")
            logical_conditions: List of conditions within logical operator
            match_conditions: List to append optimized match conditions
            remaining_conditions: List to append non-optimizable conditions
        """
        optimized_conditions = []
        unconverted_conditions = []
        for condition in logical_conditions:
            optimized = (
                convert_expression(condition) if isinstance(condition, dict) else None
            )
            if optimized:
                optimized_conditions.append(optimized)
            else:
                unconverted_conditions.append(condition)

        # The caller ANDs remaining_conditions with the $match stage, so
        # operands may only be split apart for $and. For any other operator
        # (e.g. $or) a single unconverted operand forces the whole expression
        # to stay in $expr. The same applies when nothing converted at all,
        # which also avoids emitting an empty {"$and": []} / {"$or": []}.
        if not optimized_conditions or (
            unconverted_conditions and logical_op != "$and"
        ):
            remaining_conditions.append({logical_op: logical_conditions})
            return

        remaining_conditions.extend(unconverted_conditions)
        match_conditions.append({"$match": {logical_op: optimized_conditions}})
105+
106+
107+
def test_optimizer(optimizer, query, idx):
    """
    Print a query before and after it is run through an optimizer.

    Args:
        optimizer: Object whose ``optimize(query)`` method is called
        query: The MQL query to display and optimize
        idx: Position of the query in the test list; currently unused and
            kept so existing callers do not break
    """
    # Imported locally so this helper also works when the module is imported;
    # otherwise pprint is only bound inside the __main__ guard.
    from pprint import pprint

    print("Before optimization:")
    pprint(query)
    print("After optimization:")
    pprint(optimizer.optimize(query))
    print()
116+
117+
118+
# Example usage and test cases
119+
if __name__ == "__main__":
    # Demo driver: each queryN is a (query, description) pair that is printed
    # and pushed through the optimizer in order.
    optimizer = QueryOptimizer()
    from pprint import pprint

    # 1: a lone $eq
    query1 = (
        {"$expr": {"$eq": ["$status", "active"]}},
        "Test 1 - Simple $eq:",
    )

    # 2: a lone $in
    query2 = (
        {"$expr": {"$in": ["$category", ["electronics", "books", "clothing"]]}},
        "Test 2 - Simple $in:",
    )

    # 3: $and where every operand is optimizable
    query3 = (
        {
            "$expr": {
                "$and": [
                    {"$eq": ["$status", "active"]},
                    {"$in": ["$category", ["electronics", "books"]]},
                    {"$eq": ["$verified", True]},
                ]
            }
        },
        "Test 3 - $and with optimizable conditions:",
    )

    # 4: $and mixing optimizable operands with a $gt, which is not optimizable
    query4 = (
        {
            "$expr": {
                "$and": [
                    {"$eq": ["$status", "active"]},
                    {"$gt": ["$price", 100]},
                    {"$in": ["$category", ["electronics"]]},
                ]
            }
        },
        "Test 4 - Mixed conditions:",
    )

    # 5: a lone non-optimizable condition
    query5 = (
        {"$expr": {"$gt": ["$price", 100]}},
        "Test 5 - Non-optimizable condition:",
    )

    # 6: $or containing a nested $and
    query6 = (
        {
            "$expr": {
                "$or": [
                    {"$eq": ["$status", "active"]},
                    {"$in": ["$category", ["electronics", "books"]]},
                    {"$and": [{"$eq": ["$verified", True]}, {"$gt": ["$price", 50]}]},
                ]
            }
        },
        "Test 6 - Nested $or conditions:",
    )

    # 7: $and containing a partially optimizable $or plus a $gt
    query7 = (
        {
            "$expr": {
                "$and": [
                    {
                        "$or": [
                            {"$eq": ["$status", "active"]},
                            {"$gt": ["$views", 1000]},
                        ]
                    },
                    {"$in": ["$category", ["electronics", "books"]]},
                    {"$eq": ["$verified", True]},
                    {"$gt": ["$price", 50]},
                ]
            }
        },
        "Test 7 - Complex nested conditions:",
    )

    # 8: single-value $in ("London")
    query8 = (
        {"$expr": {"$in": ["$author_city", ["London"]]}},
        "Test 8 - London $in test case:",
    )

    # 9: $and > $or > $and nesting, all operands optimizable
    query9 = (
        {
            "$expr": {
                "$and": [
                    {
                        "$or": [
                            {"$eq": ["$type", "premium"]},
                            {
                                "$and": [
                                    {"$eq": ["$type", "standard"]},
                                    {"$in": ["$region", ["US", "CA"]]},
                                ]
                            },
                        ]
                    },
                    {"$eq": ["$active", True]},
                ]
            }
        },
        "Test 9 - Deeply nested logical operations:",
    )

    # 10: labelled as the "with Variables" variant.
    # NOTE(review): this query is currently identical to query9 and contains
    # no variable reference; confirm what it was meant to exercise.
    query10 = (
        {
            "$expr": {
                "$and": [
                    {
                        "$or": [
                            {"$eq": ["$type", "premium"]},
                            {
                                "$and": [
                                    {"$eq": ["$type", "standard"]},
                                    {"$in": ["$region", ["US", "CA"]]},
                                ]
                            },
                        ]
                    },
                    {"$eq": ["$active", True]},
                ]
            }
        },
        "Test 10 - Deeply nested logical operations, with Variables!",
    )

    queries = [
        query1,
        query2,
        query3,
        query4,
        query5,
        query6,
        query7,
        query8,
        query9,
        query10,
    ]

    for idx, (query, description) in enumerate(queries, start=1):
        print(description)
        test_optimizer(optimizer, query, idx)

tests/expression_converter_/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)