Skip to content

Commit 5a1c11a

Browse files
Merge pull request #49 from RS-PYTHON/feat/rspy-723-composite-cql2-filters-dont-work
Feat/RSPY-723: composite CQL2 filters don't work in Auxip staging (mockup only)
2 parents 82d24f1 + 9f7a37a commit 5a1c11a

File tree

2 files changed

+232
-135
lines changed

2 files changed

+232
-135
lines changed

src/ADGS/adgs_station_mock.py

Lines changed: 93 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def hello():
124124
return Response(status=HTTPStatus.OK)
125125

126126

127-
def process_products_request(request, headers):
127+
def process_products_request(request, headers) -> Response:
128128
"""Docstring to be added."""
129129
catalog_path = app.config["configuration_path"] / "Catalog/GETFileResponse.json"
130130
catalog_data = json.loads(open(catalog_path).read())
@@ -241,9 +241,9 @@ def process_products_request(request, headers):
241241
def process_query(query):
242242
# Step 1: Remove the part before "any("
243243
queries = query.split("any(")
244-
244+
245245
results = []
246-
246+
247247
# Step 2: Process each part individually
248248
for q in queries:
249249
if ")" in q:
@@ -254,9 +254,95 @@ def process_query(query):
254254
# Collect and clean up each part
255255
for part in parts:
256256
results.append(part.strip())
257-
257+
258258
return results
259+
260+
261+
def is_operator_next(expression: str, position: int) -> str:
    """Return the known operator that starts at ``position`` in ``expression``.

    Candidates are taken, in iteration order, from the module-level
    ``aditional_operators``. A candidate only matches when at least one more
    character follows it in ``expression``: an operator that would end exactly
    at the end of the string is not reported.

    Returns the first matching operator, or an empty string when none matches.
    """
    total_length = len(expression)
    for candidate in aditional_operators:
        stop = position + len(candidate)
        # Require something after the operator (strict bound, as before)
        if stop >= total_length:
            continue
        if expression[position:stop] == candidate:
            return candidate
    return ""
269+
270+
271+
def split_composite_filter(filter_to_split: str) -> tuple[list[str], list[str]]:
    """Split a composite filter into its first-level sub-filters and operators.

    The filter is split only at depth 0, i.e. on operators found outside of any
    parenthesis. Which operators are recognised is decided by
    ``is_operator_next``.

    Examples:
        - used on "(field1 or condition1) and (field2 or condition2)" it will return
          ["(field1 or condition1)", "(field2 or condition2)"] with operators = ["and"]
          (each sub-filter loses its parentheses when it is split in turn)
        - used on "field1 or condition1" it will return ["field1", "condition1"]
          with operators = ["or"]

    Parentheses that enclose the whole filter are removed before splitting, so
    "(field1 and condition1)" and "((a or b) and (c or d))" are handled like
    their unwrapped form. Surrounding whitespace is ignored. A filter such as
    "SomeInfo(field1 and condition1)" is not enclosed as a whole: its operator
    is inside parentheses, so it is returned unsplit.

    Args:
        filter_to_split: the filter expression to split.

    Returns:
        A tuple ``(sub_filters, operators)``. Each sub-filter is stripped of
        surrounding whitespace, and ``operators[k]`` is the (stripped) operator
        found between ``sub_filters[k]`` and ``sub_filters[k + 1]``. Both lists
        are empty when the filter has no content.
    """
    # Work on a normalised string: detecting and removing the enclosing
    # parentheses must be done on the same (stripped) text.
    expression = _strip_enclosing_parentheses(filter_to_split)

    sub_filters: list[str] = []
    operators: list[str] = []
    current: list[str] = []
    depth = 0
    position = 0

    # Split filter at depth 0 based on operators (anything outside parentheses)
    while position < len(expression):
        char = expression[position]
        if char == "(":
            depth += 1
        elif char == ")":
            depth -= 1
        elif depth == 0 and (operator := is_operator_next(expression, position)):
            sub_filters.append("".join(current).strip())
            current = []
            operators.append(operator.strip())
            # Jump over the whole operator, it belongs to no sub-filter
            position += len(operator)
            continue
        current.append(char)
        position += 1

    if current:
        sub_filters.append("".join(current).strip())

    return sub_filters, operators


def _strip_enclosing_parentheses(text: str) -> str:
    """Return ``text`` stripped of whitespace and of parentheses wrapping all of it.

    "(a and b)" and "((a and b))" both become "a and b", whereas
    "(a) and (b)" and "Info(a and b)" are returned unchanged (apart from
    surrounding whitespace).
    """
    text = text.strip()
    while text.startswith("(") and text.endswith(")"):
        depth = 0
        closing_index = -1
        # Find where the very first parenthesis is closed
        for index, char in enumerate(text):
            if char == "(":
                depth += 1
            elif char == ")":
                depth -= 1
                if depth == 0:
                    closing_index = index
                    break
        # Only strip when the first "(" is closed by the last character
        if closing_index != len(text) - 1:
            break
        text = text[1:-1].strip()
    return text
319+
320+
321+
def process_filter(request, input_filter: str) -> Response:
    """Recursively evaluate a filter (composite or not) and return its result.

    The filter is split at its first level with ``split_composite_filter``:

    - no sub-filter at all (empty filter): a BAD_REQUEST response is returned;
    - a single sub-filter: it is evaluated with ``process_attributes_search``
      when it contains "Attributes" or "OData.CSC", otherwise with
      ``process_products_request``;
    - several sub-filters: each one is evaluated by a recursive call and the
      results are combined from left to right with ``process_common_elements``
      using the operator found between them.

    NOTE: operators of the same level are applied strictly left to right, no
    precedence between them is taken into account.

    Args:
        request: the incoming request; only ``request.args`` is read here and
            forwarded to the processing functions.
        input_filter: the filter expression to evaluate.

    Returns:
        The response holding the result of the whole filter.
    """
    # Split the filter
    sub_filters, operators = split_composite_filter(input_filter)

    # Nothing to evaluate (e.g. "" or "()"): answer explicitly instead of
    # failing on the first sub-filter lookup
    if not sub_filters:
        return Response(status=HTTPStatus.BAD_REQUEST)

    # If there is only one filter, apply it and gather results
    if len(sub_filters) == 1:
        end_filter = sub_filters[0]
        if "Attributes" in end_filter or "OData.CSC" in end_filter:
            return process_attributes_search(end_filter, request.args)
        return process_products_request(str(end_filter), request.args)

    # If there is more than one filter, repeat the operation on each one and
    # combine its results with the accumulated ones using the operator that
    # precedes it. Iterating over the pairs guarantees that every sub-filter
    # is consumed exactly once.
    final_results = process_filter(request, sub_filters[0])
    for operator, sub_filter in zip(operators, sub_filters[1:]):
        current_filter_results = process_filter(request, sub_filter)
        final_results = process_common_elements(final_results, current_filter_results, operator)
    return final_results
344+
345+
260346
def extract_values_and_operation(part1, part2):
261347
# Regular expression to capture the operation and value between single quotes
262348
pattern = r"(\b(eq|gt|lt)\b)\s+'(.*?)'"
@@ -274,7 +360,7 @@ def extract_values_and_operation(part1, part2):
274360

275361
return value1, operation, value2
276362

277-
def process_attributes_search(query, headers):
363+
def process_attributes_search(query, headers) -> Response:
278364
# Don;t touch this, it just works
279365
results = process_query(query)
280366
if len(results) == 2:
@@ -382,131 +468,13 @@ def query_products():
382468
catalog_data = json.loads(open(catalog_path).read())
383469
return Response(status=HTTPStatus.OK, response=prepare_response_odata_v4(catalog_data['Data']), headers=request.args)
384470
# Handle parantheses
385-
if not (match := re.search(r"\(([^()]*\sor\s[^()]*)\)", request.args["$filter"])):
471+
if not re.search(r"\(([^()]*\sor\s[^()]*)\)", request.args["$filter"]):
386472
if not any(
387473
[query_text in request.args["$filter"].split(" ")[0] for query_text in ["Name", "PublicationDate", "Attributes", "ContentDate/Start", "ContentDate/End"]],
388474
):
389475
return Response(status=HTTPStatus.BAD_REQUEST)
390-
else:
391-
if " and " not in request.args['$filter']:
392-
conditions = re.split(r"\s+or\s+|\s+OR\s+", match.group(1))
393-
responses = [process_products_request(cond, request.args) for cond in conditions]
394-
first_response = json.loads(responses[0].data)['value']
395-
second_response = json.loads(responses[1].data)['value']
396-
fresp_set = {d.get("Id", None) for d in first_response}
397-
sresp_set = {d.get("Id", None) for d in second_response}
398-
union_set = fresp_set.union(sresp_set)
399-
union_elements = [d for d in first_response + second_response if d.get("Id") in union_set]
400-
return Response(status=HTTPStatus.OK, response=prepare_response_odata_v4(union_elements), headers=request.args)
401-
match len(request.args['$filter'].split(" and ")):
402-
case 1:
403-
conditions = re.split(r"\s+or\s+|\s+OR\s+", match.group(1))
404-
responses = [process_products_request(cond, request.args) for cond in conditions]
405-
first_response = json.loads(responses[0].data)['value']
406-
second_response = json.loads(responses[1].data)['value']
407-
fresp_set = {d.get("Id", None) for d in first_response}
408-
sresp_set = {d.get("Id", None) for d in second_response}
409-
union_set = fresp_set.union(sresp_set)
410-
union_elements = [d for d in first_response + second_response if d.get("Id") in union_set]
411-
responses = json.loads(process_products_request(filter, request.args).data)["value"]
412-
fresp_set = {d.get("Id", None) for d in responses}
413-
sresp_set = {d.get("Id", None) for d in union_elements}
414-
common_response = fresp_set.intersection(sresp_set)
415-
common_elements = [d for d in responses if d.get("Id") in common_response]
416-
if common_elements:
417-
return Response(
418-
status=HTTPStatus.OK,
419-
response=prepare_response_odata_v4(common_elements),
420-
headers=request.args,
421-
)
422-
return Response(status=HTTPStatus.OK, response=json.dumps({"value": []}))
423-
case 2:
424-
union_elements = []
425-
for ops in request.args['$filter'].split(" and "):
426-
conditions = re.split(r"\s+or\s+|\s+OR\s+|\(|\)", ops)
427-
conditions = [p for p in conditions if p.strip()]
428-
responses = [process_products_request(cond, request.args) for cond in conditions]
429-
first_response = json.loads(responses[0].data)['value']
430-
second_response = json.loads(responses[1].data)['value']
431-
fresp_set = {d.get("Id", None) for d in first_response}
432-
sresp_set = {d.get("Id", None) for d in second_response}
433-
union_set = fresp_set.union(sresp_set)
434-
union_elements.append([d for d in first_response + second_response if d.get("Id") in union_set])
435-
first_ops_response = {d.get("Id", None) for d in union_elements[0]}
436-
second_ops_response = {d.get("Id", None) for d in union_elements[1]}
437-
common_response = first_ops_response.intersection(second_ops_response)
438-
common_elements = [d for d in first_response + second_response if d.get("Id") in common_response]
439-
return Response(status=HTTPStatus.OK, response=prepare_response_odata_v4(common_elements), headers=request.args)
440-
case 4:
441-
conditions = request.args['$filter'].split(" and ")
442-
443-
if any("PublicationDate" in cond for cond in conditions):
444-
pass # Do nothing if at least one condition contains "PublicationDate"
445-
case _:
446-
msg = "Too complex for adgs sim"
447-
logger.error(msg)
448-
return Response ("Too complex for adgs sim", status=HTTPStatus.BAD_REQUEST)
449-
450-
if len(qs_parser := request.args['$filter'].split(' and ')) > 2:
451-
outputs = []
452-
properties_filter = []
453-
attributes_filter = []
454-
for filter in qs_parser:
455-
if any(property in filter for property in ["contains", "PublicationDate"]):
456-
properties_filter.append(filter)
457-
elif "OData.CSC.StringAttribute" in filter:
458-
attributes_filter.append(filter)
459-
460-
if len(properties_filter) > 4 or len(attributes_filter) > 4:
461-
msg = "Too complex for adgs sim"
462-
logger.error(msg)
463-
return Response ("Too complex for adgs sim", status=HTTPStatus.BAD_REQUEST)
464-
# Tempfix, when filter is very complex, use only GT / LT
465-
properties_filter = [f.split(" or ")[0].strip("'\"()") if " or " in f else f for f in properties_filter]
466-
# Process each property in the filter
467-
processed_requests = [
468-
process_products_request(prop.strip("'\""), request.args)
469-
for prop in properties_filter
470-
]
471-
472-
# Combine the processed requests based on the number of filters
473-
if len(processed_requests) in {1, 2, 3}:
474-
if len(processed_requests) == 1:
475-
outputs.append(processed_requests[0])
476-
else:
477-
common_elements = processed_requests[0]
478-
for req in processed_requests[1:]:
479-
common_elements = process_common_elements(common_elements, req, "and")
480-
outputs.append(common_elements)
481-
if not attributes_filter:
482-
# If there are no attributes to process, just return this
483-
return Response(status=outputs[0].status, response=outputs[0].data, headers=request.args)
484-
485-
486-
# Handle attributes_filter processing
487-
if len(attributes_filter) in {2, 4}:
488-
for i in range(0, len(attributes_filter), 2):
489-
outputs.append(process_attributes_search(f"{attributes_filter[i]} and {attributes_filter[i + 1]}", request.args))
490-
491-
try:
492-
return process_common_elements(outputs[0], outputs[1], "and")
493-
except IndexError:
494-
return Response(status=HTTPStatus.OK, response=json.dumps({"value": []}))
495476

496-
if "Attributes" in request.args['$filter'] or "OData.CSC" in request.args['$filter']:
497-
return process_attributes_search(request.args['$filter'], request.args)
498-
if any(header in request.args["$filter"] for header in aditional_operators):
499-
pattern = r"(\S+ \S+ \S+) (\S+) (\S+ \S+ \S+)"
500-
groups = re.search(pattern, request.args["$filter"])
501-
if groups:
502-
first_request, operator, second_request = groups.group(1), groups.group(2), groups.group(3)
503-
# split and processes the requests
504-
first_response = process_products_request(first_request.replace('"', ""), request.args)
505-
second_response = process_products_request(second_request.replace('"', ""), request.args)
506-
# Load response data to a json dict
507-
return process_common_elements(first_response, second_response, operator)
508-
509-
return process_products_request(str(request.args["$filter"]), request.args)
477+
return process_filter(request, request.args['$filter'])
510478

511479

512480
@app.route("/Products(<Id>)/$value", methods=["GET"])

0 commit comments

Comments
 (0)