Skip to content

Commit 0d22545

Browse files
committed
[IMP] util.explode_query_range: replace problematic parallel_filter formatting
The usage of `str.format` to inject the parallel filter used to explode queries is not robust to the presence of other curly braces. Examples: 1. `JSON` strings (typically to leverage their mapping capabilities): see 79f3d71, where a query had to be modified to accomodate that. 2. Hardcoded sets of curly braces: ```python >>> "UPDATE t SET c = '{usage as literal characters}' WHERE {parallel_filter}".format(parallel_filter="…") Traceback (most recent call last): File "<stdin>", line 1, in <module> KeyError: 'usage as literal characters' ``` Which can be (unelegantly) solved adding even more braces, leveraging one side-effect of `.format`: ```python >>> "UPDATE t SET c = '{{usage as literal characters}}' WHERE {parallel_filter}".format(parallel_filter="…") "UPDATE t SET c = '{usage as literal characters}' WHERE …" ``` 3. Hardcoded curly braces (AFAICT no way to solve this): ```python >>> "UPDATE t SET c = 'this is an open curly brace = {' WHERE {parallel_filter}".format(parallel_filter="…") Traceback (most recent call last): File "<stdin>", line 1, in <module> ValueError: unexpected '{' in field name ``` ```python >>> "UPDATE t SET c = 'this is a close brace = }' WHERE {parallel_filter}".format(parallel_filter="…") Traceback (most recent call last): File "<stdin>", line 1, in <module> ValueError: Single '}' encountered in format string ```
1 parent 98a959e commit 0d22545

File tree

2 files changed

+46
-5
lines changed

2 files changed

+46
-5
lines changed

src/base/tests/test_util.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -773,6 +773,34 @@ def _get_cr(self):
773773
self.addCleanup(cr.close)
774774
return cr
775775

776+
def test_explode_format_parallel_filter(self):
777+
cr = self._get_cr()
778+
779+
cr.execute("SELECT MIN(id) FROM res_users")
780+
min_id = cr.fetchone()[0]
781+
782+
q1 = "SELECT '{} {0} {x} {x:*^30d} {x.a.b} {x[0]}, {x[1]!s:*^30} {{x}}' FROM res_users"
783+
q2 = "SELECT '{} {0} {x} {x:*^30d} {x.a.b} {x[0]}, {x[1]!s:*^30} {{x}}' FROM res_users WHERE {parallel_filter}"
784+
expected_out = q1 + f" WHERE res_users.id BETWEEN {min_id} AND {min_id}"
785+
786+
out1 = util.explode_query_range(
787+
cr,
788+
q1,
789+
table="res_users",
790+
bucket_size=1,
791+
format=False,
792+
)[0]
793+
self.assertEqual(out1, expected_out)
794+
795+
out2 = util.explode_query_range(
796+
cr,
797+
q2,
798+
table="res_users",
799+
bucket_size=1,
800+
format=False,
801+
)[0]
802+
self.assertEqual(out2, expected_out)
803+
776804
def test_explode_mult_filters(self):
777805
cr = self._get_cr()
778806
queries = util.explode_query_range(

src/util/pg.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ def explode_query(cr, query, alias=None, num_buckets=8, prefix=None):
260260
return [cr.mogrify(query, [num_buckets, index]).decode() for index in range(num_buckets)]
261261

262262

263-
def explode_query_range(cr, query, table, alias=None, bucket_size=DEFAULT_BUCKET_SIZE, prefix=None):
263+
def explode_query_range(cr, query, table, alias=None, bucket_size=DEFAULT_BUCKET_SIZE, prefix=None, format=True):
264264
"""
265265
Explode a query to multiple queries that can be executed in parallel.
266266
@@ -335,16 +335,27 @@ def explode_query_range(cr, query, table, alias=None, bucket_size=DEFAULT_BUCKET
335335
# Still, since the query may only be valid if there is no split, we force the usage of `prefix` in the query to
336336
# validate its correctness and avoid scripts that pass the CI but fail in production.
337337
parallel_filter = "{alias}.id IS NOT NULL".format(alias=alias)
338-
return [query.format(parallel_filter=parallel_filter)]
338+
return [
339+
(
340+
query.format(parallel_filter=parallel_filter)
341+
if format
342+
else query.replace("{parallel_filter}", parallel_filter)
343+
)
344+
]
339345

340346
parallel_filter = "{alias}.id BETWEEN %(lower-bound)s AND %(upper-bound)s".format(alias=alias)
341-
query = query.replace("%", "%%").format(parallel_filter=parallel_filter)
347+
348+
query = query.replace("%", "%%")
349+
query = (
350+
query.format(parallel_filter=parallel_filter) if format else query.replace("{parallel_filter}", parallel_filter)
351+
)
352+
342353
return [
343354
cr.mogrify(query, {"lower-bound": ids[i], "upper-bound": ids[i + 1] - 1}).decode() for i in range(len(ids) - 1)
344355
]
345356

346357

347-
def explode_execute(cr, query, table, alias=None, bucket_size=DEFAULT_BUCKET_SIZE, logger=_logger):
358+
def explode_execute(cr, query, table, alias=None, bucket_size=DEFAULT_BUCKET_SIZE, logger=_logger, format=True):
348359
"""
349360
Execute a query in parallel.
350361
@@ -376,6 +387,8 @@ def explode_execute(cr, query, table, alias=None, bucket_size=DEFAULT_BUCKET_SIZ
376387
:param int bucket_size: size of the buckets of ids to split the processing
377388
:param logger: logger used to report the progress
378389
:type logger: :class:`logging.Logger`
390+
:param bool format: whether to use `.format` (instead of `.replace`) to replace the parallel filter,
391+
setting it to `False` can prevent issues with hard-coded curly braces.
379392
:return: the sum of `cr.rowcount` for each query run
380393
:rtype: int
381394
@@ -387,7 +400,7 @@ def explode_execute(cr, query, table, alias=None, bucket_size=DEFAULT_BUCKET_SIZ
387400
"""
388401
return parallel_execute(
389402
cr,
390-
explode_query_range(cr, query, table, alias=alias, bucket_size=bucket_size),
403+
explode_query_range(cr, query, table, alias=alias, bucket_size=bucket_size, format=format),
391404
logger=logger,
392405
)
393406

0 commit comments

Comments
 (0)