Skip to content

Commit 468ecf7

Browse files
alexmojaki and basepi authored
Optimise handling PostgreSQL dollar quotes (#1905)
* Optimise handling PostgreSQL dollar quotes * CHANGELOG --------- Co-authored-by: Colton Myers <[email protected]>
1 parent f216566 commit 468ecf7

File tree

3 files changed

+56
-24
lines changed

3 files changed

+56
-24
lines changed

CHANGELOG.asciidoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ endif::[]
3737
===== Features
3838
3939
* Collect the `configured_hostname` and `detected_hostname` separately, and switch to FQDN for the `detected_hostname`. {pull}1891[#1891]
40+
* Improve postgres dollar-quote detection to be much faster {pull}1905[#1905]
4041
4142
[float]
4243
===== Bug fixes

elasticapm/instrumentation/packages/dbapi2.py

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -54,21 +54,6 @@ def __repr__(self):
5454
return "<Literal {}{}{}>".format(self.literal_type, self.content, self.literal_type)
5555

5656

57-
def skip_to(start, tokens, value_sequence):
58-
i = start
59-
while i < len(tokens):
60-
for idx, token in enumerate(value_sequence):
61-
if tokens[i + idx] != token:
62-
break
63-
else:
64-
# Match
65-
return tokens[start : i + len(value_sequence)]
66-
i += 1
67-
68-
# Not found
69-
return None
70-
71-
7257
def look_for_table(sql, keyword):
7358
tokens = tokenize(sql)
7459
table_name = _scan_for_table_with_tokens(tokens, keyword)
@@ -109,7 +94,6 @@ def scan(tokens):
10994
prev_was_escape = False
11095
lexeme.append(token)
11196
else:
112-
11397
if token == literal_started:
11498
if literal_started == "'" and len(tokens) > i + 1 and tokens[i + 1] == "'": # double quotes
11599
i += 1
@@ -133,14 +117,30 @@ def scan(tokens):
133117
# Postgres can use arbitrary characters between two $'s as a
134118
# literal separation token, e.g.: $fish$ literal $fish$
135119
# This part will detect that and skip over the literal.
136-
skipped_token = skip_to(i + 1, tokens, "$")
137-
if skipped_token is not None:
138-
dollar_token = ["$"] + skipped_token
139-
140-
skipped = skip_to(i + len(dollar_token), tokens, dollar_token)
141-
if skipped: # end wasn't found.
142-
yield i, Literal("".join(dollar_token), "".join(skipped[: -len(dollar_token)]))
143-
i = i + len(skipped) + len(dollar_token)
120+
try:
121+
# Closing dollar of the opening quote,
122+
# i.e. the second $ in the first $fish$
123+
closing_dollar_idx = tokens.index("$", i + 1)
124+
except ValueError:
125+
pass
126+
else:
127+
quote = tokens[i : closing_dollar_idx + 1]
128+
length = len(quote)
129+
# Opening dollar of the closing quote,
130+
# i.e. the first $ in the second $fish$
131+
closing_quote_idx = closing_dollar_idx + 1
132+
while True:
133+
try:
134+
closing_quote_idx = tokens.index("$", closing_quote_idx)
135+
except ValueError:
136+
break
137+
if tokens[closing_quote_idx : closing_quote_idx + length] == quote:
138+
yield i, Literal(
139+
"".join(quote), "".join(tokens[closing_dollar_idx + 1 : closing_quote_idx])
140+
)
141+
i = closing_quote_idx + length
142+
break
143+
closing_quote_idx += 1
144144
else:
145145
if token != " ":
146146
yield i, token

tests/instrumentation/dbapi2_tests.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
2828
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2929
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30+
import pytest
3031

3132
from elasticapm.instrumentation.packages.dbapi2 import Literal, extract_signature, scan, tokenize
3233

@@ -71,6 +72,36 @@ def test_scan_double_quotes_at_end():
7172
assert actual == expected
7273

7374

75+
@pytest.mark.parametrize("quote", ["$$", "$q$"])
76+
@pytest.mark.parametrize(
77+
"content",
78+
[
79+
"",
80+
"q",
81+
"Peter q Pan",
82+
"Peter $ Pan",
83+
"Peter $q Pan",
84+
"Peter q$ Pan",
85+
"Peter $q q$ $q q$ Pan Peter $q q$ $q q$ Pan",
86+
"Peter $qq$ Pan",
87+
],
88+
)
89+
def test_scan_dollar_quote(quote, content):
90+
sql = f"Hello {quote}{content}{quote} at Disney World"
91+
tokens = tokenize(sql)
92+
actual = [t[1] for t in scan(tokens)]
93+
expected = ["Hello", Literal(quote, content), "at", "Disney", "World"]
94+
assert actual == expected
95+
96+
97+
def test_dollar_quote_containing_double_dollar():
98+
sql = "Hello $q$Peter $$ Pan$q$ at Disney World"
99+
tokens = tokenize(sql)
100+
actual = [t[1] for t in scan(tokens)]
101+
expected = ["Hello", Literal("$q$", "Peter $$ Pan"), "at", "Disney", "World"]
102+
assert actual == expected
103+
104+
74105
def test_extract_signature_string():
75106
sql = "Hello 'Peter Pan' at Disney World"
76107
actual = extract_signature(sql)

0 commit comments

Comments (0)