Skip to content

Commit 8e2b7cd

Browse files
authored
Summarize db.statement in message (#308)
1 parent e3b139d commit 8e2b7cd

File tree

4 files changed

+308
-7
lines changed

4 files changed

+308
-7
lines changed
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
from __future__ import annotations
2+
3+
import re
4+
from typing import Any, Mapping
5+
6+
from opentelemetry.semconv.trace import SpanAttributes
7+
8+
MAX_QUERY_MESSAGE_LENGTH = 80
9+
10+
11+
def message_from_db_statement(attributes: Mapping[str, Any], message: str | None, span_name: str) -> str | None:
12+
"""Try to construct a useful span message from OTel db statement.
13+
14+
Returns: A new string to use as span message or None to keep the original message.
15+
"""
16+
db_statement = attributes.get(SpanAttributes.DB_STATEMENT)
17+
if not isinstance(db_statement, str):
18+
# covers `None` and anything any other unexpected type
19+
return None
20+
21+
if message is not None and message != span_name:
22+
# if the message attribute is set and is different from the span name
23+
# return None and thereby use the message attribute
24+
return None
25+
26+
db_statement = db_statement.strip()
27+
if isinstance(message, str):
28+
db_name = attributes.get(SpanAttributes.DB_NAME)
29+
if db_name and isinstance(db_name, str) and message.endswith(db_name):
30+
operation = message[: -len(db_name) - 1]
31+
else:
32+
operation = message
33+
if operation not in db_statement:
34+
# operation is not in the db_statement, message is custom so avoid custom behavior
35+
return None
36+
if not re.fullmatch(r'\S+', operation):
37+
# operation is not a simple word, avoid custom behavior
38+
return None
39+
40+
return summarize_query(db_statement)
41+
42+
43+
# we don't use \S here since that includes "(" which is not valid in a table name
44+
TABLE_RE = '[\\w."\'`-]+'
45+
SELECT_RE = re.compile(rf'SELECT (.+?) FROM ({TABLE_RE})', flags=re.I | re.DOTALL)
46+
SELECT_CTE_RE = re.compile(rf'WITH (.+?) SELECT (.+?) FROM ({TABLE_RE})', flags=re.I | re.DOTALL)
47+
SELECT_SUBQUERY_RE = re.compile(rf'SELECT (.+?) FROM (\(.+?\)) AS ({TABLE_RE})', flags=re.I | re.DOTALL)
48+
INSERT_RE = re.compile(rf'INSERT INTO ({TABLE_RE}) (\(.+?\)) VALUES (\(.+?\))', flags=re.I)
49+
50+
51+
def summarize_query(db_statement: str) -> str | None:
52+
"""Summarize a database statement, specifically SQL queries.
53+
54+
Args:
55+
db_statement: The database statement to summarize.
56+
57+
Returns: A new string to use as span message or None to keep the original message.
58+
59+
"""
60+
db_statement = db_statement.strip()
61+
if len(db_statement) <= MAX_QUERY_MESSAGE_LENGTH:
62+
return db_statement
63+
64+
# remove comments
65+
db_statement = re.sub(r'^[ \t]*--.*', '', db_statement, flags=re.MULTILINE)
66+
67+
# collapse multiple white spaces to a single space, warning - this can break/change string literals
68+
# but I think that's okay in this scenario
69+
db_statement = re.sub(r'\s{2,}', ' ', db_statement).strip()
70+
if len(db_statement) <= MAX_QUERY_MESSAGE_LENGTH:
71+
return db_statement
72+
73+
if select_match := SELECT_SUBQUERY_RE.match(db_statement):
74+
expr, subquery, table = select_match.groups()
75+
return select(expr, table, match_end=select_match.end(), db_statement=db_statement, sub_query=subquery)
76+
elif select_match := SELECT_RE.match(db_statement):
77+
return select(*select_match.groups(), match_end=select_match.end(), db_statement=db_statement)
78+
elif select_match := SELECT_CTE_RE.match(db_statement):
79+
ctes, expr, table = select_match.groups()
80+
return select(expr, table, match_end=select_match.end(), db_statement=db_statement, ctes=ctes)
81+
elif insert_match := INSERT_RE.match(db_statement):
82+
table, columns, values = insert_match.groups()
83+
return f'INSERT INTO {table} {truncate(columns, 25)} VALUES {truncate(values, 25)}'
84+
else:
85+
return fallback(db_statement)
86+
87+
88+
def select(
89+
expr: str, table: str, *, match_end: int, db_statement: str, ctes: str | None = None, sub_query: str | None = None
90+
) -> str:
91+
expr = truncate(expr, 20)
92+
93+
if sub_query:
94+
summary = f'SELECT {expr} FROM {truncate(sub_query, 25)} AS {truncate(table, 15)}'
95+
else:
96+
# 25 for table because this is the best identifier of the query
97+
summary = f'SELECT {expr} FROM {truncate(table, 25)}'
98+
99+
if ctes:
100+
cte_as = re.findall(rf'({TABLE_RE}) AS', ctes, flags=re.I)
101+
if len(cte_as) == 1:
102+
summary = f'WITH {truncate(cte_as[0], 10)} AS (…) {summary}'
103+
else:
104+
summary = f'WITH …[{len(cte_as)} CTEs] {summary}'
105+
106+
joins = re.findall(rf'JOIN ({TABLE_RE})', db_statement[match_end:], flags=re.I)
107+
if len(joins) == 1:
108+
summary = f'{summary} JOIN {truncate(joins[0], 10)} ON …'
109+
elif joins:
110+
summary = f'{summary} …[{len(joins)} JOINs]'
111+
112+
if re.search('WHERE', db_statement[match_end:], flags=re.I):
113+
summary = f'{summary} WHERE …'
114+
115+
if limit := re.search(r'LIMIT (\d+)', db_statement[match_end:], flags=re.I):
116+
summary = f'{summary} LIMIT {limit.group(1)}'
117+
118+
return truncate(summary, MAX_QUERY_MESSAGE_LENGTH)
119+
120+
121+
def truncate(s: str, length: int) -> str:
122+
if len(s) <= length:
123+
return s
124+
else:
125+
half_length = length // 2
126+
return f'{s[:half_length]}{s[-half_length:]}'
127+
128+
129+
FALLBACK_HALF = MAX_QUERY_MESSAGE_LENGTH // 2 - 2
130+
131+
132+
def fallback(db_statement: str):
133+
return f'{db_statement[:FALLBACK_HALF]}{db_statement[-FALLBACK_HALF:]}'

logfire/_internal/exporters/processor_wrapper.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
PENDING_SPAN_NAME_SUFFIX,
2020
log_level_attributes,
2121
)
22+
from ..db_statement_summary import message_from_db_statement
2223
from ..scrubbing import BaseScrubber
2324
from ..utils import ReadableSpanDict, is_instrumentation_suppressed, span_to_dict, truncate_string
2425
from .wrapper import WrapperSpanProcessor
@@ -53,6 +54,7 @@ def on_end(self, span: ReadableSpan) -> None:
5354
_tweak_asgi_send_receive_spans(span_dict)
5455
_tweak_sqlalchemy_connect_spans(span_dict)
5556
_tweak_http_spans(span_dict)
57+
_summarize_db_statement(span_dict)
5658
_set_error_level_and_status(span_dict)
5759
self.scrubber.scrub_span(span_dict)
5860
span = ReadableSpan(**span_dict)
@@ -241,3 +243,11 @@ def _tweak_http_spans(span: ReadableSpanDict):
241243

242244
if message != name:
243245
span['attributes'] = {**attributes, ATTRIBUTES_MESSAGE_KEY: message}
246+
247+
248+
def _summarize_db_statement(span: ReadableSpanDict):
249+
attributes = span['attributes']
250+
message: str | None = attributes.get(ATTRIBUTES_MESSAGE_KEY) # type: ignore
251+
summary = message_from_db_statement(attributes, message, span['name'])
252+
if summary is not None:
253+
span['attributes'] = {**attributes, ATTRIBUTES_MESSAGE_KEY: summary}

tests/otel_integrations/test_sqlalchemy.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ class AuthRecord(Base):
7474
'end_time': 4000000000,
7575
'attributes': {
7676
'logfire.span_type': 'span',
77-
'logfire.msg': 'PRAGMA example.db',
77+
'logfire.msg': 'PRAGMA main.table_info("auth_records")',
7878
'db.statement': 'PRAGMA main.table_info("auth_records")',
7979
'db.system': 'sqlite',
8080
'db.name': 'example.db',
@@ -88,7 +88,7 @@ class AuthRecord(Base):
8888
'end_time': 6000000000,
8989
'attributes': {
9090
'logfire.span_type': 'span',
91-
'logfire.msg': 'PRAGMA example.db',
91+
'logfire.msg': 'PRAGMA temp.table_info("auth_records")',
9292
'db.statement': 'PRAGMA temp.table_info("auth_records")',
9393
'db.system': 'sqlite',
9494
'db.name': 'example.db',
@@ -102,7 +102,10 @@ class AuthRecord(Base):
102102
'end_time': 8000000000,
103103
'attributes': {
104104
'logfire.span_type': 'span',
105-
'logfire.msg': 'CREATE example.db',
105+
'logfire.msg': """\
106+
CREATE TABLE auth_records ( id INTEGER … t VARCHAR NOT NULL, PRIMARY KEY (id)
107+
)\
108+
""",
106109
'db.statement': '\nCREATE TABLE auth_records (\n\tid INTEGER NOT NULL, \n\tnumber INTEGER NOT NULL, \n\tcontent VARCHAR NOT NULL, \n\tPRIMARY KEY (id)\n)\n\n',
107110
'db.system': 'sqlite',
108111
'db.name': 'example.db',
@@ -130,7 +133,7 @@ class AuthRecord(Base):
130133
'end_time': 12000000000,
131134
'attributes': {
132135
'logfire.span_type': 'span',
133-
'logfire.msg': 'select example.db',
136+
'logfire.msg': 'select * from auth_records',
134137
'db.statement': 'select * from auth_records',
135138
'db.system': 'sqlite',
136139
'db.name': 'example.db',
@@ -144,7 +147,7 @@ class AuthRecord(Base):
144147
'end_time': 14000000000,
145148
'attributes': {
146149
'logfire.span_type': 'span',
147-
'logfire.msg': 'INSERT example.db',
150+
'logfire.msg': 'INSERT INTO auth_records (id, number, content) VALUES (?, ?, ?)',
148151
'db.statement': 'INSERT INTO auth_records (id, number, content) VALUES (?, ?, ?)',
149152
'db.system': 'sqlite',
150153
'db.name': 'example.db',
@@ -172,7 +175,7 @@ class AuthRecord(Base):
172175
'end_time': 18000000000,
173176
'attributes': {
174177
'logfire.span_type': 'span',
175-
'logfire.msg': 'SELECT example.db',
178+
'logfire.msg': 'SELECT auth_recor…ds_content FROM auth_records WHERE …',
176179
'db.statement': 'SELECT auth_records.id AS auth_records_id, auth_records.number AS auth_records_number, auth_records.content AS auth_records_content \nFROM auth_records \nWHERE auth_records.id = ?',
177180
'db.system': 'sqlite',
178181
'db.name': 'example.db',
@@ -186,7 +189,7 @@ class AuthRecord(Base):
186189
'end_time': 20000000000,
187190
'attributes': {
188191
'logfire.span_type': 'span',
189-
'logfire.msg': 'DELETE example.db',
192+
'logfire.msg': 'DELETE FROM auth_records WHERE auth_records.id = ?',
190193
'db.statement': 'DELETE FROM auth_records WHERE auth_records.id = ?',
191194
'db.system': 'sqlite',
192195
'db.name': 'example.db',

tests/test_db_statement_summary.py

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
from logfire._internal.db_statement_summary import message_from_db_statement
2+
3+
4+
def test_no_db_statement():
5+
assert message_from_db_statement({}, None, 'x') is None
6+
7+
8+
def test_short_db_statement():
9+
assert message_from_db_statement({'db.statement': 'SELECT * FROM table'}, None, 'x') == 'SELECT * FROM table'
10+
11+
12+
def test_message_same():
13+
assert (
14+
message_from_db_statement({'db.statement': 'SELECT * FROM table'}, 'SELECT', 'SELECT') == 'SELECT * FROM table'
15+
)
16+
17+
18+
def test_message_different():
19+
assert message_from_db_statement({'db.statement': 'SELECT * FROM table'}, 'SELECT', 'x') is None
20+
21+
22+
def test_message_not_in_db_statement():
23+
q = 'SELECT apple, banana, carrot, durian, egg, fig FROM table WHERE apple = 1'
24+
assert message_from_db_statement({'db.statement': q}, 'not in statement', 'not in statement') is None
25+
26+
27+
def test_message_multiword():
28+
q = 'SELECT apple, banana, carrot, durian, egg, fig FROM table WHERE apple = 1'
29+
assert message_from_db_statement({'db.statement': q}, 'SELECT apple', 'SELECT apple') is None
30+
31+
32+
def test_ok_after_clean():
33+
q = """
34+
-- this is a long comment about the sql
35+
SELECT apple, banana, carrot, durian, egg, fig FROM table
36+
"""
37+
# insert_assert(message_from_db_statement({'db.statement': q}, None, 'x'))
38+
assert (
39+
message_from_db_statement({'db.statement': q}, None, 'x')
40+
== 'SELECT apple, banana, carrot, durian, egg, fig FROM table'
41+
)
42+
43+
44+
def attrs(q: str):
45+
return {'db.statement': q, 'db.system': 'postgresql'}
46+
47+
48+
def test_query_rewritten():
49+
q = 'SELECT apple, banana, carrot, durian, egg, fig FROM table WHERE apple = 1 and banana = 2'
50+
# insert_assert(message_from_db_statement(attrs(q), None, 'SELECT'))
51+
assert message_from_db_statement(attrs(q), None, 'SELECT') == 'SELECT apple, ban…, egg, fig FROM table WHERE …'
52+
53+
54+
def test_invalid_sql():
55+
q = 'SELECT apple, banana, carrot, durian, egg, fig FROM "table WHERE apple = 1 offset 12345678901234567890'
56+
# insert_assert(message_from_db_statement(attrs(q), None, 'SELECT'))
57+
assert message_from_db_statement(attrs(q), None, 'SELECT') == 'SELECT apple, ban…, egg, fig FROM "table WHERE …'
58+
59+
60+
def test_one_cte():
61+
q = 'WITH foobar AS (SELECT apple, banana, carrot, durian FROM table) SELECT * FROM foobar where x = 1'
62+
# insert_assert(message_from_db_statement(attrs(q), None, 'SELECT'))
63+
assert message_from_db_statement(attrs(q), None, 'SELECT') == 'WITH foobar AS (…) SELECT * FROM foobar WHERE …'
64+
65+
66+
def test_one_cte_long():
67+
q = 'WITH foobar_foobar_foobar AS (SELECT apple, banana, carrot FROM table) SELECT * FROM foobar where x = 1'
68+
# insert_assert(message_from_db_statement(attrs(q), None, 'SELECT'))
69+
assert message_from_db_statement(attrs(q), None, 'SELECT') == 'WITH fooba…oobar AS (…) SELECT * FROM foobar WHERE …'
70+
71+
72+
def test_two_ctes():
73+
q = 'WITH foo AS (SELECT * FROM table), bar AS (SELECT apple, banana, carrot, durian FROM foo) SELECT * FROM bar'
74+
# insert_assert(message_from_db_statement(attrs(q), None, 'SELECT'))
75+
assert message_from_db_statement(attrs(q), None, 'SELECT') == 'WITH …[2 CTEs] SELECT * FROM bar'
76+
77+
78+
def test_long_select():
79+
q = '\nSELECT apple, banana, carrot, durian, egg, fig, grape FROM table offset 12345678901234567890'
80+
# insert_assert(message_from_db_statement(attrs(q), None, 'SELECT'))
81+
assert message_from_db_statement(attrs(q), None, 'SELECT') == 'SELECT apple, ban…fig, grape FROM table'
82+
83+
84+
def test_from_subquery():
85+
q = 'select * from (select * from table) as sub where aaaaa_bbbb_cccccc=1 offset 12345678901234567890'
86+
# insert_assert(message_from_db_statement(attrs(q), None, 'SELECT'))
87+
assert message_from_db_statement(attrs(q), None, 'SELECT') == 'SELECT * FROM (select * from table) AS sub WHERE …'
88+
89+
90+
def test_from_quoted():
91+
q = 'select * from "foo.bar" as sub where aaaaa_bbbb_cccccc=1 offset 12345678901234567890'
92+
# insert_assert(message_from_db_statement(attrs(q), None, 'SELECT'))
93+
assert message_from_db_statement(attrs(q), None, 'SELECT') == 'SELECT * FROM "foo.bar" WHERE …'
94+
95+
96+
def test_from_long():
97+
q = 'select * from "aaaaa.bbbb.cccccc" as sub where aaaaa_bbbb_cccccc=1 offset 12345678901234567890'
98+
# insert_assert(message_from_db_statement(attrs(q), None, 'SELECT'))
99+
assert message_from_db_statement(attrs(q), None, 'SELECT') == 'SELECT * FROM "aaaaa.bbbb.cccccc" WHERE …'
100+
101+
102+
def test_one_join():
103+
q = ' SELECT apple, banana, carrot FROM table JOIN other ON table.id = other.id offset 12345678901234567890'
104+
# insert_assert(message_from_db_statement(attrs(q), None, 'SELECT'))
105+
assert (
106+
message_from_db_statement(attrs(q), None, 'SELECT') == 'SELECT apple, ban…na, carrot FROM table JOIN other ON …'
107+
)
108+
109+
110+
def test_one_join_long():
111+
q = ' SELECT apple, banana, carrot FROM table JOIN other_other_other ON table.id = other_other_other.id'
112+
# insert_assert(message_from_db_statement(attrs(q), None, 'SELECT'))
113+
assert (
114+
message_from_db_statement(attrs(q), None, 'SELECT')
115+
== 'SELECT apple, ban…na, carrot FROM table JOIN other…other ON …'
116+
)
117+
118+
119+
def test_two_joins():
120+
q = 'SELECT * FROM table JOIN other ON table.id = other.id JOIN another ON table.id = another.id'
121+
# insert_assert(message_from_db_statement(attrs(q), None, 'SELECT'))
122+
assert message_from_db_statement(attrs(q), None, 'SELECT') == 'SELECT * FROM table …[2 JOINs]'
123+
124+
125+
def test_where():
126+
q = 'SELECT apple, banana, carrot, durian, egg FROM table where a = 1 and b = 2 and c =3'
127+
# insert_assert(message_from_db_statement(attrs(q), None, 'SELECT'))
128+
assert message_from_db_statement(attrs(q), None, 'SELECT') == 'SELECT apple, ban…urian, egg FROM table WHERE …'
129+
130+
131+
def test_limit():
132+
q = 'SELECT apple, banana, carrot, durian, egg, fig, grape FROM table where apple=12345678901234567890 limit 10'
133+
# insert_assert(message_from_db_statement(attrs(q), None, 'SELECT'))
134+
assert (
135+
message_from_db_statement(attrs(q), None, 'SELECT')
136+
== 'SELECT apple, ban…fig, grape FROM table WHERE … LIMIT 10'
137+
)
138+
139+
140+
def test_update():
141+
q = 'UPDATE table set apple = 1 where banana = 2 and carrrrrrrot = 3 and durian = 4 and egg = 5 and fig = 6'
142+
# insert_assert(message_from_db_statement(attrs(q), None, 'UPDATE'))
143+
assert (
144+
message_from_db_statement(attrs(q), None, 'UPDATE')
145+
== 'UPDATE table set apple = 1 where banan … and durian = 4 and egg = 5 and fig = 6'
146+
)
147+
148+
149+
def test_insert():
150+
q = 'INSERT INTO table (apple, banana, carrot, durian, egg, fig) VALUES (1, 2, 3, 4, 5, 6)'
151+
# insert_assert(message_from_db_statement(attrs(q), None, 'INSERT'))
152+
assert (
153+
message_from_db_statement(attrs(q), None, 'INSERT')
154+
== 'INSERT INTO table (apple, bana…n, egg, fig) VALUES (1, 2, 3, 4, 5, 6)'
155+
)

0 commit comments

Comments
 (0)