Skip to content

fix(django): Db query recording improvements. #4681

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 22 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
190 changes: 146 additions & 44 deletions sentry_sdk/integrations/django/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
RequestExtractor,
)

_cached_db_configs = {} # type: Dict[str, Dict[str, Any]]

try:
from django import VERSION as DJANGO_VERSION
from django.conf import settings as django_settings
Expand Down Expand Up @@ -155,9 +157,9 @@ def __init__(
def setup_once():
# type: () -> None
_check_minimum_version(DjangoIntegration, DJANGO_VERSION)
_cache_database_configurations()

install_sql_hook()
# Patch in our custom middleware.

# logs an error for every 500
ignore_logger("django.server")
Expand Down Expand Up @@ -614,6 +616,42 @@ def _set_user_info(request, event):
pass


def _cache_database_configurations():
# type: () -> None
"""
Cache database configurations from Django settings to avoid connection pool interference.
"""
global _cached_db_configs

try:
from django.conf import settings
from django.db import connections

for alias, db_config in settings.DATABASES.items():
if not db_config: # Skip empty default configs
continue

with capture_internal_exceptions():
_cached_db_configs[alias] = {
"db_name": db_config.get("NAME"),
"host": db_config.get("HOST", "localhost"),
"port": db_config.get("PORT"),
"unix_socket": db_config.get("OPTIONS", {}).get("unix_socket"),
"engine": db_config.get("ENGINE"),
}

db_wrapper = connections.get(alias)
if db_wrapper is None:
continue

if hasattr(db_wrapper, "vendor"):
_cached_db_configs[alias]["vendor"] = db_wrapper.vendor

except Exception as e:
logger.debug("Failed to cache database configurations: %s", e)
_cached_db_configs = {}


def install_sql_hook():
# type: () -> None
"""If installed this causes Django's queries to be captured."""
Expand Down Expand Up @@ -668,7 +706,6 @@ def executemany(self, sql, param_list):
span_origin=DjangoIntegration.origin_db,
) as span:
_set_db_data(span, self)

result = real_executemany(self, sql, param_list)

with capture_internal_exceptions():
Expand All @@ -687,8 +724,9 @@ def connect(self):
name="connect",
origin=DjangoIntegration.origin_db,
) as span:
connection = real_connect(self)
_set_db_data(span, self)
return real_connect(self)
return connection

CursorWrapper.execute = execute
CursorWrapper.executemany = executemany
Expand All @@ -698,54 +736,118 @@ def connect(self):

def _set_db_data(span, cursor_or_db):
# type: (Span, Any) -> None
"""
Set database connection data to the span.

Tries first to use pre-cached configuration.
If that fails, it uses get_connection_params() to get the database connection
parameters.
"""
from django.core.exceptions import ImproperlyConfigured

db = cursor_or_db.db if hasattr(cursor_or_db, "db") else cursor_or_db
vendor = db.vendor
span.set_data(SPANDATA.DB_SYSTEM, vendor)

# Some custom backends override `__getattr__`, making it look like `cursor_or_db`
# actually has a `connection` and the `connection` has a `get_dsn_parameters`
# attribute, only to throw an error once you actually want to call it.
# Hence the `inspect` check whether `get_dsn_parameters` is an actual callable
# function.
is_psycopg2 = (
hasattr(cursor_or_db, "connection")
and hasattr(cursor_or_db.connection, "get_dsn_parameters")
and inspect.isroutine(cursor_or_db.connection.get_dsn_parameters)
)
if is_psycopg2:
connection_params = cursor_or_db.connection.get_dsn_parameters()
else:
db_alias = db.alias if hasattr(db, "alias") else None

if hasattr(db, "vendor"):
span.set_data(SPANDATA.DB_SYSTEM, db.vendor)

# Use pre-cached configuration
cached_config = _cached_db_configs.get(db_alias) if db_alias else None
if cached_config:
if cached_config["db_name"]:
span.set_data(SPANDATA.DB_NAME, cached_config["db_name"])
if cached_config["host"]:
span.set_data(SPANDATA.SERVER_ADDRESS, cached_config["host"])
if cached_config["port"]:
span.set_data(SPANDATA.SERVER_PORT, str(cached_config["port"]))
if cached_config["unix_socket"]:
span.set_data(SPANDATA.SERVER_SOCKET_ADDRESS, cached_config["unix_socket"])
return # Success - exit early
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment is a bit superfluous

Suggested change
return # Success - exit early
return


# Fallback to dynamic database metadata collection.
# This is the edge case where db configuration is not in Django's `DATABASES` setting.
try:
# Method 1: Try db.get_connection_params() first (NO CONNECTION ACCESS)
logger.debug(
"Cached db connection config retrieval failed for %s. Trying db.get_connection_params().",
db_alias,
)
try:
# psycopg3, only extract needed params as get_parameters
# can be slow because of the additional logic to filter out default
# values
connection_params = {
"dbname": cursor_or_db.connection.info.dbname,
"port": cursor_or_db.connection.info.port,
}
# PGhost returns host or base dir of UNIX socket as an absolute path
# starting with /, use it only when it contains host
pg_host = cursor_or_db.connection.info.host
if pg_host and not pg_host.startswith("/"):
connection_params["host"] = pg_host
except Exception:
connection_params = db.get_connection_params()

db_name = connection_params.get("dbname") or connection_params.get("database")
if db_name is not None:
span.set_data(SPANDATA.DB_NAME, db_name)
db_name = connection_params.get("dbname") or connection_params.get(
"database"
)
if db_name is not None:
span.set_data(SPANDATA.DB_NAME, db_name)

host = connection_params.get("host")
if host is not None:
span.set_data(SPANDATA.SERVER_ADDRESS, host)

port = connection_params.get("port")
if port is not None:
span.set_data(SPANDATA.SERVER_PORT, str(port))

unix_socket = connection_params.get("unix_socket")
if unix_socket is not None:
span.set_data(SPANDATA.SERVER_SOCKET_ADDRESS, unix_socket)
return # Success - exit early to avoid connection access
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This return doesn't really do anything -- fine with me to leave it though to signal that we're done with this branch/function, but in that case at least the comment should be removed since it's wrong

Suggested change
return # Success - exit early to avoid connection access
return


except (KeyError, ImproperlyConfigured, AttributeError):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we can get a KeyError in Fallback 1 (unless it bubbles up from Django) since we're using .get() everywhere, which leads me to the question whether we have some cases where Fallback 1 "fails" silently without throwing an error (and just sets everything to None), and we should've gone to Fallback 2, but we won't, because there were no exceptions?

# Method 2: Last resort - direct connection access (CONNECTION POOL RISK)
logger.debug(
"db.get_connection_params() failed for %s, trying direct connection access",
db_alias,
)

# Some custom backends override `__getattr__`, making it look like `cursor_or_db`
# actually has a `connection` and the `connection` has a `get_dsn_parameters`
# attribute, only to throw an error once you actually want to call it.
# Hence the `inspect` check whether `get_dsn_parameters` is an actual callable
# function.
is_psycopg2 = (
hasattr(cursor_or_db, "connection")
and hasattr(cursor_or_db.connection, "get_dsn_parameters")
and inspect.isroutine(cursor_or_db.connection.get_dsn_parameters)
)
if is_psycopg2:
connection_params = cursor_or_db.connection.get_dsn_parameters()
else:
# psycopg3, only extract needed params as get_parameters
# can be slow because of the additional logic to filter out default
# values
connection_params = {
"dbname": cursor_or_db.connection.info.dbname,
"port": cursor_or_db.connection.info.port,
}
# PGhost returns host or base dir of UNIX socket as an absolute path
# starting with /, use it only when it contains host
host = cursor_or_db.connection.info.host
if host and not host.startswith("/"):
connection_params["host"] = host

db_name = connection_params.get("dbname") or connection_params.get(
"database"
)
if db_name is not None:
span.set_data(SPANDATA.DB_NAME, db_name)

host = connection_params.get("host")
if host is not None:
span.set_data(SPANDATA.SERVER_ADDRESS, host)

server_address = connection_params.get("host")
if server_address is not None:
span.set_data(SPANDATA.SERVER_ADDRESS, server_address)
port = connection_params.get("port")
if port is not None:
span.set_data(SPANDATA.SERVER_PORT, str(port))

server_port = connection_params.get("port")
if server_port is not None:
span.set_data(SPANDATA.SERVER_PORT, str(server_port))
unix_socket = connection_params.get("unix_socket")
if unix_socket is not None:
span.set_data(SPANDATA.SERVER_SOCKET_ADDRESS, unix_socket)

server_socket_address = connection_params.get("unix_socket")
if server_socket_address is not None:
span.set_data(SPANDATA.SERVER_SOCKET_ADDRESS, server_socket_address)
except Exception as e:
logger.debug("Failed to get database connection params for %s: %s", db_alias, e)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this log we could keep since it's just emitted if there was an error

# Skip database metadata rather than risk further connection issues


def add_template_context_repr_sequence():
Expand Down
8 changes: 7 additions & 1 deletion sentry_sdk/tracing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,13 @@ def _format_sql(cursor, sql):
if hasattr(cursor, "mogrify"):
real_sql = cursor.mogrify(sql)
if isinstance(real_sql, bytes):
real_sql = real_sql.decode(cursor.connection.encoding)
# Use UTF-8 as default, with latin1 fallback for edge cases
bytes_sql = real_sql # make pypy happy (type narrowing to bytes)
try:
real_sql = bytes_sql.decode("utf-8")
except UnicodeDecodeError:
# If UTF-8 fails, try latin1 as fallback
real_sql = bytes_sql.decode("latin1", errors="replace")
except Exception:
real_sql = None

Expand Down