-
Notifications
You must be signed in to change notification settings - Fork 557
fix(django): Db query recording improvements. #4681
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
c51ee08
8182704
7ca7e46
e4c1308
cc939a1
43d2b75
fba0b46
0962302
7e19dc2
40884d1
c2d372d
2c16abd
a29b06e
06b0703
cb7e107
902a320
c3f4ab7
2ee6f4a
9439ba4
9d46f9b
9251b26
d951ffc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -30,6 +30,11 @@ | |||||
RequestExtractor, | ||||||
) | ||||||
|
||||||
# Global cache for database configurations to avoid connection pool interference | ||||||
_cached_db_configs = {} # type: Dict[str, Dict[str, Any]] | ||||||
_cache_initialized = False # type: bool | ||||||
_cache_lock = threading.Lock() | ||||||
|
||||||
try: | ||||||
from django import VERSION as DJANGO_VERSION | ||||||
from django.conf import settings as django_settings | ||||||
|
@@ -155,9 +160,9 @@ def __init__( | |||||
def setup_once(): | ||||||
# type: () -> None | ||||||
_check_minimum_version(DjangoIntegration, DJANGO_VERSION) | ||||||
_cache_database_configurations() | ||||||
|
||||||
install_sql_hook() | ||||||
# Patch in our custom middleware. | ||||||
|
||||||
# logs an error for every 500 | ||||||
ignore_logger("django.server") | ||||||
|
@@ -614,6 +619,54 @@ def _set_user_info(request, event): | |||||
pass | ||||||
|
||||||
|
||||||
def _cache_database_configurations(): | ||||||
# type: () -> None | ||||||
""" | ||||||
Cache database configurations from Django settings to avoid connection pool interference. | ||||||
""" | ||||||
global _cached_db_configs, _cache_initialized | ||||||
|
||||||
if _cache_initialized: | ||||||
return | ||||||
|
||||||
with _cache_lock: | ||||||
if _cache_initialized: | ||||||
return | ||||||
|
||||||
try: | ||||||
from django.conf import settings | ||||||
from django.db import connections | ||||||
|
||||||
for alias, db_config in settings.DATABASES.items(): | ||||||
if not db_config: # Skip empty default configs | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What happens with these, caching-wise? Will they always go through the slow fallback? |
||||||
continue | ||||||
|
||||||
with capture_internal_exceptions(): | ||||||
try: | ||||||
db_wrapper = connections[alias] | ||||||
except (KeyError, Exception): | ||||||
db_wrapper = None | ||||||
|
||||||
_cached_db_configs[alias] = { | ||||||
"db_name": db_config.get("NAME"), | ||||||
"host": db_config.get("HOST"), | ||||||
"port": db_config.get("PORT"), | ||||||
"unix_socket": db_config.get("OPTIONS", {}).get("unix_socket"), | ||||||
"engine": db_config.get("ENGINE"), | ||||||
"vendor": ( | ||||||
db_wrapper.vendor | ||||||
if db_wrapper and hasattr(db_wrapper, "vendor") | ||||||
else None | ||||||
), | ||||||
} | ||||||
|
||||||
_cache_initialized = True | ||||||
|
||||||
except Exception as e: | ||||||
logger.debug("Failed to cache database configurations: %s", e) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd remove these |
||||||
_cached_db_configs = {} | ||||||
|
||||||
|
||||||
def install_sql_hook(): | ||||||
# type: () -> None | ||||||
"""If installed this causes Django's queries to be captured.""" | ||||||
|
@@ -668,7 +721,6 @@ def executemany(self, sql, param_list): | |||||
span_origin=DjangoIntegration.origin_db, | ||||||
) as span: | ||||||
_set_db_data(span, self) | ||||||
|
||||||
result = real_executemany(self, sql, param_list) | ||||||
|
||||||
with capture_internal_exceptions(): | ||||||
|
@@ -687,8 +739,11 @@ def connect(self): | |||||
name="connect", | ||||||
origin=DjangoIntegration.origin_db, | ||||||
) as span: | ||||||
_set_db_data(span, self) | ||||||
return real_connect(self) | ||||||
connection = real_connect(self) | ||||||
with capture_internal_exceptions(): | ||||||
_set_db_data(span, self) | ||||||
|
||||||
return connection | ||||||
antonpirker marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
||||||
CursorWrapper.execute = execute | ||||||
CursorWrapper.executemany = executemany | ||||||
|
@@ -698,54 +753,121 @@ def connect(self): | |||||
|
||||||
def _set_db_data(span, cursor_or_db): | ||||||
# type: (Span, Any) -> None | ||||||
""" | ||||||
Set database connection data to the span. | ||||||
|
||||||
Tries first to use pre-cached configuration. | ||||||
If that fails, it uses get_connection_params() to get the database connection | ||||||
parameters. | ||||||
""" | ||||||
from django.core.exceptions import ImproperlyConfigured | ||||||
|
||||||
db = cursor_or_db.db if hasattr(cursor_or_db, "db") else cursor_or_db | ||||||
vendor = db.vendor | ||||||
span.set_data(SPANDATA.DB_SYSTEM, vendor) | ||||||
|
||||||
# Some custom backends override `__getattr__`, making it look like `cursor_or_db` | ||||||
# actually has a `connection` and the `connection` has a `get_dsn_parameters` | ||||||
# attribute, only to throw an error once you actually want to call it. | ||||||
# Hence the `inspect` check whether `get_dsn_parameters` is an actual callable | ||||||
# function. | ||||||
is_psycopg2 = ( | ||||||
hasattr(cursor_or_db, "connection") | ||||||
and hasattr(cursor_or_db.connection, "get_dsn_parameters") | ||||||
and inspect.isroutine(cursor_or_db.connection.get_dsn_parameters) | ||||||
) | ||||||
if is_psycopg2: | ||||||
connection_params = cursor_or_db.connection.get_dsn_parameters() | ||||||
else: | ||||||
db_alias = db.alias if hasattr(db, "alias") else None | ||||||
|
||||||
if hasattr(db, "vendor"): | ||||||
span.set_data(SPANDATA.DB_SYSTEM, db.vendor) | ||||||
|
||||||
# Use pre-cached configuration | ||||||
cached_config = _cached_db_configs.get(db_alias) if db_alias else None | ||||||
if cached_config: | ||||||
if cached_config.get("db_name"): | ||||||
span.set_data(SPANDATA.DB_NAME, cached_config["db_name"]) | ||||||
if cached_config.get("host"): | ||||||
span.set_data(SPANDATA.SERVER_ADDRESS, cached_config["host"]) | ||||||
if cached_config.get("port"): | ||||||
span.set_data(SPANDATA.SERVER_PORT, str(cached_config["port"])) | ||||||
if cached_config.get("unix_socket"): | ||||||
span.set_data(SPANDATA.SERVER_SOCKET_ADDRESS, cached_config["unix_socket"]) | ||||||
if cached_config.get("vendor") and span._data.get(SPANDATA.DB_SYSTEM) is None: | ||||||
span.set_data(SPANDATA.DB_SYSTEM, cached_config["vendor"]) | ||||||
Comment on lines
+774
to
+783
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we move these to a separate function that just picks out the fields from a dictionary? Then we can use it here with |
||||||
|
||||||
return # Success - exit early | ||||||
antonpirker marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comment is a bit superfluous
Suggested change
|
||||||
|
||||||
# Fallback to dynamic database metadata collection. | ||||||
# This is the edge case where db configuration is not in Django's `DATABASES` setting. | ||||||
try: | ||||||
# Fallback 1: Try db.get_connection_params() first (NO CONNECTION ACCESS) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't we also cache the results of Fallback 1 and 2? Otherwise we keep potentially hitting the database in these cases |
||||||
logger.debug( | ||||||
"Cached db connection params retrieval failed for %s. Trying db.get_connection_params().", | ||||||
db_alias, | ||||||
) | ||||||
Comment on lines
+791
to
+794
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comment re: logging and async code, would remove |
||||||
try: | ||||||
# psycopg3, only extract needed params as get_parameters | ||||||
# can be slow because of the additional logic to filter out default | ||||||
# values | ||||||
connection_params = { | ||||||
"dbname": cursor_or_db.connection.info.dbname, | ||||||
"port": cursor_or_db.connection.info.port, | ||||||
} | ||||||
# PGhost returns host or base dir of UNIX socket as an absolute path | ||||||
# starting with /, use it only when it contains host | ||||||
pg_host = cursor_or_db.connection.info.host | ||||||
if pg_host and not pg_host.startswith("/"): | ||||||
connection_params["host"] = pg_host | ||||||
except Exception: | ||||||
connection_params = db.get_connection_params() | ||||||
|
||||||
db_name = connection_params.get("dbname") or connection_params.get("database") | ||||||
if db_name is not None: | ||||||
span.set_data(SPANDATA.DB_NAME, db_name) | ||||||
db_name = connection_params.get("dbname") or connection_params.get( | ||||||
"database" | ||||||
) | ||||||
if db_name: | ||||||
span.set_data(SPANDATA.DB_NAME, db_name) | ||||||
|
||||||
host = connection_params.get("host") | ||||||
if host: | ||||||
span.set_data(SPANDATA.SERVER_ADDRESS, host) | ||||||
|
||||||
port = connection_params.get("port") | ||||||
if port: | ||||||
span.set_data(SPANDATA.SERVER_PORT, str(port)) | ||||||
|
||||||
unix_socket = connection_params.get("unix_socket") | ||||||
if unix_socket: | ||||||
span.set_data(SPANDATA.SERVER_SOCKET_ADDRESS, unix_socket) | ||||||
return # Success - exit early to avoid connection access | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This return doesn't really do anything -- fine with me to leave it though to signal that we're done with this branch/function, but in that case at least the comment should be removed since it's wrong
Suggested change
|
||||||
|
||||||
except (KeyError, ImproperlyConfigured, AttributeError): | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we can get a |
||||||
# Fallback 2: Last resort - direct connection access (CONNECTION POOL RISK) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should definitely be cached IMO |
||||||
logger.debug( | ||||||
"db.get_connection_params() failed for %s, trying direct connection access", | ||||||
db_alias, | ||||||
) | ||||||
|
||||||
# Some custom backends override `__getattr__`, making it look like `cursor_or_db` | ||||||
# actually has a `connection` and the `connection` has a `get_dsn_parameters` | ||||||
# attribute, only to throw an error once you actually want to call it. | ||||||
# Hence the `inspect` check whether `get_dsn_parameters` is an actual callable | ||||||
# function. | ||||||
is_psycopg2 = ( | ||||||
hasattr(cursor_or_db, "connection") | ||||||
and hasattr(cursor_or_db.connection, "get_dsn_parameters") | ||||||
and inspect.isroutine(cursor_or_db.connection.get_dsn_parameters) | ||||||
) | ||||||
if is_psycopg2: | ||||||
connection_params = cursor_or_db.connection.get_dsn_parameters() | ||||||
else: | ||||||
# psycopg3, only extract needed params as get_parameters | ||||||
# can be slow because of the additional logic to filter out default | ||||||
# values | ||||||
connection_params = { | ||||||
"dbname": cursor_or_db.connection.info.dbname, | ||||||
"port": cursor_or_db.connection.info.port, | ||||||
} | ||||||
# PGhost returns host or base dir of UNIX socket as an absolute path | ||||||
# starting with /, use it only when it contains host | ||||||
host = cursor_or_db.connection.info.host | ||||||
if host and not host.startswith("/"): | ||||||
connection_params["host"] = host | ||||||
|
||||||
db_name = connection_params.get("dbname") or connection_params.get( | ||||||
"database" | ||||||
) | ||||||
if db_name: | ||||||
span.set_data(SPANDATA.DB_NAME, db_name) | ||||||
|
||||||
host = connection_params.get("host") | ||||||
if host: | ||||||
span.set_data(SPANDATA.SERVER_ADDRESS, host) | ||||||
|
||||||
server_address = connection_params.get("host") | ||||||
if server_address is not None: | ||||||
span.set_data(SPANDATA.SERVER_ADDRESS, server_address) | ||||||
port = connection_params.get("port") | ||||||
if port: | ||||||
span.set_data(SPANDATA.SERVER_PORT, str(port)) | ||||||
|
||||||
server_port = connection_params.get("port") | ||||||
if server_port is not None: | ||||||
span.set_data(SPANDATA.SERVER_PORT, str(server_port)) | ||||||
unix_socket = connection_params.get("unix_socket") | ||||||
if unix_socket: | ||||||
span.set_data(SPANDATA.SERVER_SOCKET_ADDRESS, unix_socket) | ||||||
|
||||||
server_socket_address = connection_params.get("unix_socket") | ||||||
if server_socket_address is not None: | ||||||
span.set_data(SPANDATA.SERVER_SOCKET_ADDRESS, server_socket_address) | ||||||
except Exception as e: | ||||||
logger.debug("Failed to get database connection params for %s: %s", db_alias, e) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe this log we could keep since it's just emitted if there was an error |
||||||
# Skip database metadata rather than risk further connection issues | ||||||
|
||||||
|
||||||
def add_template_context_repr_sequence(): | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What exactly are we solving here? I understand why caching is a good idea in general but not sure what exactly is meant by connection pool interference here