diff --git a/sentry_sdk/integrations/clickhouse_driver.py b/sentry_sdk/integrations/clickhouse_driver.py index 2561bfad04..bbaaaeec8e 100644 --- a/sentry_sdk/integrations/clickhouse_driver.py +++ b/sentry_sdk/integrations/clickhouse_driver.py @@ -11,7 +11,8 @@ # without introducing a hard dependency on `typing_extensions` # from: https://stackoverflow.com/a/71944042/300572 if TYPE_CHECKING: - from typing import ParamSpec, Callable + from collections.abc import Iterator + from typing import Any, ParamSpec, Callable else: # Fake ParamSpec class ParamSpec: @@ -49,9 +50,7 @@ def setup_once() -> None: ) # If the query contains parameters then the send_data function is used to send those parameters to clickhouse - clickhouse_driver.client.Client.send_data = _wrap_send_data( - clickhouse_driver.client.Client.send_data - ) + _wrap_send_data() # Every query ends either with the Client's `receive_end_of_query` (no result expected) # or its `receive_result` (result expected) @@ -128,23 +127,44 @@ def _inner_end(*args: P.args, **kwargs: P.kwargs) -> T: return _inner_end -def _wrap_send_data(f: Callable[P, T]) -> Callable[P, T]: - def _inner_send_data(*args: P.args, **kwargs: P.kwargs) -> T: - instance = args[0] # type: clickhouse_driver.client.Client - data = args[2] - span = getattr(instance.connection, "_sentry_span", None) +def _wrap_send_data() -> None: + original_send_data = clickhouse_driver.client.Client.send_data + + def _inner_send_data( # type: ignore[no-untyped-def] # clickhouse-driver does not type send_data + self, sample_block, data, types_check=False, columnar=False, *args, **kwargs + ): + span = getattr(self.connection, "_sentry_span", None) if span is not None: - _set_db_data(span, instance.connection) + _set_db_data(span, self.connection) if should_send_default_pii(): db_params = span._data.get("db.params", []) - db_params.extend(data) + + if isinstance(data, (list, tuple)): + db_params.extend(data) + + else: # data is a generic iterator + orig_data = data + + # Wrap the generator to add items to db.params as they are yielded. + # This allows us to send the params to Sentry without needing to allocate + # memory for the entire generator at once. + def wrapped_generator() -> "Iterator[Any]": + for item in orig_data: + db_params.append(item) + yield item + + # Replace the original iterator with the wrapped one. + data = wrapped_generator() + span.set_data("db.params", db_params) - return f(*args, **kwargs) + return original_send_data( + self, sample_block, data, types_check, columnar, *args, **kwargs + ) - return _inner_send_data + clickhouse_driver.client.Client.send_data = _inner_send_data def _set_db_data( diff --git a/tests/integrations/clickhouse_driver/test_clickhouse_driver.py b/tests/integrations/clickhouse_driver/test_clickhouse_driver.py index 0675ad9ff5..635f9334c4 100644 --- a/tests/integrations/clickhouse_driver/test_clickhouse_driver.py +++ b/tests/integrations/clickhouse_driver/test_clickhouse_driver.py @@ -342,6 +342,38 @@ def test_clickhouse_client_spans( assert event["spans"] == expected_spans +def test_clickhouse_spans_with_generator(sentry_init, capture_events): + sentry_init( + integrations=[ClickhouseDriverIntegration()], + send_default_pii=True, + traces_sample_rate=1.0, + ) + events = capture_events() + + # Use a generator to test that the integration obtains values from the generator, + # without consuming the generator. + values = ({"x": i} for i in range(3)) + + with start_transaction(name="test_clickhouse_transaction"): + client = Client("localhost") + client.execute("DROP TABLE IF EXISTS test") + client.execute("CREATE TABLE test (x Int32) ENGINE = Memory") + client.execute("INSERT INTO test (x) VALUES", values) + res = client.execute("SELECT x FROM test") + + # Verify that the integration did not consume the generator + assert res == [(0,), (1,), (2,)] + + (event,) = events + spans = event["spans"] + + [span] = [ + span for span in spans if span["description"] == "INSERT INTO test (x) VALUES" + ] + + assert span["data"]["db.params"] == [{"x": 0}, {"x": 1}, {"x": 2}] + + def test_clickhouse_client_spans_with_pii( sentry_init, capture_events, capture_envelopes ) -> None: