Skip to content

Commit eee390f

Browse files
Add reproduction for Sentry SDK clickhouse-driver generator issue
Co-authored-by: daniel.szoke <[email protected]>
1 parent 84adbb7 commit eee390f

File tree

5 files changed

+549
-0
lines changed

5 files changed

+549
-0
lines changed

reproduction/README.md

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# ClickHouse-Driver Generator Issue Reproduction
2+
3+
This directory contains a minimal reproduction for the Sentry SDK issue #4657:
4+
https://github.com/getsentry/sentry-python/issues/4657
5+
6+
## Issue Summary
7+
8+
When using a generator as a data source for INSERT queries with clickhouse-driver,
9+
the Sentry SDK's clickhouse-driver integration consumes the generator before it
10+
reaches clickhouse-driver, resulting in no data being inserted.
11+
12+
The bug occurs when `send_default_pii=True` is set, causing the integration to
13+
call `db_params.extend(data)` which exhausts the generator.
14+
15+
## Setup
16+
17+
1. Create a virtual environment:
18+
```bash
19+
python -m venv venv
20+
source venv/bin/activate # On Windows: venv\Scripts\activate
21+
```
22+
23+
2. Install dependencies:
24+
```bash
25+
pip install -r requirements.txt
26+
```
27+
28+
## Running the Reproduction
29+
30+
### Option 1: Simple Reproduction (Recommended)
31+
This shows the core issue clearly:
32+
33+
```bash
34+
python simple_reproduce.py
35+
```
36+
37+
Expected output:
38+
- TEST 1 (Generator): Shows the generator being consumed by Sentry, leaving 0 items for clickhouse-driver
39+
- TEST 2 (List): Shows that lists work correctly since they can be consumed multiple times
40+
41+
### Option 2: Comprehensive Test
42+
This includes multiple test scenarios:
43+
44+
```bash
45+
python reproduce_issue.py
46+
```
47+
48+
This script will:
49+
1. Test without Sentry SDK (works correctly)
50+
2. Test with Sentry SDK (fails - demonstrates the bug)
51+
3. Show the exact traceback scenario from the issue
52+
53+
## Key Code Location
54+
55+
The bug is in `/workspace/sentry_sdk/integrations/clickhouse_driver.py` at lines 141-143:
56+
57+
```python
58+
if should_send_default_pii():
59+
db_params = span._data.get("db.params", [])
60+
db_params.extend(data) # <-- This consumes the generator!
61+
span.set_data("db.params", db_params)
62+
```
63+
64+
## Workarounds
65+
66+
Until this is fixed, you can:
67+
68+
1. **Disable PII**: Set `send_default_pii=False` in `sentry_sdk.init()`
69+
2. **Use lists instead of generators**: Convert generators to lists before passing to `execute()`
70+
3. **Disable the integration**: Remove `ClickhouseDriverIntegration()` from your Sentry config
71+
72+
## Expected Fix
73+
74+
The integration should check if `data` is a generator and handle it appropriately,
75+
possibly by:
76+
- Not consuming generators when storing params
77+
- Converting to a reusable iterator (like `itertools.tee`)
78+
- Only storing a sample of the data rather than all of it

reproduction/reproduce_issue.py

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Minimal reproduction for Sentry SDK clickhouse-driver generator issue.
4+
Issue: https://github.com/getsentry/sentry-python/issues/4657
5+
6+
The problem: When using a generator as a data source for INSERT queries,
7+
the Sentry clickhouse-driver integration consumes the generator before
8+
it's passed to clickhouse-driver, resulting in no data being inserted.
9+
"""
10+
11+
import logging
12+
from typing import Generator, Dict, Any
13+
14+
# Set up logging to see what's happening
15+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
16+
logger = logging.getLogger(__name__)
17+
18+
try:
19+
import sentry_sdk
20+
from sentry_sdk.integrations.clickhouse_driver import ClickhouseDriverIntegration
21+
logger.info(f"Sentry SDK version: {sentry_sdk.__version__}")
22+
except ImportError:
23+
logger.error("Failed to import sentry_sdk - make sure it's installed")
24+
raise
25+
26+
try:
27+
from clickhouse_driver import Client
28+
import clickhouse_driver
29+
logger.info(f"clickhouse-driver version: {clickhouse_driver.VERSION}")
30+
except ImportError:
31+
logger.error("Failed to import clickhouse_driver - run: pip install clickhouse-driver")
32+
raise
33+
34+
35+
# Mock clickhouse client to demonstrate the issue without requiring actual ClickHouse instance
36+
class MockClient:
37+
"""Mock ClickHouse client that logs when data is sent"""
38+
39+
def __init__(self):
40+
self.received_data = []
41+
42+
def execute(self, query: str, data=None):
43+
logger.info(f"Execute called with query: {query}")
44+
if data is not None:
45+
# This simulates clickhouse-driver consuming the generator
46+
consumed_data = list(data)
47+
logger.info(f"Data received by clickhouse-driver: {consumed_data}")
48+
self.received_data = consumed_data
49+
return None
50+
51+
52+
def create_data_generator() -> Generator[Dict[str, Any], None, None]:
53+
"""Create a generator that yields test data"""
54+
logger.info("Creating data generator")
55+
records = [
56+
{"id": 1, "name": "Test 1"},
57+
{"id": 2, "name": "Test 2"},
58+
{"id": 3, "name": "Test 3"}
59+
]
60+
for record in records:
61+
logger.info(f"Generator yielding: {record}")
62+
yield record
63+
64+
65+
def test_without_sentry():
66+
"""Test inserting data without Sentry SDK initialized"""
67+
logger.info("\n=== Testing WITHOUT Sentry SDK ===")
68+
69+
client = MockClient()
70+
71+
# Create generator
72+
data_gen = create_data_generator()
73+
74+
# Execute insert with generator
75+
client.execute("INSERT INTO test_table (id, name) VALUES", data_gen)
76+
77+
logger.info(f"Data received by MockClient: {client.received_data}")
78+
assert len(client.received_data) == 3, f"Expected 3 records, got {len(client.received_data)}"
79+
logger.info("✓ Test WITHOUT Sentry: PASSED")
80+
81+
82+
def test_with_sentry():
83+
"""Test inserting data with Sentry SDK initialized"""
84+
logger.info("\n=== Testing WITH Sentry SDK ===")
85+
86+
# Initialize Sentry with clickhouse-driver integration
87+
sentry_sdk.init(
88+
dsn="https://[email protected]/1", # Dummy DSN
89+
integrations=[ClickhouseDriverIntegration()],
90+
send_default_pii=True, # This triggers the bug!
91+
traces_sample_rate=1.0,
92+
)
93+
94+
# Monkey-patch to use our mock client
95+
original_client = Client
96+
97+
class PatchedClient(MockClient):
98+
def __init__(self, *args, **kwargs):
99+
super().__init__()
100+
# Need to add attributes that Sentry integration expects
101+
self.connection = type('Connection', (), {
102+
'host': 'localhost',
103+
'port': 9000,
104+
'database': 'default'
105+
})()
106+
107+
def send_data(self, *args):
108+
"""This method gets wrapped by Sentry"""
109+
logger.info(f"send_data called with args: {args}")
110+
if len(args) >= 3:
111+
data = args[2]
112+
# Try to consume the data
113+
try:
114+
consumed = list(data)
115+
logger.info(f"send_data consumed data: {consumed}")
116+
except Exception as e:
117+
logger.error(f"Error consuming data in send_data: {e}")
118+
119+
# Replace the import
120+
clickhouse_driver.client.Client = PatchedClient
121+
122+
try:
123+
# Create client (will be our patched version)
124+
client = Client()
125+
126+
# Create generator
127+
data_gen = create_data_generator()
128+
129+
# The integration will wrap send_data and consume the generator here
130+
# Before the actual clickhouse-driver gets to use it
131+
client.execute("INSERT INTO test_table (id, name) VALUES", data_gen)
132+
133+
logger.info(f"Data received by MockClient: {client.received_data}")
134+
135+
# This will fail because the generator was consumed by Sentry integration
136+
assert len(client.received_data) == 3, f"Expected 3 records, got {len(client.received_data)}"
137+
logger.info("✓ Test WITH Sentry: PASSED")
138+
139+
except AssertionError:
140+
logger.error("✗ Test WITH Sentry: FAILED - No data received (generator was consumed)")
141+
raise
142+
finally:
143+
# Restore original
144+
clickhouse_driver.client.Client = original_client
145+
146+
147+
def demonstrate_traceback_generator():
148+
"""Demonstrate the exact traceback from the issue"""
149+
logger.info("\n=== Demonstrating Traceback with Exception Generator ===")
150+
151+
# Initialize Sentry
152+
sentry_sdk.init(
153+
dsn="https://[email protected]/1",
154+
integrations=[ClickhouseDriverIntegration()],
155+
send_default_pii=True,
156+
traces_sample_rate=1.0,
157+
)
158+
159+
def exception_generator():
160+
"""Generator that throws when consumed"""
161+
raise ValueError("sh*t, someone ate my data")
162+
yield # Never reached
163+
164+
class TracebackClient(MockClient):
165+
def __init__(self, *args, **kwargs):
166+
super().__init__()
167+
self.connection = type('Connection', (), {
168+
'host': 'localhost',
169+
'port': 9000,
170+
'database': 'default',
171+
'_sentry_span': None
172+
})()
173+
174+
def send_data(self, sample_block, data, *args):
175+
"""This simulates the actual clickhouse-driver send_data signature"""
176+
logger.info("Original send_data called")
177+
# This is where clickhouse-driver would normally consume the data
178+
# But Sentry's wrapper already consumed it!
179+
try:
180+
list(data)
181+
except Exception as e:
182+
logger.info(f"Expected: data already consumed by Sentry wrapper")
183+
184+
original_client = Client
185+
clickhouse_driver.client.Client = TracebackClient
186+
187+
try:
188+
client = Client()
189+
190+
# This will throw in Sentry's wrapper
191+
try:
192+
client.send_data(None, exception_generator())
193+
except ValueError as e:
194+
logger.error(f"Exception raised in Sentry wrapper: {e}")
195+
logger.info("This proves the generator is consumed by Sentry before clickhouse-driver uses it")
196+
197+
finally:
198+
clickhouse_driver.client.Client = original_client
199+
200+
201+
if __name__ == "__main__":
202+
logger.info("Starting clickhouse-driver generator issue reproduction...\n")
203+
204+
# Test 1: Without Sentry (should work)
205+
try:
206+
test_without_sentry()
207+
except Exception as e:
208+
logger.error(f"Test without Sentry failed: {e}")
209+
210+
# Test 2: With Sentry (will fail due to bug)
211+
try:
212+
test_with_sentry()
213+
except AssertionError:
214+
logger.info("Expected failure - this demonstrates the bug")
215+
216+
# Test 3: Show exact traceback scenario
217+
try:
218+
demonstrate_traceback_generator()
219+
except Exception as e:
220+
logger.error(f"Traceback demonstration error: {e}")
221+
222+
logger.info("\n✓ Reproduction complete!")
223+
logger.info("The issue is confirmed: Sentry's clickhouse-driver integration")
224+
logger.info("consumes generators before they reach clickhouse-driver.")

reproduction/requirements.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Install local sentry-python SDK in editable mode
2+
-e /workspace
3+
4+
# Install clickhouse-driver
5+
clickhouse-driver==0.2.9

0 commit comments

Comments
 (0)