1+ #!/usr/bin/env python3
2+ """
3+ Minimal reproduction for Sentry SDK clickhouse-driver generator issue.
4+ Issue: https://github.com/getsentry/sentry-python/issues/4657
5+
6+ The problem: When using a generator as a data source for INSERT queries,
7+ the Sentry clickhouse-driver integration consumes the generator before
8+ it's passed to clickhouse-driver, resulting in no data being inserted.
9+ """
10+
import logging
from typing import Generator, Dict, Any

# Logging is configured before the guarded imports below so that their
# version / failure messages are emitted with the timestamped format.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# The third-party imports are guarded only to log a hint before re-raising;
# a missing dependency still aborts the script.
try:
    import sentry_sdk
    from sentry_sdk.integrations.clickhouse_driver import ClickhouseDriverIntegration
except ImportError:
    logger.error("Failed to import sentry_sdk - make sure it's installed")
    raise
else:
    # NOTE(review): sentry_sdk does not appear to define ``__version__`` in
    # every release (it exposes ``VERSION``); reading the attribute directly
    # would raise AttributeError, which the ImportError handler above does not
    # catch. Fall back instead of crashing - confirm against the installed SDK.
    sdk_version = getattr(sentry_sdk, "__version__", None) or getattr(sentry_sdk, "VERSION", "unknown")
    logger.info(f"Sentry SDK version: {sdk_version} ")

try:
    from clickhouse_driver import Client
    import clickhouse_driver
except ImportError:
    logger.error("Failed to import clickhouse_driver - run: pip install clickhouse-driver")
    raise
else:
    logger.info(f"clickhouse-driver version: {clickhouse_driver.VERSION} ")
33+
34+
35+ # Mock clickhouse client to demonstrate the issue without requiring actual ClickHouse instance
class MockClient:
    """Stand-in ClickHouse client that records and logs the rows handed to it.

    It lets the reproduction run without a real ClickHouse instance.
    """

    def __init__(self):
        # Rows captured by the most recent ``execute`` call that carried data.
        self.received_data = []

    def execute(self, query: str, data=None):
        """Log *query* and, if *data* is given, drain it into ``received_data``.

        Args:
            query: SQL text; it is only logged.
            data: Optional iterable of rows. It is consumed completely, so a
                generator passed here is exhausted afterwards.

        Returns:
            Always ``None``.
        """
        logger.info(f"Execute called with query: {query} ")
        if data is None:
            # Nothing to ingest; ``received_data`` keeps its previous value.
            return None

        # Materialising the iterable simulates clickhouse-driver consuming it.
        rows = [*data]
        logger.info(f"Data received by clickhouse-driver: {rows} ")
        self.received_data = rows
        return None
50+
51+
def create_data_generator() -> Generator[Dict[str, Any], None, None]:
    """Yield three sample rows, logging each one just before it is produced.

    Because this is a generator function, nothing (including the first log
    line) runs until the caller starts iterating.

    Yields:
        Dicts with an ``"id"`` and a ``"name"`` key.
    """
    logger.info("Creating data generator")
    samples = ((1, "Test 1"), (2, "Test 2"), (3, "Test 3"))
    rows = [{"id": row_id, "name": row_name} for row_id, row_name in samples]
    for row in rows:
        logger.info(f"Generator yielding: {row} ")
        yield row
63+
64+
def test_without_sentry():
    """Baseline run: feed a generator to ``MockClient`` with Sentry not initialised here.

    Raises:
        AssertionError: If the mock did not end up with exactly 3 rows.
    """
    logger.info("\n === Testing WITHOUT Sentry SDK ===")

    mock = MockClient()
    rows = create_data_generator()

    # Hand the still-unconsumed generator straight to the INSERT call.
    mock.execute("INSERT INTO test_table (id, name) VALUES", rows)

    logger.info(f"Data received by MockClient: {mock.received_data} ")
    assert len(mock.received_data) == 3, f"Expected 3 records, got {len(mock.received_data)} "
    logger.info("✓ Test WITHOUT Sentry: PASSED")
80+
81+
def test_with_sentry():
    """Run the generator INSERT against a mock client after initialising Sentry.

    Sentry is initialised with ``ClickhouseDriverIntegration``, then
    ``clickhouse_driver.client.Client`` is temporarily replaced by a mock
    subclass, a generator is passed to ``execute``, and the number of rows the
    mock received is asserted. The original class is restored afterwards.

    NOTE(review): ``MockClient.execute`` never calls ``send_data``, and the
    integration may already have wrapped the real ``Client`` during
    ``sentry_sdk.init`` - confirm that this mock actually exercises Sentry's
    wrapper before relying on the outcome.

    Raises:
        AssertionError: If the mock did not end up with exactly 3 rows.
    """
    logger.info("\n === Testing WITH Sentry SDK ===")

    # Initialize Sentry with the clickhouse-driver integration.
    sentry_sdk.init(
        dsn="https://public@example.com/1",  # Dummy, syntactically valid DSN
        integrations=[ClickhouseDriverIntegration()],
        send_default_pii=True,  # Per the linked issue, this is what triggers the bug
        traces_sample_rate=1.0,
    )

    class PatchedClient(MockClient):
        """Mock client carrying the attributes the Sentry integration is assumed to read."""

        def __init__(self, *args, **kwargs):
            super().__init__()
            # Minimal connection object with host/port/database attributes.
            self.connection = type('Connection', (), {
                'host': 'localhost',
                'port': 9000,
                'database': 'default'
            })()

        def send_data(self, *args):
            """Log the call and try to consume the third positional argument."""
            logger.info(f"send_data called with args: {args} ")
            if len(args) >= 3:
                data = args[2]
                # Try to consume the data
                try:
                    consumed = list(data)
                    logger.info(f"send_data consumed data: {consumed} ")
                except Exception as e:
                    logger.error(f"Error consuming data in send_data: {e} ")

    # Remember whatever is currently installed so it can be restored.
    original_client = clickhouse_driver.client.Client
    clickhouse_driver.client.Client = PatchedClient

    try:
        # Look the class up through the patched module attribute. The name
        # ``Client`` imported at module level was bound at import time and
        # still refers to the real class, so ``Client()`` would bypass the patch.
        client = clickhouse_driver.client.Client()

        # Create generator
        data_gen = create_data_generator()

        client.execute("INSERT INTO test_table (id, name) VALUES", data_gen)

        logger.info(f"Data received by MockClient: {client.received_data} ")

        # Fails if the generator was drained before the client could read it.
        assert len(client.received_data) == 3, f"Expected 3 records, got {len(client.received_data)} "
        logger.info("✓ Test WITH Sentry: PASSED")

    except AssertionError:
        logger.error("✗ Test WITH Sentry: FAILED - No data received (generator was consumed)")
        raise
    finally:
        # Restore original
        clickhouse_driver.client.Client = original_client
145+
146+
def demonstrate_traceback_generator():
    """Pass a generator that raises on first iteration to a mock ``send_data``.

    Sentry is initialised with ``ClickhouseDriverIntegration``,
    ``clickhouse_driver.client.Client`` is temporarily replaced by a mock
    subclass, and ``send_data`` is called with a generator whose body raises
    ``ValueError`` as soon as it is iterated. The original class is restored
    afterwards.

    NOTE(review): the mock's own ``send_data`` catches every ``Exception``
    raised while consuming the generator, so the outer ``except ValueError``
    only fires if something outside the mock (e.g. a Sentry wrapper) iterates
    the generator first - confirm that the wrapper is actually applied to this
    mock class.
    """
    logger.info("\n === Demonstrating Traceback with Exception Generator ===")

    # Initialize Sentry
    sentry_sdk.init(
        dsn="https://public@example.com/1",  # Dummy, syntactically valid DSN
        integrations=[ClickhouseDriverIntegration()],
        send_default_pii=True,
        traces_sample_rate=1.0,
    )

    def exception_generator():
        """Generator that throws when consumed"""
        raise ValueError("sh*t, someone ate my data")
        yield  # Never reached; its presence makes this a generator function

    class TracebackClient(MockClient):
        """Mock client whose ``send_data`` takes ``(sample_block, data, *args)``."""

        def __init__(self, *args, **kwargs):
            super().__init__()
            self.connection = type('Connection', (), {
                'host': 'localhost',
                'port': 9000,
                'database': 'default',
                '_sentry_span': None
            })()

        def send_data(self, sample_block, data, *args):
            """Log the call and attempt to consume *data*, logging on failure."""
            logger.info("Original send_data called")
            # This is where clickhouse-driver would normally consume the data.
            try:
                list(data)
            except Exception:
                logger.info("Expected: data already consumed by Sentry wrapper")

    # Remember whatever is currently installed so it can be restored.
    original_client = clickhouse_driver.client.Client
    clickhouse_driver.client.Client = TracebackClient

    try:
        # Look the class up through the patched module attribute. The name
        # ``Client`` imported at module level still refers to the real class.
        client = clickhouse_driver.client.Client()

        try:
            client.send_data(None, exception_generator())
        except ValueError as e:
            logger.error(f"Exception raised in Sentry wrapper: {e} ")
            logger.info("This proves the generator is consumed by Sentry before clickhouse-driver uses it")

    finally:
        clickhouse_driver.client.Client = original_client
199+
200+
def main():
    """Run the three reproduction scenarios in order and log a summary.

    Failures of the first and third scenario are logged and swallowed; for the
    second scenario only ``AssertionError`` is caught, any other exception
    propagates.
    """
    logger.info("Starting clickhouse-driver generator issue reproduction...\n ")

    # Scenario 1: baseline without Sentry (should work).
    try:
        test_without_sentry()
    except Exception as exc:
        logger.error(f"Test without Sentry failed: {exc} ")

    # Scenario 2: with Sentry (an assertion failure here is the expected outcome).
    try:
        test_with_sentry()
    except AssertionError:
        logger.info("Expected failure - this demonstrates the bug")

    # Scenario 3: generator that raises as soon as it is consumed.
    try:
        demonstrate_traceback_generator()
    except Exception as exc:
        logger.error(f"Traceback demonstration error: {exc} ")

    logger.info("\n ✓ Reproduction complete!")
    logger.info("The issue is confirmed: Sentry's clickhouse-driver integration")
    logger.info("consumes generators before they reach clickhouse-driver.")


if __name__ == "__main__":
    main()
0 commit comments