@@ -85,6 +85,7 @@ class ClickhouseApi:
     def __init__(self, database: str | None, clickhouse_settings: ClickhouseSettings):
         self.database = database
        self.clickhouse_settings = clickhouse_settings
+        self.erase_batch_size = clickhouse_settings.erase_batch_size
         self.client = clickhouse_connect.get_client(
             host=clickhouse_settings.host,
             port=clickhouse_settings.port,
@@ -248,22 +249,34 @@ def insert(self, table_name, records, table_structure: TableStructure = None):

     def erase(self, table_name, field_name, field_values):
         field_name = ','.join(field_name)
-        field_values = ', '.join(f'({v})' for v in field_values)
-        query = DELETE_QUERY.format(**{
-            'db_name': self.database,
-            'table_name': table_name,
-            'field_name': field_name,
-            'field_values': field_values,
-        })
-        t1 = time.time()
-        self.execute_command(query)
-        t2 = time.time()
-        duration = t2 - t1
+
+        # Batch large deletions to avoid ClickHouse max query size limit
+        field_values_list = list(field_values)
+
+        total_duration = 0.0
+        total_records = len(field_values_list)
+
+        for i in range(0, len(field_values_list), self.erase_batch_size):
+            batch = field_values_list[i:i + self.erase_batch_size]
+            batch_field_values = ', '.join(f'({v})' for v in batch)
+
+            query = DELETE_QUERY.format(**{
+                'db_name': self.database,
+                'table_name': table_name,
+                'field_name': field_name,
+                'field_values': batch_field_values,
+            })
+
+            t1 = time.time()
+            self.execute_command(query)
+            t2 = time.time()
+            total_duration += (t2 - t1)
+
         self.stats.on_event(
             table_name=table_name,
-            duration=duration,
+            duration=total_duration,
             is_insert=False,
-            records=len(field_values),
+            records=total_records,
         )

     def drop_database(self, db_name):