diff --git a/.jules/bolt.md b/.jules/bolt.md index e004145..3b1ea94 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -33,3 +33,6 @@ ## 2025-05-27 - [Bulk SQLite Inserts and Connection Reuse for Tagging] **Learning:** Sequential `.execute` calls for `INSERT OR REPLACE` inside nested loops over large arrays (like tags) coupled with opening independent DB connections per method creates a severe N+1 problem. Benchmarks showed replacing it with a single shared connection and `executemany` arrays resulted in an ~2x speedup on typical batch tagging workloads. **Action:** Always batch related SQL records using `.executemany()` and pass an optional `db_connection` downstream to nested operations instead of establishing a new database connection every time. +## 2025-05-15 - [Batched Inserts with executemany() in SQLite] +**Learning:** Sequential `execute()` statements in a loop for database inserts create an N+1 performance bottleneck due to excessive database round-trips and per-statement transaction overhead. +**Action:** Always consolidate loops of database inserts into a single `executemany()` call, constructing the argument list outside the database connection context to minimize locking time and eliminate the N+1 problem. 
diff --git a/interactive_batch_processor.py b/interactive_batch_processor.py index d28c58f..799d652 100644 --- a/interactive_batch_processor.py +++ b/interactive_batch_processor.py @@ -1357,22 +1357,32 @@ def _execute_user_decision(self, session_id: str, group: BatchGroup, user_decisi def _record_user_decision(self, session_id: str, group: BatchGroup, user_decision: Dict[str, Any], result: Dict[str, Any]): """Record user decision for learning""" try: - with sqlite3.connect(self.batch_db_path) as conn: - for fp in group.file_previews: - conn.execute(""" + now_iso = datetime.now().isoformat() + user_action = user_decision.get("action", "unknown") + user_decision_json = json.dumps(user_decision) + + # ⚡ Bolt: Batch database inserts to fix N+1 performance bottleneck + records = [ + ( + hashlib.md5(f"{session_id}_{fp.file_path}_{now_iso}".encode()).hexdigest()[:12], + session_id, + fp.file_path, + fp.predicted_category, + user_action, + now_iso, + user_decision_json + ) + for fp in group.file_previews + ] + + if records: + with sqlite3.connect(self.batch_db_path) as conn: + conn.executemany(""" INSERT INTO user_feedback (feedback_id, session_id, file_path, predicted_action, user_action, feedback_time, comments) VALUES (?, ?, ?, ?, ?, ?, ?) - """, ( - hashlib.md5(f"{session_id}_{fp.file_path}_{datetime.now().isoformat()}".encode()).hexdigest()[:12], - session_id, - fp.file_path, - fp.predicted_category, - user_decision.get("action", "unknown"), - datetime.now().isoformat(), - json.dumps(user_decision) - )) - conn.commit() + """, records) + conn.commit() except Exception as e: self.logger.error(f"Error recording user decision: {e}")