Commit 86749a0

refactor: Update loaders for new base class interface
- PostgreSQL: Add reorg support with DELETE/UPDATE, metadata columns
- Redis: Add streaming metadata and batch ID support
- DeltaLake: Support new metadata columns
- Iceberg: Update for base class changes
- LMDB: Add metadata column support

All loaders now support:

- State-backed resume and deduplication
- Label joining via base class
- Resilience features (retry, backpressure)
- Reorg-aware streaming with metadata tracking
1 parent 9fdb633 commit 86749a0

File tree

5 files changed: +269 −187 lines changed

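The diffs below all target the same new DataLoader base-class contract: constructors take an optional label_manager, and _handle_reorg now receives connection_name so the loader can ask the state store which batches a reorg invalidates (state_store.invalidate_from_block returns batch IDs whose unique_id values are matched against the _amp_batch_id column). Below is a minimal, self-contained sketch of that shared first step; the BlockRange/BatchID/StubStateStore stand-ins and the sample values are illustrative assumptions, only the method names and call shapes come from the diffs.

from dataclasses import dataclass
from typing import List, Set

@dataclass
class BlockRange:          # stand-in for the real BlockRange used in the diffs
    network: str
    start: int
    end: int

@dataclass(frozen=True)
class BatchID:             # stand-in: the real objects expose a unique_id attribute
    unique_id: str

class StubStateStore:
    """Illustrative stand-in for the loaders' state_store."""
    def __init__(self, batches):
        self._batches = batches  # list of (network, start_block, BatchID)

    def invalidate_from_block(self, connection_name, table_name, network, start) -> List[BatchID]:
        # Return the batches at or after the reorg start block for this network
        return [b for (net, blk, b) in self._batches if net == network and blk >= start]

def affected_batch_ids(state_store, connection_name, table_name, ranges: List[BlockRange]) -> Set[str]:
    """Common first step of _handle_reorg in each loader below."""
    affected = []
    for r in ranges:
        affected.extend(state_store.invalidate_from_block(connection_name, table_name, r.network, r.start))
    return {b.unique_id for b in affected}

# Usage sketch with made-up values
store = StubStateStore([('mainnet', 100, BatchID('b-100')), ('mainnet', 205, BatchID('b-205'))])
print(affected_batch_ids(store, 'conn-1', 'events', [BlockRange('mainnet', 200, 210)]))  # {'b-205'}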

src/amp/loaders/implementations/deltalake_loader.py

Lines changed: 32 additions & 44 deletions
@@ -80,11 +80,11 @@ class DeltaLakeLoader(DataLoader[DeltaStorageConfig]):
     REQUIRES_SCHEMA_MATCH = False
     SUPPORTS_TRANSACTIONS = True
 
-    def __init__(self, config: Dict[str, Any]):
+    def __init__(self, config: Dict[str, Any], label_manager=None):
         if not DELTALAKE_AVAILABLE:
             raise ImportError("Delta Lake support requires 'deltalake' package. Install with: pip install deltalake")
 
-        super().__init__(config)
+        super().__init__(config, label_manager=label_manager)
 
         # Performance settings
         self.batch_size = config.get('batch_size', 10000)
@@ -644,17 +644,16 @@ def query_table(self, columns: Optional[List[str]] = None, limit: Optional[int]
             self.logger.error(f'Query failed: {e}')
             raise
 
-    def _handle_reorg(self, invalidation_ranges: List[BlockRange], table_name: str) -> None:
+    def _handle_reorg(self, invalidation_ranges: List[BlockRange], table_name: str, connection_name: str) -> None:
         """
         Handle blockchain reorganization by deleting affected rows from Delta Lake.
 
-        Delta Lake's versioning and transaction capabilities make this operation
-        particularly powerful - we can precisely delete affected data and even
-        roll back if needed using time travel features.
+        Uses the _amp_batch_id column for fast, indexed deletion of affected batches.
 
         Args:
             invalidation_ranges: List of block ranges to invalidate (reorg points)
             table_name: The table containing the data to invalidate (not used but kept for API consistency)
+            connection_name: The connection name (for state invalidation)
         """
         if not invalidation_ranges:
             return
@@ -665,62 +664,51 @@ def _handle_reorg(self, invalidation_ranges: List[BlockRange], table_name: str)
             self.logger.warning('No Delta table connected, skipping reorg handling')
             return
 
+        # Get affected batch IDs from state store
+        all_affected_batch_ids = []
+        for range_obj in invalidation_ranges:
+            affected_batch_ids = self.state_store.invalidate_from_block(
+                connection_name, table_name, range_obj.network, range_obj.start
+            )
+            all_affected_batch_ids.extend(affected_batch_ids)
+
+        if not all_affected_batch_ids:
+            self.logger.info('No batches found to invalidate')
+            return
+
         # Load the current table data
         current_table = self._delta_table.to_pyarrow_table()
 
-        # Check if the table has metadata column
-        if '_meta_block_ranges' not in current_table.schema.names:
-            self.logger.warning("Delta table doesn't have '_meta_block_ranges' column, skipping reorg handling")
+        # Check if the table has batch_id column
+        if '_amp_batch_id' not in current_table.schema.names:
+            self.logger.warning("Delta table doesn't have '_amp_batch_id' column, skipping reorg handling")
             return
 
         # Build a mask to identify rows to keep
+        batch_id_column = current_table['_amp_batch_id']
         keep_mask = pa.array([True] * current_table.num_rows)
 
-        # Process each row to check if it should be invalidated
-        meta_column = current_table['_meta_block_ranges']
-
+        # Mark rows for deletion if their batch_id matches any affected batch
+        batch_id_set = {bid.unique_id for bid in all_affected_batch_ids}
         for i in range(current_table.num_rows):
-            meta_json = meta_column[i].as_py()
-
-            if meta_json:
-                try:
-                    ranges_data = json.loads(meta_json)
-
-                    # Ensure ranges_data is a list
-                    if not isinstance(ranges_data, list):
-                        continue
-
-                    # Check each invalidation range
-                    for range_obj in invalidation_ranges:
-                        network = range_obj.network
-                        reorg_start = range_obj.start
-
-                        # Check if any range for this network should be invalidated
-                        for range_info in ranges_data:
-                            if (
-                                isinstance(range_info, dict)
-                                and range_info.get('network') == network
-                                and range_info.get('end', 0) >= reorg_start
-                            ):
-                                # Mark this row for deletion
-                                # Create a mask for this specific row
-                                row_mask = pa.array([j == i for j in range(current_table.num_rows)])
-                                keep_mask = pa.compute.and_(keep_mask, pa.compute.invert(row_mask))
-                                break
-
-                except (json.JSONDecodeError, KeyError):
-                    pass
+            batch_id_str = batch_id_column[i].as_py()
+            if batch_id_str:
+                # Check if any of the batch IDs in this row match affected batches
+                for batch_id in batch_id_str.split('|'):
+                    if batch_id in batch_id_set:
+                        row_mask = pa.array([j == i for j in range(current_table.num_rows)])
+                        keep_mask = pa.compute.and_(keep_mask, pa.compute.invert(row_mask))
+                        break
 
         # Filter the table to keep only valid rows
         filtered_table = current_table.filter(keep_mask)
         deleted_count = current_table.num_rows - filtered_table.num_rows
 
         if deleted_count > 0:
             # Overwrite the table with filtered data
-            # This creates a new version in Delta Lake, preserving history
             self.logger.info(
                 f'Executing blockchain reorg deletion for {len(invalidation_ranges)} networks '
-                f'in Delta Lake table. Deleting {deleted_count} rows.'
+                f'in Delta Lake table. Deleting {deleted_count} rows affected by {len(all_affected_batch_ids)} batches.'
             )
 
             # Use overwrite mode to replace table contents
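
One note on the new loop above: it still builds a full-length row_mask per matched row and ANDs it into keep_mask, which is quadratic in the row count. An equivalent single-pass sketch, assuming the same pipe-joined _amp_batch_id values (not the committed code):

import pyarrow as pa

def build_keep_mask(batch_id_column: pa.ChunkedArray, affected_ids: set) -> pa.Array:
    """True for rows to keep, False for rows whose batch ID was invalidated."""
    keep = []
    for value in batch_id_column.to_pylist():
        invalidated = bool(value) and any(b in affected_ids for b in value.split('|'))
        keep.append(not invalidated)
    return pa.array(keep, type=pa.bool_())

# keep_mask = build_keep_mask(current_table['_amp_batch_id'], batch_id_set)
# filtered_table = current_table.filter(keep_mask)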

src/amp/loaders/implementations/iceberg_loader.py

Lines changed: 6 additions & 5 deletions
@@ -76,13 +76,13 @@ class IcebergLoader(DataLoader[IcebergStorageConfig]):
     REQUIRES_SCHEMA_MATCH = False
     SUPPORTS_TRANSACTIONS = True
 
-    def __init__(self, config: Dict[str, Any]):
+    def __init__(self, config: Dict[str, Any], label_manager=None):
         if not ICEBERG_AVAILABLE:
             raise ImportError(
                 "Apache Iceberg support requires 'pyiceberg' package. Install with: pip install pyiceberg"
             )
 
-        super().__init__(config)
+        super().__init__(config, label_manager=label_manager)
 
         self._catalog: Optional[IcebergCatalog] = None
         self._current_table: Optional[IcebergTable] = None
@@ -283,7 +283,7 @@ def _validate_schema_compatibility(self, iceberg_table: IcebergTable, arrow_sche
             # Evolution mode: evolve schema to accommodate new fields
             self._evolve_schema_if_needed(iceberg_table, iceberg_schema, arrow_schema)
 
-    def _validate_schema_strict(self, iceberg_schema: IcebergSchema, arrow_schema: pa.Schema) -> None:
+    def _validate_schema_strict(self, iceberg_schema: 'IcebergSchema', arrow_schema: pa.Schema) -> None:
         """Validate schema compatibility in strict mode (no evolution)"""
         iceberg_field_names = {field.name for field in iceberg_schema.fields}
         arrow_field_names = {field.name for field in arrow_schema}
@@ -304,7 +304,7 @@ def _validate_schema_strict(self, iceberg_schema: IcebergSchema, arrow_schema: p
         self.logger.debug('Schema validation passed in strict mode')
 
     def _evolve_schema_if_needed(
-        self, iceberg_table: IcebergTable, iceberg_schema: IcebergSchema, arrow_schema: pa.Schema
+        self, iceberg_table: 'IcebergTable', iceberg_schema: 'IcebergSchema', arrow_schema: pa.Schema
     ) -> None:
         """Evolve the Iceberg table schema to accommodate new Arrow schema fields"""
         try:
@@ -506,7 +506,7 @@ def get_table_info(self, table_name: str) -> Dict[str, Any]:
             self.logger.error(f'Failed to get table info for {table_name}: {e}')
             return {'exists': False, 'error': str(e), 'table_name': table_name}
 
-    def _handle_reorg(self, invalidation_ranges: List[BlockRange], table_name: str) -> None:
+    def _handle_reorg(self, invalidation_ranges: List[BlockRange], table_name: str, connection_name: str) -> None:
         """
         Handle blockchain reorganization by deleting affected rows from Iceberg table.
 
@@ -518,6 +518,7 @@ def _handle_reorg(self, invalidation_ranges: List[BlockRange], table_name: str)
         Args:
             invalidation_ranges: List of block ranges to invalidate (reorg points)
             table_name: The table containing the data to invalidate
+            connection_name: The connection name (for state invalidation)
         """
         if not invalidation_ranges:
             return
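
The switch to quoted 'IcebergTable'/'IcebergSchema' annotations matters because pyiceberg is an optional dependency: string annotations keep the module importable when those names were never bound. A hedged illustration of that pattern (the pyiceberg import paths here are assumptions, not taken from this commit):

try:
    # Optional dependency: these aliases only exist if pyiceberg is installed
    from pyiceberg.catalog import Catalog as IcebergCatalog
    from pyiceberg.schema import Schema as IcebergSchema
    from pyiceberg.table import Table as IcebergTable
    ICEBERG_AVAILABLE = True
except ImportError:
    ICEBERG_AVAILABLE = False

def field_names(iceberg_schema: 'IcebergSchema') -> set:
    # The quoted annotation is not evaluated at definition time, so this
    # function can be defined even when the import above failed.
    return {field.name for field in iceberg_schema.fields}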

src/amp/loaders/implementations/lmdb_loader.py

Lines changed: 34 additions & 42 deletions
@@ -64,8 +64,8 @@ class LMDBLoader(DataLoader[LMDBConfig]):
     REQUIRES_SCHEMA_MATCH = False
     SUPPORTS_TRANSACTIONS = True
 
-    def __init__(self, config: Dict[str, Any]):
-        super().__init__(config)
+    def __init__(self, config: Dict[str, Any], label_manager=None):
+        super().__init__(config, label_manager=label_manager)
 
         self.env: Optional[lmdb.Environment] = None
         self.dbs: Dict[str, Any] = {}  # Cache opened databases
@@ -350,75 +350,67 @@ def get_table_info(self, table_name: str) -> Optional[Dict[str, Any]]:
             self.logger.error(f'Failed to get table info: {e}')
             return None
 
-    def _handle_reorg(self, invalidation_ranges: List[BlockRange], table_name: str) -> None:
+    def _handle_reorg(self, invalidation_ranges: List[BlockRange], table_name: str, connection_name: str) -> None:
         """
         Handle blockchain reorganization by deleting affected entries from LMDB.
 
-        LMDB's key-value architecture requires iterating through entries to find
-        and delete affected data based on the metadata stored in each value.
+        Uses the _amp_batch_id column for fast deletion of affected batches.
 
         Args:
             invalidation_ranges: List of block ranges to invalidate (reorg points)
             table_name: The table containing the data to invalidate
+            connection_name: The connection name (for state invalidation)
         """
         if not invalidation_ranges:
            return
 
         try:
+            # Get affected batch IDs from state store
+            all_affected_batch_ids = []
+            for range_obj in invalidation_ranges:
+                affected_batch_ids = self.state_store.invalidate_from_block(
+                    connection_name, table_name, range_obj.network, range_obj.start
+                )
+                all_affected_batch_ids.extend(affected_batch_ids)
+
+            if not all_affected_batch_ids:
+                self.logger.info('No batches found to invalidate')
+                return
+
+            batch_id_set = {bid.unique_id for bid in all_affected_batch_ids}
+
             db = self._get_or_create_db(self.config.database_name)
             deleted_count = 0
 
             with self.env.begin(write=True, db=db) as txn:
                 cursor = txn.cursor()
                 keys_to_delete = []
 
-                # First pass: identify keys to delete
+                # First pass: identify keys to delete based on batch_id
                 if cursor.first():
                     while True:
                         key = cursor.key()
                         value = cursor.value()
 
-                        # Deserialize the Arrow batch to check metadata
+                        # Deserialize the Arrow batch to check batch_id
                        try:
                             # Read the serialized Arrow batch
                             reader = pa.ipc.open_stream(value)
                             batch = reader.read_next_batch()
 
-                            # Check if this batch has metadata column
-                            if '_meta_block_ranges' in batch.schema.names:
-                                # Get the metadata (should be a single row)
-                                meta_idx = batch.schema.get_field_index('_meta_block_ranges')
-                                meta_json = batch.column(meta_idx)[0].as_py()
-
-                                if meta_json:
-                                    try:
-                                        ranges_data = json.loads(meta_json)
-
-                                        # Ensure ranges_data is a list
-                                        if not isinstance(ranges_data, list):
-                                            continue
-
-                                        # Check each invalidation range
-                                        for range_obj in invalidation_ranges:
-                                            network = range_obj.network
-                                            reorg_start = range_obj.start
-
-                                            # Check if any range for this network should be invalidated
-                                            for range_info in ranges_data:
-                                                if (
-                                                    isinstance(range_info, dict)
-                                                    and range_info.get('network') == network
-                                                    and range_info.get('end', 0) >= reorg_start
-                                                ):
-                                                    keys_to_delete.append(key)
-                                                    deleted_count += 1
-                                                    break
-
-                                            if key in keys_to_delete:
-                                                break
-
-                                    except (json.JSONDecodeError, KeyError):
-                                        pass
+                            # Check if this batch has batch_id column
+                            if '_amp_batch_id' in batch.schema.names:
+                                # Get the batch_id (should be a single row)
+                                batch_id_idx = batch.schema.get_field_index('_amp_batch_id')
+                                batch_id_str = batch.column(batch_id_idx)[0].as_py()
+
+                                if batch_id_str:
+                                    # Check if any of the batch IDs match affected batches
+                                    for batch_id in batch_id_str.split('|'):
+                                        if batch_id in batch_id_set:
+                                            keys_to_delete.append(key)
+                                            deleted_count += 1
+                                            break
 
                         except Exception as e:
                             self.logger.debug(f'Failed to deserialize entry: {e}')
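
Each LMDB value is a serialized Arrow IPC stream whose single-row _amp_batch_id column carries pipe-joined batch IDs; that is what the loop above inspects. A standalone sketch of the same inspection under those assumptions (extract_batch_ids is a hypothetical helper, not part of the commit):

import pyarrow as pa

def extract_batch_ids(serialized_value: bytes) -> list:
    """Return the batch IDs recorded in one stored value, or [] if the column is absent."""
    reader = pa.ipc.open_stream(serialized_value)
    batch = reader.read_next_batch()
    if '_amp_batch_id' not in batch.schema.names:
        return []
    raw = batch.column(batch.schema.get_field_index('_amp_batch_id'))[0].as_py()
    return raw.split('|') if raw else []

# Round-trip one single-row batch through the IPC stream format to exercise it
batch = pa.RecordBatch.from_pydict({'value': [42], '_amp_batch_id': ['b-100|b-205']})
sink = pa.BufferOutputStream()
with pa.ipc.new_stream(sink, batch.schema) as writer:
    writer.write_batch(batch)
print(extract_batch_ids(sink.getvalue().to_pybytes()))  # ['b-100', 'b-205']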
