"""
Parallel streaming implementation for high-throughput data loading.

This module implements parallel query execution using ThreadPoolExecutor.
It partitions streaming queries by block_num ranges.

Key design decisions:
- Only supports streaming queries (not regular load operations)
- Block range partitioning only (block_num or _block_num columns)
"""
@@ -175,8 +173,7 @@ def wrap_query_with_partition(self, user_query: str, partition: QueryPartition)
175173
176174 # Create partition filter
177175 partition_filter = (
178- f"{ partition .block_column } >= { partition .start_block } "
179- f"AND { partition .block_column } < { partition .end_block } "
176+ f'{ partition .block_column } >= { partition .start_block } AND { partition .block_column } < { partition .end_block } '
180177 )
181178
182179 # Check if query already has a WHERE clause (case-insensitive)
@@ -201,11 +198,7 @@ def wrap_query_with_partition(self, user_query: str, partition: QueryPartition)
201198 end_pos = keyword_pos
202199
203200 # Insert partition filter with AND
204- partitioned_query = (
205- user_query [:end_pos ] +
206- f" AND ({ partition_filter } )" +
207- user_query [end_pos :]
208- )
201+ partitioned_query = user_query [:end_pos ] + f' AND ({ partition_filter } )' + user_query [end_pos :]
209202 else :
210203 # No WHERE clause - add one before ORDER BY, LIMIT, GROUP BY, or SETTINGS
211204 end_keywords = [' ORDER BY ' , ' LIMIT ' , ' GROUP BY ' , ' SETTINGS ' ]
@@ -217,11 +210,7 @@ def wrap_query_with_partition(self, user_query: str, partition: QueryPartition)
217210 insert_pos = keyword_pos
218211
219212 # Insert WHERE clause with partition filter
220- partitioned_query = (
221- user_query [:insert_pos ] +
222- f" WHERE { partition_filter } " +
223- user_query [insert_pos :]
224- )
213+ partitioned_query = user_query [:insert_pos ] + f' WHERE { partition_filter } ' + user_query [insert_pos :]
225214
226215 return partitioned_query
227216
@@ -270,7 +259,7 @@ def _detect_current_max_block(self) -> int:
270259 Raises:
271260 RuntimeError: If query fails or returns no results
272261 """
273- query = f" SELECT MAX({ self .config .block_column } ) as max_block FROM { self .config .table_name } "
262+ query = f' SELECT MAX({ self .config .block_column } ) as max_block FROM { self .config .table_name } '
274263 self .logger .info (f'Detecting current max block with query: { query } ' )
275264
276265 try :
@@ -290,7 +279,7 @@ def _detect_current_max_block(self) -> int:
290279
291280 except Exception as e :
292281 self .logger .error (f'Failed to detect max block: { e } ' )
293- raise RuntimeError (f'Failed to detect current max block from { self .config .table_name } : { e } ' )
282+ raise RuntimeError (f'Failed to detect current max block from { self .config .table_name } : { e } ' ) from e
294283
295284 def execute_parallel_stream (
296285 self , user_query : str , destination : str , connection_name : str , load_config : Optional [Dict [str , Any ]] = None
@@ -443,20 +432,16 @@ def execute_parallel_stream(
443432 # Add block filter to start from (detected_max - buffer) to catch potential reorgs
444433 # Check if query already has WHERE clause
445434 where_pos = streaming_query_upper .find (' WHERE ' )
446- block_filter = f" { self .config .block_column } >= { continuous_start_block } "
435+ block_filter = f' { self .config .block_column } >= { continuous_start_block } '
447436
448437 if where_pos != - 1 :
449438 # Has WHERE clause - append with AND
450439 # Find position after WHERE keyword
451440 insert_pos = where_pos + len (' WHERE ' )
452- streaming_query = (
453- streaming_query [:insert_pos ] +
454- f"({ block_filter } ) AND " +
455- streaming_query [insert_pos :]
456- )
441+ streaming_query = streaming_query [:insert_pos ] + f'({ block_filter } ) AND ' + streaming_query [insert_pos :]
457442 else :
458443 # No WHERE clause - add one before SETTINGS if present
459- streaming_query += f" WHERE { block_filter } "
444+ streaming_query += f' WHERE { block_filter } '
460445
461446 # Now add streaming settings for continuous mode
462447 streaming_query += ' SETTINGS stream = true'
@@ -521,7 +506,7 @@ def _execute_partition(
521506 destination = destination ,
522507 connection_name = connection_name ,
523508 read_all = False , # Stream batches for memory efficiency
524- ** load_config
509+ ** load_config ,
525510 )
526511
527512 # Aggregate results from streaming iterator
@@ -543,7 +528,7 @@ def _execute_partition(
543528 self .logger .info (
544529 f'Worker { partition .partition_id } completed: '
545530 f'{ total_rows :,} rows in { duration :.2f} s '
546- f'({ batch_count } batches, { total_rows / duration :.0f} rows/sec)'
531+ f'({ batch_count } batches, { total_rows / duration :.0f} rows/sec)'
547532 )
548533
549534 # Return aggregated result
@@ -603,4 +588,4 @@ def _log_final_stats(self):
603588 f'avg throughput: { avg_throughput :,.0f} rows/sec per worker'
604589 )
605590 else :
606- self .logger .error (f'Parallel execution failed: all { self ._stats .workers_failed } workers failed' )
591+ self .logger .error (f'Parallel execution failed: all { self ._stats .workers_failed } workers failed' )
0 commit comments