Alijanloo
diff --git a/README.md b/README.md
Lines changed: 24 additions & 30 deletions
diff --git a/docs/architecture_guide.md b/docs/architecture_guide.md
Lines changed: 23 additions & 38 deletions
diff --git a/pdf2table/adaptors/__init__.py b/pdf2table/adaptors/__init__.py
diff --git a/pdf2table/adaptors/table_extraction_adaptor.py b/pdf2table/adaptors/table_extraction_adaptor.py
Lines changed: 0 additions & 33 deletions
diff --git a/pdf2table/frameworks/pipeline.py b/pdf2table/frameworks/pipeline.py
Lines changed: 77 additions & 0 deletions
@@ -37,51 +37,45 @@ pip install -e .
 
 ### Usage
 ```python
-from pdf2table.frameworks.table_extraction_factory import TableExtractionFactory
-
-# Initialize the factory
-factory = TableExtractionFactory()
-adapter = factory.create_table_extraction_adapter()
+from pdf2table.frameworks.pipeline import create_pipeline
+
+# Create the extraction pipeline with configuration
+pipeline = create_pipeline(
+    device="cpu",
+    detection_threshold=0.9,
+    structure_threshold=0.6,
+    pdf_dpi=300,
+    load_ocr=False,
+    visualize=False
+)
 
 # Extract tables from a specific page
-response = adapter.extract_tables(pdf_path="document.pdf", page_number=0)
+tables = pipeline.extract_tables(pdf_path="document.pdf", page_number=0)
 
 # Or extract tables from all pages
-response = adapter.extract_tables(pdf_path="document.pdf")
+all_tables = pipeline.extract_tables(pdf_path="document.pdf")
 
 # Access extracted tables
-for table in response.tables:
+for table in tables:
     print(f"Table with {len(table.grid.cells)} cells")
-    print(f"Grid size: {table.grid.rows} x {table.grid.columns}")
+    print(f"Grid size: {table.grid.n_rows} x {table.grid.n_cols}")
 
     # Convert to structured format
     table_data = table.to_dict()
     print(table_data)
 ```
 
-### High-Level Usage
-
-For simpler integration, use the high-level `TableExtractionService`:
-
-```python
-from pdf2table.frameworks.table_extraction_factory import TableExtractionService
+### Configuration Options
 
-# Initialize the service
-service = TableExtractionService(device="cpu")
+The `create_pipeline()` function accepts the following parameters:
 
-# Extract tables from a single page
-page_result = service.extract_tables_from_page("document.pdf", page_number=0)
-print(f"Found {len(page_result['tables'])} tables on page 0")
-
-# Extract tables from entire PDF (all pages)
-all_results = service.extract_tables_from_pdf("document.pdf")
-tables = all_results.get('tables', [])
-print(f"Found {len(tables)} total tables across all pages")
-
-# Process each table
-for table_idx, table in enumerate(tables):
-    print(f"  Table {table_idx + 1}: {table['metadata']}")
-```
+- `device` (str): Device for ML models - "cpu" or "cuda" (default: "cpu")
+- `detection_threshold` (float): Confidence threshold for table detection (default: 0.9)
+- `structure_threshold` (float): Confidence threshold for structure recognition (default: 0.6)
+- `pdf_dpi` (int): DPI for PDF page rendering (default: 300)
+- `load_ocr` (bool): Whether to load the OCR service (default: False)
+- `visualize` (bool): Whether to enable visualization (default: False)
+- `visualization_save_dir` (str): Directory to save visualizations (default: "data/table_visualizations")
 
 ## 📋 Logging
 
 
@@ -6,16 +6,14 @@ pdf2table/
 ├── entities/
 │   └── table_entities.py
 ├── usecases/
-│   ├── dtos.py
 │   ├── services/
 │   │   └── table_services.py
+│   ├── interfaces/
 │   └── table_extraction_use_case.py
-├── adaptors/
-│   └── table_extraction_ports.py
 ├── frameworks/
 │   ├── ocr_service.py
 │   ├── pdf_image_extractor.py
-│   ├── table_extraction_factory.py
+│   ├── pipeline.py
 │   ├── table_structure_recognizer.py
 │   └── table_transformer_detector.py
 ```
@@ -35,57 +33,44 @@ pdf2table/
 - **table_extraction_use_case.py**: Application business logic
   - `TableExtractionUseCase`: Orchestrates table extraction workflow
     - `extract_tables(pdf_path, page_number=None)`: Main extraction method
+    - Returns a list of `DetectedTable` objects
   - `TableGridBuilder`: Builds structured grids from detected cells
   - Contains the core algorithms for grouping rows/columns and building grids
 - **services/table_services.py**: Supporting services for use cases
   - `TableValidationService`: Validates detected table structures and cells
   - `CoordinateClusteringService`: Clusters coordinates for row/column grouping
-- **dtos.py**: Data transfer objects for use cases
-  - `TableExtractionResponse`: Response DTO for table extraction
+- **interfaces/**: Port interfaces for dependency inversion
 
-### 3. Interface Adapters Layer (`pdf2table/adaptors/`)
-- **table_extraction_adaptor.py**: Adapter for table extraction
-  - `TableExtractionAdapter`: Coordinates between use cases and external interfaces
-    - `extract_tables(pdf_path, page_number=None)`: Main adapter method
-      - Accepts `pdf_path` and optional `page_number`
-      - Returns `TableExtractionResponse`
-
-### 4. Frameworks & Drivers Layer (`pdf2table/frameworks/`)
+### 3. Frameworks & Drivers Layer (`pdf2table/frameworks/`)
 - **pdf_image_extractor.py**: PyMuPDF implementation
 - **table_transformer_detector.py**: Table detection using Transformer models
 - **table_structure_recognizer.py**: Structure recognition using Transformer models
 - **ocr_service.py**: TrOCR text extraction
-- **table_extraction_factory.py**: Dependency injection and configuration
-
-
-### Usage (Simple)
-```python
-from pdf2table.frameworks.table_extraction_factory import TableExtractionService
-
-service = TableExtractionService(device="cpu")
+- **pipeline.py**: Factory for creating configured pipelines
 
-# Extract from a specific page
-result = service.extract_tables_from_page(pdf_path, page_number=0)
-tables = result["tables"]
+## Usage
 
-# Or extract from all pages
-all_results = service.extract_tables_from_pdf(pdf_path)
-```
-
-### Usage (Advanced)
 ```python
-from pdf2table.frameworks.table_extraction_factory import TableExtractionFactory
+from pdf2table.frameworks.pipeline import create_pipeline
 
-# Create with custom configuration
-adapter = TableExtractionFactory.create_table_extraction_adapter(
-    device="cuda",
-    detection_threshold=0.95,
-    structure_threshold=0.7
+# Create the extraction pipeline
+use_case = create_pipeline(
+    device="cpu",
+    detection_threshold=0.9,
+    structure_threshold=0.6,
+    pdf_dpi=300,
+    load_ocr=False,
+    visualize=False
 )
 
 # Extract from a specific page
-response = adapter.extract_tables(pdf_path, page_number=0)
+tables = use_case.extract_tables(pdf_path, page_number=0)
 
 # Or extract from all pages
-response = adapter.extract_tables(pdf_path)
+all_tables = use_case.extract_tables(pdf_path)
+
+# Process the results
+for table in tables:
+    print(f"Found table with {table.grid.n_rows} rows and {table.grid.n_cols} columns")
+    table_dict = table.to_dict()
 ```
@@ -0,0 +1,77 @@
+from typing import Optional
+from pdf2table.usecases.table_extraction_use_case import TableExtractionUseCase
+from pdf2table.frameworks.pdf_image_extractor import PyMuPDFImageExtractor
+from pdf2table.frameworks.table_transformer_detector import TableTransformerDetector
+from pdf2table.frameworks.table_structure_recognizer import (
+    TableTransformerStructureRecognizer,
+)
+from pdf2table.frameworks.ocr_service import TrOCRService
+from pdf2table.frameworks.logging_config import get_logger
+
+
+logger = get_logger(__name__)
+
+
+def create_pipeline(
+    device: str = "cpu",
+    detection_threshold: float = 0.9,
+    structure_threshold: float = 0.6,
+    pdf_dpi: int = 300,
+    load_ocr: bool = False,
+    visualize: bool = False,
+    visualization_save_dir: str = "data/table_visualizations",
+) -> TableExtractionUseCase:
+    """
+    Assemble a TableExtractionUseCase from newly constructed components.
+
+    Args:
+        device: Device string handed to the detector, recognizer and OCR service
+        detection_threshold: Confidence threshold given to the table detector
+        structure_threshold: Confidence threshold given to the structure recognizer
+        pdf_dpi: DPI given to the PDF image extractor
+        load_ocr: If true, build a TrOCRService; otherwise the OCR service is None
+        visualize: Forwarded unchanged to the use case
+        visualization_save_dir: Forwarded unchanged to the use case
+
+    Returns:
+        TableExtractionUseCase: The use case wired with the components above
+    """
+    summary = (
+        f"Creating table extraction pipeline - Device: {device}, "
+        f"Detection threshold: {detection_threshold}, "
+        f"Structure threshold: {structure_threshold}, "
+        f"PDF DPI: {pdf_dpi}, OCR: {load_ocr}, Visualize: {visualize}"
+    )
+    logger.info(summary)
+
+    logger.debug("Initializing PDF image extractor")
+    page_renderer = PyMuPDFImageExtractor(dpi=pdf_dpi)
+
+    logger.debug("Initializing table transformer detector")
+    detector = TableTransformerDetector(
+        device=device, confidence_threshold=detection_threshold
+    )
+
+    logger.debug("Initializing table structure recognizer")
+    recognizer = TableTransformerStructureRecognizer(
+        device=device, confidence_threshold=structure_threshold
+    )
+
+    ocr: Optional[TrOCRService] = None
+    if not load_ocr:
+        logger.debug("OCR service disabled")
+    else:
+        logger.debug("Initializing OCR service")
+        ocr = TrOCRService(device=device)
+
+    logger.debug("Creating table extraction use case")
+    pipeline = TableExtractionUseCase(
+        pdf_extractor=page_renderer,
+        table_detector=detector,
+        structure_recognizer=recognizer,
+        ocr_service=ocr,
+        visualize=visualize,
+        visualization_save_dir=visualization_save_dir,
+    )
+    logger.info("Table extraction pipeline created successfully")
+    return pipeline