Alijanloo
diff --git a/examples/table_extraction_example.py b/examples/table_extraction_example.py
Lines changed: 37 additions & 0 deletions
diff --git a/pdf2table/usecases/dtos.py b/pdf2table/usecases/dtos.py
Lines changed: 21 additions & 13 deletions
diff --git a/tests/README.md b/tests/README.md
Lines changed: 1 addition & 25 deletions
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
diff --git a/tests/integration/test_table_extraction.py b/tests/integration/test_table_extraction.py
Lines changed: 0 additions & 39 deletions
diff --git a/tests/samples/oxford-textbook-of-medicine-693.pdf b/tests/samples/oxford-textbook-of-medicine-693.pdf
525 KB
@@ -0,0 +1,37 @@
+from pdf2table.frameworks.pipeline import create_pipeline
+
+# Create the extraction pipeline with configuration
+pipeline = create_pipeline(
+    device="cpu",
+    detection_threshold=0.9,
+    structure_threshold=0.6,
+    pdf_dpi=300,
+    load_ocr=False,
+    visualize=False
+)
+
+pdf_path = "tests/samples/oxford-textbook-of-medicine-693.pdf"
+
+# Extract tables from a specific page
+response = pipeline.extract_tables(pdf_path=pdf_path, page_number=0)
+
+# Or extract tables from all pages
+# response = pipeline.extract_tables(pdf_path=pdf_path)
+
+# Check if extraction was successful
+if response.success:
+    print(f"Successfully extracted {len(response.tables)} tables")
+    
+    # Access extracted tables
+    for table in response.tables:
+        print(f"Table with {len(table.grid.cells)} cells")
+        print(f"Grid size: {table.grid.n_rows} x {table.grid.n_cols}")
+    
+    # Convert to dictionary format
+    result_dict = response.to_dict()
+    print(result_dict)
+    
+    # Save results to a JSON file (the data/ directory must already exist)
+    response.save_to_json("data/extracted_tables.json")
+else:
+    print(f"Extraction failed: {response.error_message}")
@@ -1,33 +1,33 @@
 import json
 from typing import List
 
-from pdf2table.entities.table_entities import  DetectedTable
+from pdf2table.entities.table_entities import DetectedTable
 
 
-class TableExtractionResponse:    
+class TableExtractionResponse:
     def __init__(self, tables: List[DetectedTable], source_file: str):
         self.tables = tables
         self.source_file = source_file
         self.success = True
         self.error_message = None
-    
+
     @classmethod
     def error(cls, error_message: str, source_file: str):
         """Create error response."""
         response = cls([], source_file)
         response.success = False
         response.error_message = error_message
         return response
-    
+
     def to_dict(self):
         """Convert response to dictionary format."""
         if not self.success:
             return {
                 "success": False,
                 "error": self.error_message,
-                "source_file": self.source_file
+                "source_file": self.source_file,
             }
-        
+
         return {
             "success": True,
             "source_file": self.source_file,
@@ -37,16 +37,24 @@ def to_dict(self):
                     "data": table.grid.to_row_format() if table.grid else [],
                 }
                 for table in self.tables
-            ]
+            ],
         }
-    
+
     def save_to_json(self, output_path: str):
         """
-        Save the extraction response to a JSON file.
-        
+        Save only the tables (metadata and row-format grid data) to a JSON file.
+
         Args:
             output_path: Path where the JSON file will be saved
         """
-        with open(output_path, 'w', encoding='utf-8') as f:
-            json.dump(self.to_dict(), f, indent=2, ensure_ascii=False)
-
+        result_dict = {
+            "tables": [
+                {
+                    "metadata": table.metadata,
+                    "data": table.grid.to_row_format() if table.grid else [],
+                }
+                for table in self.tables
+            ]
+        }
+        with open(output_path, "w", encoding="utf-8") as f:
+            json.dump(result_dict, f, indent=2, ensure_ascii=False)
@@ -1,22 +1,4 @@
-# Test Suite Organization
-
-This directory contains a well-organized test suite following Clean Architecture principles.
-
-## Structure
-
-```
-tests/
-├── unit/                           # Fast unit tests with mocking
-│   ├── test_entities_and_use_cases.py    # Core business logic tests
-│   └── usecases/                        # Use case specific unit tests
-├── integration/                    # Integration tests with real dependencies
-│   └── test_table_extraction.py          # End-to-end table extraction tests
-├── samples/                        # Sample files for testing
-├── README.md
-└── __init__.py
-```
-
-## Running Tests
+# Running Tests
 
 ```bash
 # Run fast unit tests
@@ -28,9 +10,3 @@ python -m unittest discover -s tests/integration
 # Run all tests
 python -m unittest discover -s tests
 ```
-
-## Test Principles
-
-1. **Unit tests** should be fast and use mocking for external dependencies
-2. **Integration tests** can use real models but should be clearly marked
-4. Each layer should be testable in isolation