diff --git a/.gitignore b/.gitignore index 2603311..1a913b0 100644 --- a/.gitignore +++ b/.gitignore @@ -212,3 +212,4 @@ test_venv examples/proprietary/ parsing_answer_guide.xlsx +.crush/ diff --git a/examples/debug_large_file_bottleneck.py b/examples/debug_large_file_bottleneck.py index eb59ef6..4faa4cd 100644 --- a/examples/debug_large_file_bottleneck.py +++ b/examples/debug_large_file_bottleneck.py @@ -41,7 +41,7 @@ def test_file_reading(): print(f"Read time: {read_time:.3f}s") print(f"Dimensions: {sheet_data.max_row + 1} x {sheet_data.max_column + 1}") print(f"Total cells: {cells:,}") - print(f"Read rate: {cells/read_time:,.0f} cells/sec") + print(f"Read rate: {cells / read_time:,.0f} cells/sec") return sheet_data @@ -64,7 +64,7 @@ def test_simple_detection(sheet_data): cells = (sheet_data.max_row + 1) * (sheet_data.max_column + 1) print(f"Detection time: {detection_time:.3f}s") - print(f"Detection rate: {cells/detection_time:,.0f} cells/sec") + print(f"Detection rate: {cells / detection_time:,.0f} cells/sec") print(f"Is simple table: {result.is_simple_table}") print(f"Confidence: {result.confidence}") print(f"Range: {result.table_range}") diff --git a/examples/parse_ground_truth.py b/examples/parse_ground_truth.py index 89b31ef..9f28160 100644 --- a/examples/parse_ground_truth.py +++ b/examples/parse_ground_truth.py @@ -72,9 +72,9 @@ def display_ground_truth(ground_truth): print(f" Tab: {tab_name}") print(f" Expected tables: {len(ranges)}") for i, range_str in enumerate(ranges): - print(f" {i+1}. {range_str}") + print(f" {i + 1}. {range_str}") - print(f"\n{'='*80}") + print(f"\n{'=' * 80}") print(f"Total files: {total_files}") print(f"Total expected tables: {total_tables}") print("=" * 80) diff --git a/examples/test_format_detection.py b/examples/test_format_detection.py index 57c2f41..ecbc445 100644 --- a/examples/test_format_detection.py +++ b/examples/test_format_detection.py @@ -86,9 +86,9 @@ async def test_format_detection(): print(f"❌ Error processing {file_path}: {e}") # Summary - print(f"\n{'='*50}") + print(f"\n{'=' * 50}") print("DETECTION SUMMARY") - print(f"{'='*50}") + print(f"{'=' * 50}") if results: print( @@ -116,7 +116,7 @@ async def test_format_detection(): for method, count in detection_methods.items(): print(f" - {method}: {count}") - print(f"\n{'='*50}") + print(f"\n{'=' * 50}") print("✅ Format detection test completed!") if any(r["mismatch"] for r in results): diff --git a/pyproject.toml b/pyproject.toml index 963f35c..1a5eb8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "gridgulp" -version = "0.3.4" +version = "0.3.5" description = "Simplified intelligent spreadsheet ingestion framework with automatic table detection" readme = "README.md" requires-python = ">=3.10" diff --git a/scripts/extract_dataframes.py b/scripts/extract_dataframes.py index 769a821..f3d961f 100755 --- a/scripts/extract_dataframes.py +++ b/scripts/extract_dataframes.py @@ -237,7 +237,7 @@ def save_results(self, results: list[FileExtractionResult], format: str = "json" f.write(f"- Tables detected: {total_detected}\n") f.write(f"- Tables extracted: {total_extracted}\n") f.write(f"- High quality tables (score > 0.7): {total_high_quality}\n") - f.write(f"- Overall success rate: {total_extracted/total_detected:.1%}\n\n") + f.write(f"- Overall success rate: {total_extracted / total_detected:.1%}\n\n") # File details f.write("## File Details\n\n") @@ -256,7 +256,7 @@ def save_results(self, results: list[FileExtractionResult], format: str = "json" f.write(f"- Tables detected: {sheet.total_tables_detected}\n") f.write(f"- Tables extracted: {sheet.tables_extracted}\n") f.write( - f"- Success rate: {sheet.tables_extracted/sheet.total_tables_detected:.1%}\n\n" + f"- Success rate: {sheet.tables_extracted / sheet.total_tables_detected:.1%}\n\n" ) # High quality tables for this sheet @@ -370,7 +370,7 @@ async def main(): print(f"Tables extracted: {total_extracted}") print(f"High quality tables: {total_high_quality}") if total_detected > 0: - print(f"Overall success rate: {total_extracted/total_detected:.1%}") + print(f"Overall success rate: {total_extracted / total_detected:.1%}") if __name__ == "__main__": diff --git a/scripts/save_extracted_csvs.py b/scripts/save_extracted_csvs.py index f97adad..b629c3b 100644 --- a/scripts/save_extracted_csvs.py +++ b/scripts/save_extracted_csvs.py @@ -69,7 +69,7 @@ def save_dataframes_as_csv(): # Convert to Excel-style range start_col_letter = chr(ord("A") + start_col) end_col_letter = chr(ord("A") + end_col) - range_str = f"{start_col_letter}{start_row+1}_{end_col_letter}{end_row+1}" + range_str = f"{start_col_letter}{start_row + 1}_{end_col_letter}{end_row + 1}" quality_score = table.get("quality_score", 0) csv_filename = f"{file_name}_{sheet_name}_{range_str}_q{quality_score:.2f}.csv" diff --git a/scripts/testing/run_tests_with_capture.py b/scripts/testing/run_tests_with_capture.py index 25250a6..669d527 100755 --- a/scripts/testing/run_tests_with_capture.py +++ b/scripts/testing/run_tests_with_capture.py @@ -250,10 +250,10 @@ async def run_performance_benchmarks(self): elapsed = time.time() - start times.append(elapsed) - print(f" Run {i+1}: {elapsed:.3f}s") + print(f" Run {i + 1}: {elapsed:.3f}s") except Exception as e: - print(f" Run {i+1}: Failed - {e}") + print(f" Run {i + 1}: Failed - {e}") if times: avg_time = sum(times) / len(times)