Merge pull request #12 from Ganymede-Bio/try-visualization

bensonlee5 · web-flow · commit 8e52f8492361 · 2025-08-19T10:27:53.000-07:00
Update documentation and code formatting
diff --git a/.gitignore b/.gitignore
@@ -212,3 +212,4 @@ test_venv
 
 examples/proprietary/
 parsing_answer_guide.xlsx
+.crush/
diff --git a/examples/debug_large_file_bottleneck.py b/examples/debug_large_file_bottleneck.py
@@ -41,7 +41,7 @@ def test_file_reading():
         print(f"Read time: {read_time:.3f}s")
         print(f"Dimensions: {sheet_data.max_row + 1} x {sheet_data.max_column + 1}")
         print(f"Total cells: {cells:,}")
-        print(f"Read rate: {cells/read_time:,.0f} cells/sec")
+        print(f"Read rate: {cells / read_time:,.0f} cells/sec")
 
         return sheet_data
 
@@ -64,7 +64,7 @@ def test_simple_detection(sheet_data):
     cells = (sheet_data.max_row + 1) * (sheet_data.max_column + 1)
 
     print(f"Detection time: {detection_time:.3f}s")
-    print(f"Detection rate: {cells/detection_time:,.0f} cells/sec")
+    print(f"Detection rate: {cells / detection_time:,.0f} cells/sec")
     print(f"Is simple table: {result.is_simple_table}")
     print(f"Confidence: {result.confidence}")
     print(f"Range: {result.table_range}")
diff --git a/examples/parse_ground_truth.py b/examples/parse_ground_truth.py
@@ -72,9 +72,9 @@ def display_ground_truth(ground_truth):
             print(f"  Tab: {tab_name}")
             print(f"  Expected tables: {len(ranges)}")
             for i, range_str in enumerate(ranges):
-                print(f"    {i+1}. {range_str}")
+                print(f"    {i + 1}. {range_str}")
 
-    print(f"\n{'='*80}")
+    print(f"\n{'=' * 80}")
     print(f"Total files: {total_files}")
     print(f"Total expected tables: {total_tables}")
     print("=" * 80)
diff --git a/examples/test_format_detection.py b/examples/test_format_detection.py
@@ -86,9 +86,9 @@ async def test_format_detection():
             print(f"❌ Error processing {file_path}: {e}")
 
     # Summary
-    print(f"\n{'='*50}")
+    print(f"\n{'=' * 50}")
     print("DETECTION SUMMARY")
-    print(f"{'='*50}")
+    print(f"{'=' * 50}")
 
     if results:
         print(
@@ -116,7 +116,7 @@ async def test_format_detection():
         for method, count in detection_methods.items():
             print(f"  - {method}: {count}")
 
-    print(f"\n{'='*50}")
+    print(f"\n{'=' * 50}")
     print("✅ Format detection test completed!")
 
     if any(r["mismatch"] for r in results):
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "gridgulp"
-version = "0.3.4"
+version = "0.3.5"
 description = "Simplified intelligent spreadsheet ingestion framework with automatic table detection"
 readme = "README.md"
 requires-python = ">=3.10"
diff --git a/scripts/extract_dataframes.py b/scripts/extract_dataframes.py
@@ -237,7 +237,7 @@ def save_results(self, results: list[FileExtractionResult], format: str = "json"
                 f.write(f"- Tables detected: {total_detected}\n")
                 f.write(f"- Tables extracted: {total_extracted}\n")
                 f.write(f"- High quality tables (score > 0.7): {total_high_quality}\n")
-                f.write(f"- Overall success rate: {total_extracted/total_detected:.1%}\n\n")
+                f.write(f"- Overall success rate: {total_extracted / total_detected:.1%}\n\n")
 
                 # File details
                 f.write("## File Details\n\n")
@@ -256,7 +256,7 @@ def save_results(self, results: list[FileExtractionResult], format: str = "json"
                             f.write(f"- Tables detected: {sheet.total_tables_detected}\n")
                             f.write(f"- Tables extracted: {sheet.tables_extracted}\n")
                             f.write(
-                                f"- Success rate: {sheet.tables_extracted/sheet.total_tables_detected:.1%}\n\n"
+                                f"- Success rate: {sheet.tables_extracted / sheet.total_tables_detected:.1%}\n\n"
                             )
 
                             # High quality tables for this sheet
@@ -370,7 +370,7 @@ async def main():
     print(f"Tables extracted: {total_extracted}")
     print(f"High quality tables: {total_high_quality}")
     if total_detected > 0:
-        print(f"Overall success rate: {total_extracted/total_detected:.1%}")
+        print(f"Overall success rate: {total_extracted / total_detected:.1%}")
 
 
 if __name__ == "__main__":
diff --git a/scripts/save_extracted_csvs.py b/scripts/save_extracted_csvs.py
@@ -69,7 +69,7 @@ def save_dataframes_as_csv():
                     # Convert to Excel-style range
                     start_col_letter = chr(ord("A") + start_col)
                     end_col_letter = chr(ord("A") + end_col)
-                    range_str = f"{start_col_letter}{start_row+1}_{end_col_letter}{end_row+1}"
+                    range_str = f"{start_col_letter}{start_row + 1}_{end_col_letter}{end_row + 1}"
 
                     quality_score = table.get("quality_score", 0)
                     csv_filename = f"{file_name}_{sheet_name}_{range_str}_q{quality_score:.2f}.csv"
diff --git a/scripts/testing/run_tests_with_capture.py b/scripts/testing/run_tests_with_capture.py
@@ -250,10 +250,10 @@ async def run_performance_benchmarks(self):
                     elapsed = time.time() - start
                     times.append(elapsed)
 
-                    print(f"    Run {i+1}: {elapsed:.3f}s")
+                    print(f"    Run {i + 1}: {elapsed:.3f}s")
 
                 except Exception as e:
-                    print(f"    Run {i+1}: Failed - {e}")
+                    print(f"    Run {i + 1}: Failed - {e}")
 
             if times:
                 avg_time = sum(times) / len(times)

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -212,3 +212,4 @@ test_venv` |
| `212` | `212` | |
| `213` | `213` | `examples/proprietary/` |
| `214` | `214` | `parsing_answer_guide.xlsx` |
| | `215` | `+.crush/` |