Commit 04fa01f
Add more tests and refactor tui tests
1 parent 7457550

File tree: 4 files changed, +230 −122 lines


tests/test_basic.py

Lines changed: 5 additions & 2 deletions
@@ -1,9 +1,12 @@
-"""Basic tests for datanomy."""
+"""Basic package-level tests for datanomy."""
 
 import datanomy
 
 
 def test_version() -> None:
-    """Test that version is defined."""
+    """Test that version is defined and valid."""
     assert hasattr(datanomy, "__version__")
     assert isinstance(datanomy.__version__, str)
+    assert len(datanomy.__version__) > 0
+    # Should be semver-ish (has dots)
+    assert "." in datanomy.__version__

tests/test_cli.py

Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,78 @@
+"""Tests for the CLI module."""
+
+from pathlib import Path
+from unittest.mock import Mock, patch
+
+from click.testing import CliRunner
+
+from datanomy.cli import main
+
+
+def test_cli_rejects_non_parquet_extension(tmp_path: Path) -> None:
+    """Test that CLI rejects files without .parquet extension."""
+    bad_file = tmp_path / "test.txt"
+    bad_file.write_text("not parquet")
+
+    runner = CliRunner()
+    result = runner.invoke(main, [str(bad_file)])
+
+    assert result.exit_code == 1
+    assert "does not appear to be a Parquet file" in result.output
+
+
+def test_cli_rejects_nonexistent_file() -> None:
+    """Test that CLI rejects files that don't exist."""
+    runner = CliRunner()
+    result = runner.invoke(main, ["/nonexistent/file.parquet"])
+
+    assert result.exit_code == 2
+    assert "does not exist" in result.output.lower()
+
+
+@patch("datanomy.cli.DatanomyApp")
+def test_cli_launches_app_with_valid_file(mock_app: Mock, simple_parquet: Path) -> None:
+    """Test that CLI launches the app with a valid Parquet file."""
+    # Mock the app to avoid actually running the TUI
+    mock_app_instance = Mock()
+    mock_app.return_value = mock_app_instance
+
+    runner = CliRunner()
+    result = runner.invoke(main, [str(simple_parquet)])
+
+    # Should have created an app instance
+    assert mock_app.called
+    # Should have called run on the app
+    assert mock_app_instance.run.called
+    # Should exit successfully
+    assert result.exit_code == 0
+
+
+@patch("datanomy.cli.DatanomyApp")
+@patch("datanomy.cli.ParquetReader")
+def test_cli_creates_reader(
+    mock_reader: Mock, mock_app: Mock, simple_parquet: Path
+) -> None:
+    """Test that CLI creates a ParquetReader with the correct file path."""
+    runner = CliRunner()
+    runner.invoke(main, [str(simple_parquet)])
+
+    # Should have created a reader with the file path
+    mock_reader.assert_called_once_with(simple_parquet)
+
+
+def test_cli_case_insensitive_extension(tmp_path: Path) -> None:
+    """Test that CLI accepts .PARQUET extension (case insensitive)."""
+    # Create a valid parquet file with uppercase extension
+    import pyarrow as pa
+    import pyarrow.parquet as pq
+
+    file_path = tmp_path / "test.PARQUET"
+    table = pa.table({"id": [1, 2, 3]})
+    pq.write_table(table, file_path)
+
+    with patch("datanomy.cli.DatanomyApp"):
+        runner = CliRunner()
+        result = runner.invoke(main, [str(file_path)])
+
+    # Should accept uppercase extension
+    assert result.exit_code == 0
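
The CLI implementation itself is not part of this commit, but these tests pin down its contract: exit code 2 for a missing file (click's exists=True check), exit code 1 for a bad extension, case-insensitive suffix matching, then ParquetReader feeding DatanomyApp.run(). A minimal sketch consistent with that contract follows; the exact error message wiring is an assumption, not the project's actual code.

# Hypothetical sketch of datanomy/cli.py, inferred from the tests above.
import sys
from pathlib import Path

import click

from datanomy.reader import ParquetReader
from datanomy.tui import DatanomyApp


@click.command()
@click.argument("file_path", type=click.Path(exists=True, path_type=Path))
def main(file_path: Path) -> None:
    """Inspect a Parquet file in a terminal UI."""
    # click.Path(exists=True) already exits with code 2 and a
    # "... does not exist" message for missing files.
    if file_path.suffix.lower() != ".parquet":
        # Case-insensitive extension check; exit code 1 on failure.
        click.echo(f"Error: {file_path} does not appear to be a Parquet file")
        sys.exit(1)

    reader = ParquetReader(file_path)
    DatanomyApp(reader).run()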

tests/test_reader.py

Lines changed: 61 additions & 0 deletions
@@ -0,0 +1,61 @@
+"""Tests for the ParquetReader module."""
+
+from pathlib import Path
+
+from datanomy.reader import ParquetReader
+
+
+def test_reader_simple_file(simple_parquet: Path) -> None:
+    """Test reader with a simple Parquet file."""
+    reader = ParquetReader(simple_parquet)
+
+    assert reader.file_path == simple_parquet
+    assert reader.file_path.exists()
+
+    assert reader.num_rows == 5
+    assert reader.num_row_groups == 1
+    assert len(reader.schema_arrow) == 4
+    assert reader.file_size > 0
+
+    metadata = reader.metadata
+    assert metadata is not None
+    assert metadata.num_rows == 5
+    assert metadata.num_row_groups == 1
+
+
+def test_reader_multi_row_groups(multi_row_group_parquet: Path) -> None:
+    """Test reader with multiple row groups."""
+    reader = ParquetReader(multi_row_group_parquet)
+
+    assert reader.num_rows == 10000
+    assert reader.num_row_groups == 5
+
+    # Check each row group
+    for i in range(reader.num_row_groups):
+        rg = reader.get_row_group_info(i)
+        assert rg.num_rows == 2000
+        assert rg.total_byte_size > 0
+
+
+def test_reader_empty_file(empty_parquet: Path) -> None:
+    """Test reader with an empty Parquet file."""
+    reader = ParquetReader(empty_parquet)
+
+    # File has a schema but no rows
+    assert reader.num_rows == 0
+    assert reader.num_row_groups == 1
+    assert len(reader.schema_arrow) == 2
+    assert reader.file_size > 0
+
+
+def test_reader_large_schema(large_schema_parquet: Path) -> None:
+    """Test reader with many columns."""
+    reader = ParquetReader(large_schema_parquet)
+
+    assert reader.num_rows == 3
+    assert len(reader.schema_arrow) == 50
+
+    # Check all columns are named correctly
+    field_names = [field.name for field in reader.schema_arrow]
+    for i in range(50):
+        assert f"col_{i}" in field_names

tests/test_tui.py

Lines changed: 86 additions & 120 deletions
@@ -1,134 +1,100 @@
-"""Tests for the TUI module."""
+"""Tests for the TUI module.
+
+These are smoke tests to ensure the UI doesn't crash.
+We don't test specific text/formatting as that's brittle and changes often.
+"""
 
 from pathlib import Path
-from typing import Any, TypedDict
 
 import pytest
-from rich.console import Console
-from textual.containers import Container, VerticalScroll
 
 from datanomy.reader import ParquetReader
 from datanomy.tui import DatanomyApp
 
 
-class FileDataFixture(TypedDict):
-    """Type definition for test file data."""
-
-    file_size: str
-    num_rows: int
-    num_row_groups: int
-    schema: dict[str, str]
-
-
-@pytest.fixture
-def file(request: pytest.FixtureRequest) -> Any:
-    """Indirect fixture to get other fixtures by name."""
-    return request.getfixturevalue(request.param)
-
-
-test_data_fixtures: dict[str, FileDataFixture] = {
-    "simple.parquet": {
-        "file_size": "0.00",
-        "num_rows": 5,
-        "num_row_groups": 1,
-        "schema": {
-            "id": "int64",
-            "name": "string",
-            "age": "int64",
-            "score": "double",
-        },
-    },
-    "multi_row_group.parquet": {
-        "file_size": "0.11",
-        "num_rows": 10000,
-        "num_row_groups": 5,
-        "schema": {
-            "id": "int64",
-            "category": "string",
-            "value": "int64",
-        },
-    },
-    "complex.parquet": {
-        "file_size": "0.00",
-        "num_rows": 3,
-        "num_row_groups": 1,
-        "schema": {
-            "id": "int64",
-            "data": "struct<x: int64, y: int64>",
-            "tags": "list<element: string>",
-        },
-    },
-    "empty.parquet": {
-        "file_size": "0.00",
-        "num_rows": 0,
-        "num_row_groups": 1,
-        "schema": {
-            "id": "int64",
-            "name": "string",
-        },
-    },
-    "large_schema.parquet": {
-        "file_size": "0.01",
-        "num_rows": 3,
-        "num_row_groups": 1,
-        "schema": {f"col_{i}": "int64" for i in range(50)},
-    },
-}
-
-
-async def check_app_for_file(filename: Path) -> None:
-    reader = ParquetReader(filename)
+@pytest.mark.asyncio
+async def test_app_launches_without_crash(simple_parquet: Path) -> None:
+    """Test that app launches and runs without crashing."""
+    reader = ParquetReader(simple_parquet)
+    app = DatanomyApp(reader)
+
+    async with app.run_test():
+        # If we get here, app launched successfully
+        assert app is not None
+
+
+@pytest.mark.asyncio
+async def test_app_has_required_widgets(simple_parquet: Path) -> None:
+    """Test that all expected widgets are present."""
+    reader = ParquetReader(simple_parquet)
+    app = DatanomyApp(reader)
+
+    async with app.run_test():
+        # Verify core widgets exist
+        assert app.query_one("#file-info") is not None
+        assert app.query_one("#schema") is not None
+        assert app.query_one("#row-groups") is not None
+
+
+@pytest.mark.asyncio
+async def test_widgets_render_without_error(simple_parquet: Path) -> None:
+    """Test that all widgets can render without throwing exceptions."""
+    reader = ParquetReader(simple_parquet)
+    app = DatanomyApp(reader)
+
+    async with app.run_test():
+        # Call render on each widget - will raise if there's an error
+        file_info = app.query_one("#file-info")
+        file_info.render()
+
+        schema = app.query_one("#schema")
+        schema.render()
+
+        row_groups = app.query_one("#row-groups")
+        row_groups.render()
+
+
+@pytest.mark.asyncio
+async def test_app_with_empty_file(empty_parquet: Path) -> None:
+    """Test that app handles empty Parquet files."""
+    reader = ParquetReader(empty_parquet)
+    app = DatanomyApp(reader)
+
+    async with app.run_test():
+        # Should not crash with empty file
+        app.query_one("#file-info").render()
+        app.query_one("#schema").render()
+        app.query_one("#row-groups").render()
+
+
+@pytest.mark.asyncio
+async def test_app_with_complex_schema(complex_schema_parquet: Path) -> None:
+    """Test that app handles complex nested schemas."""
+    reader = ParquetReader(complex_schema_parquet)
     app = DatanomyApp(reader)
+
     async with app.run_test():
-        assert app.title == "DatanomyApp"
-        console = Console()
-        file_info_widget = (
-            app.query_one(VerticalScroll).query_one(Container).query_one("#file-info")
-        )
-        with console.capture() as capture:
-            console.print(file_info_widget.render())
-        file_info = capture.get()
-        file_data = test_data_fixtures[filename.name]
-        assert (
-            f"File: {filename.name}\nSize: {file_data['file_size']} MB\nRows: {file_data['num_rows']:,}\nRow Groups: {file_data['num_row_groups']}"
-            in file_info
-        )
-
-        schema_widget = (
-            app.query_one(VerticalScroll).query_one(Container).query_one("#schema")
-        )
-        with console.capture() as capture:
-            console.print(schema_widget.render())
-        schema_info = capture.get()
-        for field, dtype in file_data["schema"].items():
-            assert f"{field}: {dtype}" in schema_info
-
-        row_groups_widget = (
-            app.query_one(VerticalScroll).query_one(Container).query_one("#row-groups")
-        )
-        with console.capture() as capture:
-            console.print(row_groups_widget.render())
-        row_groups_info = capture.get()
-        for i in range(file_data["num_row_groups"]):
-            assert (
-                f"Row Group {i}: {int(file_data['num_rows']) // int(file_data['num_row_groups']):,} rows"
-                in row_groups_info
-            )
+        # Should handle nested types without crashing
+        app.query_one("#schema").render()
 
 
 @pytest.mark.asyncio
-@pytest.mark.parametrize(
-    "file",
-    [
-        "simple_parquet",
-        "multi_row_group_parquet",
-        "complex_schema_parquet",
-        "empty_parquet",
-        "large_schema_parquet",
-    ],
-    indirect=True,
-)
-async def test_containers_with_files(
-    file: Path,
-) -> None:
-    await check_app_for_file(file)
+async def test_app_with_many_columns(large_schema_parquet: Path) -> None:
+    """Test that app handles files with many columns."""
+    reader = ParquetReader(large_schema_parquet)
+    app = DatanomyApp(reader)
+
+    async with app.run_test():
+        # Should handle large schema without crashing
+        app.query_one("#schema").render()
+
+
+@pytest.mark.asyncio
+async def test_app_with_multiple_row_groups(multi_row_group_parquet: Path) -> None:
+    """Test that app handles multiple row groups."""
+    reader = ParquetReader(multi_row_group_parquet)
+    app = DatanomyApp(reader)
+
+    async with app.run_test():
+        # Should handle multiple row groups without crashing
+        app.query_one("#row-groups").render()
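
Textual's run_test() also yields a Pilot object for driving simulated input, which would be the natural next step beyond these smoke tests. A possible extension is sketched below; the "q" quit binding it presses is a guess, since this diff shows no key bindings.

# Assumed extension, not part of this commit; "q" as a quit binding is hypothetical.
from pathlib import Path

import pytest

from datanomy.reader import ParquetReader
from datanomy.tui import DatanomyApp


@pytest.mark.asyncio
async def test_app_responds_to_keys(simple_parquet: Path) -> None:
    reader = ParquetReader(simple_parquet)
    app = DatanomyApp(reader)

    async with app.run_test() as pilot:
        # Let pending messages settle, then simulate a keypress.
        await pilot.pause()
        await pilot.press("q")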
