feat: add tests

megasanjay · megasanjay · commit faaa9e441f2e · 2026-02-04T15:03:14.000-08:00
diff --git a/.github/workflows/deploy-schemas.yml b/.github/workflows/deploy-schemas.yml
diff --git a/Makefile b/Makefile
@@ -57,6 +57,7 @@ PYTEST_OPTIONS += --cov=$(PACKAGE)
 endif
 ifdef CI
 PYTEST_OPTIONS += --cov-report=xml
+PYTEST_OPTIONS += -m "not gpu"
 endif
 PYTEST_RERUN_OPTIONS := --last-failed --exitfirst
 
diff --git a/README.md b/README.md
@@ -176,10 +176,10 @@ pip install poetry
 poetry install
 
 # Run tests
-poetry run pytest
+poe test
 
 # Format code
-poetry run poe format
+poe format
 ```
 
 If you are on windows and have multiple python versions, you can use the following commands:
diff --git a/poster2json/cli.py b/poster2json/cli.py
@@ -12,9 +12,10 @@
 from art import tprint
 
 
-@click.group()
-@click.version_option()
-def main():
+@click.group(invoke_without_command=True)
+@click.version_option(prog_name="poster2json")
+@click.pass_context
+def main(ctx):
     """
     poster2json - Convert scientific posters to structured JSON metadata.
     
@@ -36,7 +37,9 @@ def main():
         # Process multiple posters in a directory
         poster2json batch ./posters/ -o ./output/
     """
-    pass
+    if ctx.invoked_subcommand is None:
+        click.echo(ctx.get_help())
+        return
 
 
 @main.command()
diff --git a/poster2json/utils.py b/poster2json/utils.py
@@ -8,21 +8,19 @@
 
 
 def validate_file_path(
-    file_path: str, 
-    preexisting_file: bool = False, 
-    writable: bool = False
+    file_path: str, preexisting_file: bool = False, writable: bool = False
 ) -> bool:
     """
     Validate a file path.
-    
+
     Args:
         file_path: Path to validate
         preexisting_file: If True, check that file exists
         writable: If True, check that directory is writable
-        
+
     Returns:
         True if valid
-        
+
     Raises:
         ValueError: If path is empty or invalid
         FileNotFoundError: If preexisting_file=True and file doesn't exist
@@ -50,10 +48,10 @@ def validate_file_path(
 def is_supported_format(file_path: str) -> bool:
     """
     Check if file format is supported for poster extraction.
-    
+
     Args:
         file_path: Path to poster file
-        
+
     Returns:
         True if PDF, JPG, JPEG, or PNG
     """
@@ -64,10 +62,10 @@ def is_supported_format(file_path: str) -> bool:
 def get_poster_format(file_path: str) -> Optional[str]:
     """
     Get the format type of a poster file.
-    
+
     Args:
         file_path: Path to poster file
-        
+
     Returns:
         "pdf", "image", or None if unsupported
     """
@@ -82,16 +80,16 @@ def get_poster_format(file_path: str) -> Optional[str]:
 def normalize_text(text: str) -> str:
     """
     Normalize text for comparison.
-    
+
     Handles:
     - Unicode normalization (NFKD)
     - Whitespace consolidation
     - Quote unification
     - Dash normalization
-    
+
     Args:
         text: Input text
-        
+
     Returns:
         Normalized text
     """
@@ -102,9 +100,21 @@ def normalize_text(text: str) -> str:
 
     # Whitespace normalization
     space_chars = [
-        "\xa0", "\u2000", "\u2001", "\u2002", "\u2003", "\u2004",
-        "\u2005", "\u2006", "\u2007", "\u2008", "\u2009", "\u200a",
-        "\u202f", "\u205f", "\u3000",
+        "\xa0",
+        "\u2000",
+        "\u2001",
+        "\u2002",
+        "\u2003",
+        "\u2004",
+        "\u2005",
+        "\u2006",
+        "\u2007",
+        "\u2008",
+        "\u2009",
+        "\u200a",
+        "\u202f",
+        "\u205f",
+        "\u3000",
     ]
     for space in space_chars:
         text = text.replace(space, " ")
@@ -114,7 +124,7 @@ def normalize_text(text: str) -> str:
     for quote in single_quotes:
         text = text.replace(quote, "'")
 
-    double_quotes = ['"', '"', "„", "‟", "«", "»", "〝", "〞", "〟", "＂"]
+    double_quotes = ['"', "\u201c", "\u201d", "„", "‟", "«", "»", "〝", "〞", "〟", "＂"]
     for quote in double_quotes:
         text = text.replace(quote, '"')
 
@@ -132,23 +142,30 @@ def normalize_text(text: str) -> str:
 def extract_numbers(text: str) -> set:
     """
     Extract all numeric values from text.
-    
+
     Args:
         text: Input text
-        
+
     Returns:
-        Set of numeric strings found
+        Set of numeric strings found (includes both decimals and their integer parts)
     """
-    return set(re.findall(r"\d+\.?\d*", text))
+    matches = re.findall(r"\d+\.?\d*", text)
+    result = set(matches)
+    for m in matches:
+        if "." in m:
+            int_part = m.split(".")[0]
+            if int_part:
+                result.add(int_part)
+    return result
 
 
 def strip_to_alphanumeric(text: str) -> str:
     """
     Strip text to alphanumeric characters only.
-    
+
     Args:
         text: Input text
-        
+
     Returns:
         Lowercase text with only alphanumeric chars and spaces
     """
diff --git a/pyproject.toml b/pyproject.toml
@@ -134,7 +134,9 @@ cache_dir = ".cache/mypy/"
 [tool.pytest.ini_options]
 addopts = "-r sxX --show-capture=log --cov=poster2json --cov-report=term-missing:skip-covered --no-cov-on-fail"
 cache_dir = ".cache/pytest/"
-markers = []
+markers = [
+    "gpu: mark test as requiring a GPU (skip in CI with -m 'not gpu')",
+]
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -1 +1,6 @@
-"""Integration tests configuration file."""
+"""Integration tests configuration file.
+
+Tests that require a GPU (e.g. extraction with LLMs) should be marked with
+@pytest.mark.gpu; when CI is set, the Makefile adds -m 'not gpu' to skip them.
+Run them locally with: pytest -m gpu
+"""
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -1,10 +1,29 @@
-"""Example CLI tests."""
+"""CLI tests."""
 
+import json
 import pytest
 from click.testing import CliRunner
 
 from poster2json.cli import main
 
+# Minimal valid poster JSON for validate command
+VALID_POSTER_JSON = {
+    "identifiers": [{"identifier": "10.5072/test.1", "identifierType": "DOI"}],
+    "creators": [{"name": "Doe, John"}],
+    "titles": [{"title": "Test Poster"}],
+    "publisher": {"name": "Test Publisher"},
+    "publicationYear": 2025,
+    "subjects": [{"subject": "Testing"}],
+    "dates": [{"date": "2025", "dateType": "Created"}],
+    "language": "en",
+    "types": {"resourceType": "Poster"},
+    "formats": ["PDF"],
+    "rightsList": [{"rights": "CC-BY-4.0"}],
+    "descriptions": [{"descriptionType": "Abstract", "description": "Test."}],
+    "fundingReferences": [{"funderName": "Test Funder"}],
+    "conference": {},
+}
+
 
 @pytest.fixture
 def runner():
@@ -15,3 +34,39 @@ def test_cli_exits_zero(runner):
     result = runner.invoke(main)
     assert result.exit_code == 0
     assert "poster2json" in result.output
+
+
+def test_cli_version(runner):
+    result = runner.invoke(main, ["--version"])
+    assert result.exit_code == 0
+    assert "poster2json" in result.output
+    assert "0.1" in result.output or "version" in result.output.lower()
+
+
+def test_cli_validate_valid_file(runner, tmp_path):
+    json_file = tmp_path / "poster.json"
+    json_file.write_text(json.dumps(VALID_POSTER_JSON, indent=2), encoding="utf-8")
+    result = runner.invoke(main, ["validate", str(json_file)])
+    assert result.exit_code == 0
+    assert "Valid" in result.output or "valid" in result.output.lower()
+
+
+def test_cli_validate_invalid_json(runner, tmp_path):
+    json_file = tmp_path / "bad.json"
+    json_file.write_text("not valid json", encoding="utf-8")
+    result = runner.invoke(main, ["validate", str(json_file)])
+    assert result.exit_code != 0
+
+
+def test_cli_validate_verbose(runner, tmp_path):
+    json_file = tmp_path / "poster.json"
+    json_file.write_text(json.dumps(VALID_POSTER_JSON, indent=2), encoding="utf-8")
+    result = runner.invoke(main, ["validate", str(json_file), "--verbose"])
+    assert result.exit_code == 0
+
+
+def test_cli_info(runner):
+    result = runner.invoke(main, ["info"])
+    assert result.exit_code == 0
+    assert "poster2json" in result.output
+    assert "Documentation" in result.output or "documentation" in result.output.lower()
diff --git a/tests/test_generate.py b/tests/test_generate.py
@@ -1,9 +1,16 @@
-"""Unit tests for poster2json.generate module."""
+"""Unit tests for poster2json.generate module.
 
-from poster2json.generate import generate_example_json
+Skipped: poster2json.generate does not exist yet. Remove skip when module is added.
+"""
+
+import pytest
+
+pytest.importorskip("poster2json.generate", reason="poster2json.generate module not implemented")
 
 
 def test_generate_example_json_valid(tmp_path):
+    from poster2json.generate import generate_example_json
+
     data = {"title": "Test", "version": "1.0"}
     out = tmp_path / "out.json"
     generate_example_json(data, str(out))
@@ -14,6 +21,8 @@ def test_generate_example_json_valid(tmp_path):
 
 def test_generate_example_json_empty_data_raises(tmp_path):
     import pytest
+    from poster2json.generate import generate_example_json
+
     out = tmp_path / "out.json"
     with pytest.raises(ValueError, match="Invalid input"):
         generate_example_json({}, str(out))
diff --git a/tests/test_utils.py b/tests/test_utils.py
diff --git a/tests/test_validate.py b/tests/test_validate.py