thisisartium · kseebaldt · Apr 2, 2025 · Mar 27, 2025 · Mar 28, 2025 · Mar 28, 2025
diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml
@@ -53,12 +53,11 @@ jobs:
           --verbose --verbosity=10 --capture=no --tb=native --showlocals
           -k "not test_compute_alignment and not test_reproducing_the_same_text_embedding and not test_response_shows_developer_names and not test_llm_will_hallucinate_given_no_data and not test_cosine_similarity_generated_responses"
 
-      - name: Type check Python code
-        run: uv run mypy src
+      - name: Type check
+        run: uv run mypy src tests examples/team_recommender/src
 
-      - name: Run ruff linter and formatter
+      - name: Linter and formatter
         run: |
-
           uv run ruff check src tests examples
           uv run ruff format src tests examples
 

diff --git a/examples/team_recommender/src/response_matches_json_schema.py b/examples/team_recommender/src/response_matches_json_schema.py
@@ -1,11 +1,13 @@
+from typing import Any
+
 from jsonschema import FormatChecker, validate
 
 blank_checker = FormatChecker()
 
 
 def response_matches_json_schema(
     response: dict,
-    schema: any,
+    schema: Any,
     format_checker: FormatChecker = blank_checker,
 ) -> bool:
     """

diff --git a/examples/team_recommender/tests/example_1_text_response/openai_embeddings.py b/examples/team_recommender/tests/example_1_text_response/openai_embeddings.py
@@ -38,7 +38,7 @@ def stabilize_embedding_object(embedding_object):
 
 
 def stabilize_float(x: float) -> float:
-    return struct.unpack("f", struct.pack("f", x))[0]
+    return float(struct.unpack("f", struct.pack("f", x))[0])
 
 
 def create_embedding_object(text: str) -> dict:

diff --git a/examples/team_recommender/tests/example_1_text_response/test_good_fit_for_project.py b/examples/team_recommender/tests/example_1_text_response/test_good_fit_for_project.py
@@ -132,7 +132,10 @@ def test_llm_will_hallucinate_given_no_data(snapshot):
     )
 
     tolerance_margin = 0.05
-    assert similarity_to_hallucination > similarity_to_no_hallucinations + tolerance_margin
+    likely_hallucination = (
+        similarity_to_hallucination > similarity_to_no_hallucinations + tolerance_margin
+    )
+    assert likely_hallucination
 
 
 def semantic_similarity_score(a: list, b: list) -> float:

diff --git a/pyproject.toml b/pyproject.toml
@@ -16,7 +16,8 @@ dependencies = [
   # this small library should be kept independent
   # consider adding dependencies to one of the dependency groups
 ]
-license = { file = "LICENSE" }
+license = "MIT"
+license-files = ["LICENSE"]
 
 [dependency-groups]
 test = [
@@ -36,6 +37,7 @@ dev = [
   "pydantic>=2.10.6,<3",
   "ruff>=0.9.10",
   "pytest-timeout>=2.3.1",
+  "types-jsonschema>=4.23.0.20241208",
 ]
 
 [tool.uv]
@@ -67,6 +69,10 @@ namespace_packages = true
 explicit_package_bases = true
 mypy_path = ["src"]
 
+[[tool.mypy.overrides]]
+module = "tests.*"
+disallow_untyped_defs = false
+
 [tool.black]
 line-length = 120
 target-version = ['py313']

diff --git a/tests/test_reporter.py b/tests/test_reporter.py
@@ -2,9 +2,9 @@
 import time
 from unittest.mock import MagicMock, mock_open, patch
 
+from cat_ai.helpers.helpers import root_dir
+from cat_ai.reporter import Reporter
 from cat_ai.statistical_analysis import analyse_measure_from_test_sample
-from src.cat_ai.helpers.helpers import root_dir
-from src.cat_ai.reporter import Reporter
 
 
 def test_reporter_creates_a_unique_folder_path() -> None:

diff --git a/tests/test_runner.py b/tests/test_runner.py
@@ -1,5 +1,5 @@
-from src.cat_ai.reporter import Reporter
-from src.cat_ai.runner import Runner
+from cat_ai.reporter import Reporter
+from cat_ai.runner import Runner
 
 
 # Dummy test function that will be passed to Runner

diff --git a/tests/test_statistical_analysis.py b/tests/test_statistical_analysis.py
@@ -225,9 +225,9 @@ def test_failure_rate_graph(snapshot):
     matplotlib.rcParams["ps.fonttype"] = 42
 
     # Generate a series of failure rates
-    totals = np.ones(100) * 100
-    failures = np.arange(0, 100)
-
+    totals = [100] * 100
+    failures = list(range(100))
+    assert len(failures) == len(totals)
     # Calculate results for each rate
     results = [
         analyse_failure_rate_from_test_sample(f, t) for f, t in zip(failures, totals, strict=True)

diff --git a/uv.lock b/uv.lock