Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions .github/workflows/python-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,11 @@ jobs:
--verbose --verbosity=10 --capture=no --tb=native --showlocals
-k "not test_compute_alignment and not test_reproducing_the_same_text_embedding and not test_response_shows_developer_names and not test_llm_will_hallucinate_given_no_data and not test_cosine_similarity_generated_responses"

- name: Type check Python code
run: uv run mypy src
- name: Type check
run: uv run mypy src tests examples/team_recommender/src

- name: Run ruff linter and formatter
- name: Linter and formatter
run: |

uv run ruff check src tests examples
uv run ruff format src tests examples

Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from typing import Any

from jsonschema import FormatChecker, validate

blank_checker = FormatChecker()


def response_matches_json_schema(
response: dict,
schema: any,
schema: Any,
format_checker: FormatChecker = blank_checker,
) -> bool:
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def stabilize_embedding_object(embedding_object):


def stabilize_float(x: float) -> float:
return struct.unpack("f", struct.pack("f", x))[0]
return float(struct.unpack("f", struct.pack("f", x))[0])


def create_embedding_object(text: str) -> dict:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,10 @@ def test_llm_will_hallucinate_given_no_data(snapshot):
)

tolerance_margin = 0.05
assert similarity_to_hallucination > similarity_to_no_hallucinations + tolerance_margin
likely_hallucination = (
similarity_to_hallucination > similarity_to_no_hallucinations + tolerance_margin
)
assert likely_hallucination


def semantic_similarity_score(a: list, b: list) -> float:
Expand Down
8 changes: 7 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ dependencies = [
# this small library should be kept independent
# consider adding dependencies to one of the dependency groups
]
license = { file = "LICENSE" }
license = "MIT"
license-files = ["LICENSE"]

[dependency-groups]
test = [
Expand All @@ -36,6 +37,7 @@ dev = [
"pydantic>=2.10.6,<3",
"ruff>=0.9.10",
"pytest-timeout>=2.3.1",
"types-jsonschema>=4.23.0.20241208",
]

[tool.uv]
Expand Down Expand Up @@ -67,6 +69,10 @@ namespace_packages = true
explicit_package_bases = true
mypy_path = ["src"]

[[tool.mypy.overrides]]
module = "tests.*"
disallow_untyped_defs = false

[tool.black]
line-length = 120
target-version = ['py313']
Expand Down
4 changes: 2 additions & 2 deletions tests/test_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
import time
from unittest.mock import MagicMock, mock_open, patch

from cat_ai.helpers.helpers import root_dir
from cat_ai.reporter import Reporter
from cat_ai.statistical_analysis import analyse_measure_from_test_sample
from src.cat_ai.helpers.helpers import root_dir
from src.cat_ai.reporter import Reporter


def test_reporter_creates_a_unique_folder_path() -> None:
Expand Down
4 changes: 2 additions & 2 deletions tests/test_runner.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from src.cat_ai.reporter import Reporter
from src.cat_ai.runner import Runner
from cat_ai.reporter import Reporter
from cat_ai.runner import Runner


# Dummy test function that will be passed to Runner
Expand Down
6 changes: 3 additions & 3 deletions tests/test_statistical_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,9 @@ def test_failure_rate_graph(snapshot):
matplotlib.rcParams["ps.fonttype"] = 42

# Generate a series of failure rates
totals = np.ones(100) * 100
failures = np.arange(0, 100)

totals = [100] * 100
failures = list(range(100))
assert len(failures) == len(totals)
# Calculate results for each rate
results = [
analyse_failure_rate_from_test_sample(f, t) for f, t in zip(failures, totals, strict=True)
Expand Down
14 changes: 14 additions & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.