Skip to content

Commit d02dbfc

Browse files
committed
feat: add strict assertion types for test framework
1 parent e72475f commit d02dbfc

File tree

6 files changed

+148
-0
lines changed

6 files changed

+148
-0
lines changed

tests/cases/diff/regression/regression_003_large_config_fragmentation.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,4 +157,5 @@ assertions:
157157
options:
158158
max_fragments: 8
159159
max_files: 3
160+
max_fragments_per_file: 3
160161
min_score: 90
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Regression: context enrichment should be bounded
2+
# A small change should not pull in massive amounts of context.
3+
4+
initial:
5+
src/utils/helpers.py: |
6+
def format_date(date_str):
7+
parts = date_str.split("-")
8+
return f"{parts[2]}/{parts[1]}/{parts[0]}"
9+
10+
def format_currency(amount, currency="USD"):
11+
if currency == "USD":
12+
return f"${amount:.2f}"
13+
return f"{amount:.2f} {currency}"
14+
15+
changed:
16+
src/utils/helpers.py: |
17+
def format_date(date_str):
18+
parts = date_str.split("-")
19+
return f"{parts[2]}.{parts[1]}.{parts[0]}"
20+
21+
def format_currency(amount, currency="USD"):
22+
if currency == "USD":
23+
return f"${amount:.2f}"
24+
return f"{amount:.2f} {currency}"
25+
26+
assertions:
27+
must_include_files:
28+
- helpers.py
29+
30+
must_include_content:
31+
- format_date
32+
33+
must_not_include:
34+
- GARBAGE_REG_008_A
35+
36+
options:
37+
max_enrichment: 20.0
38+
min_recall: 1.0
39+
max_noise_rate: 0.05
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Regression: single-file change should not produce excessive per-file fragments
2+
# When only a couple of functions change in a file, the file should appear as 1-2 fragments (the assertion below allows at most 5).
3+
4+
initial:
5+
src/auth/login.py: |
6+
from hashlib import sha256
7+
8+
def hash_password(password):
9+
return sha256(password.encode()).hexdigest()
10+
11+
def verify_password(password, password_hash):
12+
return hash_password(password) == password_hash
13+
14+
def create_session(user_id):
15+
import uuid
16+
return str(uuid.uuid4())
17+
18+
def validate_session(session_id):
19+
return len(session_id) == 36
20+
21+
changed:
22+
src/auth/login.py: |
23+
from hashlib import sha256
24+
import secrets
25+
26+
def hash_password(password, salt=None):
27+
if salt is None:
28+
salt = secrets.token_hex(16)
29+
return f"{salt}:{sha256((salt + password).encode()).hexdigest()}"
30+
31+
def verify_password(password, password_hash):
32+
salt, hashed = password_hash.split(":")
33+
return hash_password(password, salt) == password_hash
34+
35+
def create_session(user_id):
36+
import uuid
37+
return str(uuid.uuid4())
38+
39+
def validate_session(session_id):
40+
return len(session_id) == 36
41+
42+
assertions:
43+
must_include_files:
44+
- login.py
45+
46+
must_include_content:
47+
- hash_password
48+
- verify_password
49+
50+
must_not_include:
51+
- GARBAGE_REG_009_A
52+
53+
options:
54+
max_fragments_per_file: 5
55+
min_recall: 1.0

tests/framework/loader.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,11 @@ def _parse_yaml_test(data: dict, source_file: Path | None = None) -> YamlTestCas
7474
must_not_include_files=must_not_include_files,
7575
max_fragments=options.get("max_fragments", data.get("max_fragments")),
7676
max_files=options.get("max_files", data.get("max_files")),
77+
max_fragments_per_file=options.get("max_fragments_per_file", data.get("max_fragments_per_file")),
78+
max_enrichment=options.get("max_enrichment", data.get("max_enrichment")),
79+
min_recall=options.get("min_recall", data.get("min_recall")),
80+
max_noise_rate=options.get("max_noise_rate", data.get("max_noise_rate")),
81+
max_context_tokens=options.get("max_context_tokens", data.get("max_context_tokens")),
7782
commit_message=options.get("commit_message", data.get("commit_message", "Update files")),
7883
min_budget=options.get("min_budget", data.get("min_budget")),
7984
add_garbage_files=options.get("add_garbage", data.get("add_garbage_files", True)),

tests/framework/types.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ class YamlTestCase:
2525
must_not_include_files: list[str] = field(default_factory=list)
2626
max_fragments: int | None = None
2727
max_files: int | None = None
28+
max_fragments_per_file: int | None = None
29+
max_enrichment: float | None = None
30+
min_recall: float | None = None
31+
max_noise_rate: float | None = None
32+
max_context_tokens: int | None = None
2833
commit_message: str = "Update files"
2934
min_budget: int | None = None
3035
add_garbage_files: bool = True

tests/test_yaml_diff.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from collections import Counter
12
from pathlib import Path
23

34
import pytest
@@ -24,13 +25,26 @@ def test_cases_loaded():
2425
MIN_INDIVIDUAL_SCORE = 10.0
2526

2627

28+
def _count_fragments_per_file(context: dict) -> dict[str, int]:
29+
counts: Counter[str] = Counter()
30+
for frag in context.get("fragments", []):
31+
path = frag.get("path", "")
32+
if path:
33+
counts[path] += 1
34+
return dict(counts)
35+
36+
2737
@pytest.mark.parametrize("case", ALL_CASES, ids=lambda c: c.id)
2838
def test_diff_yaml(yaml_test_runner: YamlTestRunner, case: YamlTestCase, record_property, request):
2939
if case.xfail:
3040
request.node.add_marker(pytest.mark.xfail(reason=case.xfail, strict=True))
3141
context = yaml_test_runner.run_test_case(case)
3242
breakdown = yaml_test_runner.score_test_case(context, case)
3343

44+
fragments = context.get("fragments", [])
45+
frag_count = len(fragments)
46+
unique_files = len({f.get("path", "") for f in fragments if f.get("path")})
47+
3448
record_property("score", breakdown.score)
3549
record_property("recall", round(breakdown.recall * 100, 1))
3650
record_property("noise_rate", round(breakdown.noise_rate * 100, 1))
@@ -39,12 +53,41 @@ def test_diff_yaml(yaml_test_runner: YamlTestRunner, case: YamlTestCase, record_
3953
record_property("enrichment", round(breakdown.enrichment * 100))
4054
record_property("diff_tokens", breakdown.diff_tokens)
4155
record_property("context_tokens", breakdown.context_tokens)
56+
record_property("fragment_count", frag_count)
57+
record_property("unique_files", unique_files)
4258

4359
effective_min = case.min_score if case.min_score is not None else MIN_INDIVIDUAL_SCORE
4460
assert breakdown.score >= effective_min, f"[{case.id}] score {breakdown.score:.1f}% below minimum {effective_min}%"
4561
if case.must_include_files:
4662
assert breakdown.diff_covered, f"[{case.id}] diff lines not covered by context"
4763

64+
if case.max_fragments_per_file is not None:
65+
per_file = _count_fragments_per_file(context)
66+
for path, count in per_file.items():
67+
assert (
68+
count <= case.max_fragments_per_file
69+
), f"[{case.id}] {path} has {count} fragments, exceeds max_fragments_per_file={case.max_fragments_per_file}"
70+
71+
if case.max_enrichment is not None:
72+
assert (
73+
breakdown.enrichment <= case.max_enrichment
74+
), f"[{case.id}] enrichment {breakdown.enrichment:.1f}x exceeds max_enrichment={case.max_enrichment}"
75+
76+
if case.min_recall is not None:
77+
assert (
78+
breakdown.recall >= case.min_recall
79+
), f"[{case.id}] recall {breakdown.recall:.1%} below min_recall={case.min_recall:.0%}"
80+
81+
if case.max_noise_rate is not None:
82+
assert (
83+
breakdown.noise_rate <= case.max_noise_rate
84+
), f"[{case.id}] noise_rate {breakdown.noise_rate:.1%} exceeds max_noise_rate={case.max_noise_rate:.0%}"
85+
86+
if case.max_context_tokens is not None:
87+
assert (
88+
breakdown.context_tokens <= case.max_context_tokens
89+
), f"[{case.id}] {breakdown.context_tokens} context tokens exceeds max_context_tokens={case.max_context_tokens}"
90+
4891

4992
@pytest.mark.parametrize(
5093
"case",

0 commit comments

Comments
 (0)