Skip to content

Commit c7b0300

Browse files
Copilot and bashandbone committed
Fix mock_checkpoint_manager to use Mock(spec=CheckpointManager) with AsyncMock only for async methods
Co-authored-by: bashandbone <89049923+bashandbone@users.noreply.github.com>
1 parent 0ea8cac commit c7b0300

21 files changed

+243
-174
lines changed

remaining_test_issues.md

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,8 @@
1+
<!--
2+
SPDX-FileCopyrightText: 2026 Knitli Inc.
3+
4+
SPDX-License-Identifier: MIT OR Apache-2.0
5+
-->
16
# Remaining Test Issues & Proposed Solutions
27

38
This document outlines the current state of the test suite after initial fixes, including specific issues that are blocking tests from passing and proposed solutions for each.

ruff.toml

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -268,6 +268,15 @@ convention = "google"
268268
"src/codeweaver/providers/vector_stores/inmemory.py" = [
269269
"C901", # too complex - vector store initialization has many conditions
270270
]
271+
# Provider config validation has complex asymmetric compatibility checking
272+
"src/codeweaver/providers/config/categories/embedding.py" = [
273+
"C901", # too complex - asymmetric validation has inherent complexity
274+
"G201", # logger.exception preferred - existing logging style
275+
"SIM102", # nested if - validates multiple independent conditions
276+
"TRY300", # else block - return-in-try pattern intentional
277+
"TRY301", # abstract raise - validation raises are intentional inline
278+
"TRY401", # redundant exception in logging - existing style
279+
]
271280
"tools/tests/**/*.py" = [
272281
"ANN201", # missing return type annotation for public function
273282
"ANN002",

scripts/language-support/export-classifications.py

100644 → 100755
Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -346,7 +346,7 @@ def main() -> int:
346346
"modification": profile["modification"],
347347
"debugging": profile["debugging"],
348348
"documentation": profile["documentation"],
349-
}
349+
},
350350
}
351351

352352
with (output_dir / "_scoring.json").open("w", encoding="utf-8") as f:
@@ -442,7 +442,7 @@ def main() -> int:
442442
print(f" High confidence: {total_confident:,} ({meta['confident_pct']}%)")
443443
print(f" Unclassified: {len(unclassified_things):,}")
444444
print(
445-
f" Universal exact: {len(universal_exact):,} thing names classified same in all languages"
445+
f" Universal exact: {len(universal_exact):,} thing names classified same in all languages",
446446
)
447447
print(f" Universal 75%+: {len(universal_majority):,} thing names with majority agreement")
448448
print("\n Tier distribution:")

scripts/language-support/generate-overrides.py

100644 → 100755
Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -189,11 +189,11 @@ def max_value_len(entries: list[dict], *, is_doc: bool = False) -> int:
189189
lines.append(f"# {language.capitalize()} language classification overrides")
190190
lines.append(
191191
f"# Holdout evaluation: {baseline_pct}% overall"
192-
f" \u2192 expected {expected_label} with overrides"
192+
f" \u2192 expected {expected_label} with overrides",
193193
)
194194
lines.append(
195195
f"# {len(misclassifications)} items to override"
196-
f" ({n_unclassified} unclassified + {n_misclassified} misclassified)"
196+
f" ({n_unclassified} unclassified + {n_misclassified} misclassified)",
197197
)
198198

199199
# [overrides] section
@@ -219,7 +219,7 @@ def max_value_len(entries: list[dict], *, is_doc: bool = False) -> int:
219219

220220

221221
def _emit_entries(
222-
lines: list[str], entries: list[dict], key_width: int, val_width: int, *, is_doc: bool
222+
lines: list[str], entries: list[dict], key_width: int, val_width: int, *, is_doc: bool,
223223
) -> None:
224224
"""Emit sorted, tier-grouped TOML entries with aligned columns."""
225225
current_tier: int | None = None
@@ -264,15 +264,15 @@ def _emit_doc_entries(lines: list[str], entries: list[dict], key_width: int) ->
264264

265265
def main() -> None: # sourcery skip: low-code-quality
266266
parser = argparse.ArgumentParser(
267-
description="Generate TOML override files from holdout evaluation data."
267+
description="Generate TOML override files from holdout evaluation data.",
268268
)
269269
parser.add_argument(
270270
"--all",
271271
action="store_true",
272272
help="Regenerate ALL languages, even those with existing overrides.",
273273
)
274274
parser.add_argument(
275-
"--lang", nargs="+", metavar="LANG", help="Generate overrides for specific languages only."
275+
"--lang", nargs="+", metavar="LANG", help="Generate overrides for specific languages only.",
276276
)
277277
parser.add_argument(
278278
"--dry-run",

scripts/language-support/holdout-evaluation.py

100644 → 100755
Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,7 @@ def load_overrides(overrides_dir: Path, lang: str) -> dict[str, str]:
7979

8080

8181
def build_universal_rules(
82-
classifications_dir: Path, holdout_lang: str
82+
classifications_dir: Path, holdout_lang: str,
8383
) -> tuple[dict[str, str], dict[str, str], dict[str, str]]:
8484
"""Build universal rules excluding the holdout language.
8585
@@ -183,7 +183,7 @@ def classify_thing_universal(
183183

184184

185185
def evaluate_holdout(
186-
classifications_dir: Path, holdout_lang: str, *, use_overrides: bool = False
186+
classifications_dir: Path, holdout_lang: str, *, use_overrides: bool = False,
187187
) -> dict[str, Any]: # sourcery skip: low-code-quality
188188
"""Run holdout evaluation for a single language."""
189189

@@ -197,7 +197,7 @@ def evaluate_holdout(
197197

198198
# Build universal rules excluding this language
199199
exact_rules, majority_rules, category_rules = build_universal_rules(
200-
classifications_dir, holdout_lang
200+
classifications_dir, holdout_lang,
201201
)
202202

203203
# Load overrides if requested
@@ -226,7 +226,7 @@ def evaluate_holdout(
226226
continue
227227

228228
predicted, method = classify_thing_universal(
229-
entry, exact_rules, majority_rules, category_rules, overrides
229+
entry, exact_rules, majority_rules, category_rules, overrides,
230230
)
231231
method_counts[method] += 1
232232

@@ -303,20 +303,20 @@ def evaluate_holdout(
303303

304304

305305
def print_summary_table(
306-
label: str, results: list[dict[str, Any]]
306+
label: str, results: list[dict[str, Any]],
307307
) -> tuple[float, float, float, float]:
308308
"""Print a summary table and return averages."""
309309
w = max(12, max((len(r["language"]) for r in results), default=12) + 2)
310310
print(
311-
f"\n {'Language':<{w}s} {'Coverage':>8s} {'Accuracy':>8s} {'Overall':>8s} {'Tier-Wtd':>8s} {'Uncls':>5s} {'Wrong':>5s}"
311+
f"\n {'Language':<{w}s} {'Coverage':>8s} {'Accuracy':>8s} {'Overall':>8s} {'Tier-Wtd':>8s} {'Uncls':>5s} {'Wrong':>5s}",
312312
)
313313
print(f" {'─' * w} {'─' * 8} {'─' * 8} {'─' * 8} {'─' * 8} {'─' * 5} {'─' * 5}")
314314
for r in results:
315315
ovr = f" (+{r['override_count']})" if r.get("override_count") else ""
316316
print(
317317
f" {r['language']:<{w}s} {r['coverage_pct']:>7.1f}% {r['accuracy_pct']:>7.1f}% "
318318
f"{r['overall_accuracy_pct']:>7.1f}% {r['tier_weighted_accuracy_pct']:>7.1f}% "
319-
f"{r['unclassified']:>5d} {r['incorrect']:>5d}{ovr}"
319+
f"{r['unclassified']:>5d} {r['incorrect']:>5d}{ovr}",
320320
)
321321

322322
avg_cov = sum(r["coverage_pct"] for r in results) / len(results)
@@ -326,7 +326,7 @@ def print_summary_table(
326326
print(f" {'─' * w} {'─' * 8} {'─' * 8} {'─' * 8} {'─' * 8} {'─' * 5} {'─' * 5}")
327327
print(
328328
f" {'AVERAGE':<{w}s} {avg_cov:>7.1f}% {avg_acc:>7.1f}% "
329-
f"{avg_ovr:>7.1f}% {avg_tier:>7.1f}%"
329+
f"{avg_ovr:>7.1f}% {avg_tier:>7.1f}%",
330330
)
331331
return avg_cov, avg_acc, avg_ovr, avg_tier
332332

@@ -343,7 +343,7 @@ def main() -> int: # sourcery skip: low-code-quality
343343

344344
parser = argparse.ArgumentParser(description="Holdout evaluation for language classifications")
345345
parser.add_argument(
346-
"--all", action="store_true", help="Evaluate ALL languages (not just holdout set)"
346+
"--all", action="store_true", help="Evaluate ALL languages (not just holdout set)",
347347
)
348348
parser.add_argument("--lang", nargs="+", help="Evaluate specific language(s)")
349349
args = parser.parse_args()
@@ -406,7 +406,7 @@ def main() -> int: # sourcery skip: low-code-quality
406406
print(
407407
f" {lang:<14s} overall={result['overall_accuracy_pct']:5.1f}% "
408408
f"({result['correct']}/{result['total_things']} correct, "
409-
f"{result['unclassified']} uncls, {result['incorrect']} wrong)"
409+
f"{result['unclassified']} uncls, {result['incorrect']} wrong)",
410410
)
411411

412412
_display_phase_intro("\n", "PHASE 1 SUMMARY")
@@ -442,7 +442,7 @@ def main() -> int: # sourcery skip: low-code-quality
442442
f" {lang:<14s} overall={result['overall_accuracy_pct']:5.1f}% "
443443
f"({result['correct']}/{result['total_things']} correct, "
444444
f"{result['unclassified']} uncls, {result['incorrect']} wrong)"
445-
f"{marker}"
445+
f"{marker}",
446446
)
447447

448448
_display_phase_intro("\n", "PHASE 2 SUMMARY")
@@ -456,7 +456,7 @@ def main() -> int: # sourcery skip: low-code-quality
456456
_display_phase_intro("\n\n", "COMPARISON: Baseline vs With Overrides")
457457
w = max(12, max((len(r["language"]) for r in baseline_results), default=12) + 2)
458458
print(
459-
f"\n {'Language':<{w}s} {'Baseline':>8s} {'Override':>8s} {'Delta':>7s} {'Override Lines':>14s}"
459+
f"\n {'Language':<{w}s} {'Baseline':>8s} {'Override':>8s} {'Delta':>7s} {'Override Lines':>14s}",
460460
)
461461
print(f" {'─' * w} {'─' * 8} {'─' * 8} {'─' * 7} {'─' * 14}")
462462

@@ -469,7 +469,7 @@ def main() -> int: # sourcery skip: low-code-quality
469469
print(
470470
f" {b['language']:<{w}s} {b['overall_accuracy_pct']:>7.1f}% "
471471
f"{o['overall_accuracy_pct']:>7.1f}% {sign}{delta:>5.1f}% "
472-
f"{ovr_count:>14d}"
472+
f"{ovr_count:>14d}",
473473
)
474474

475475
delta_overall = avg_o[2] - avg_b[2]
@@ -493,7 +493,7 @@ def main() -> int: # sourcery skip: low-code-quality
493493
total_overrides = sum(r["override_count"] for r in override_results)
494494
langs_with_overrides = sum(r["override_count"] > 0 for r in override_results)
495495
print(
496-
f" Override cost: {total_overrides} lines across {langs_with_overrides} files"
496+
f" Override cost: {total_overrides} lines across {langs_with_overrides} files",
497497
)
498498

499499
if final_avg[2] >= 95:

scripts/model_data/hf-models.json.license

100644 → 100755
File mode changed.

scripts/model_data/secondary_providers.json

100644 → 100755
File mode changed.

scripts/model_data/secondary_providers.json.license

100644 → 100755
File mode changed.

scripts/performance_baseline.py

100644 → 100755
File mode changed.

src/codeweaver/providers/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1766,8 +1766,8 @@
17661766
"SnowflakeEmbeddingCapabilities",
17671767
"SnowflakeProvider",
17681768
"SparseCapabilities",
1769-
"SparseEmbedding",
17701769
"SparseCapabilityResolverDep",
1770+
"SparseEmbedding",
17711771
"SparseEmbeddingCapabilityResolver",
17721772
"SparseEmbeddingConfigT",
17731773
"SparseEmbeddingModelCapabilities",

0 commit comments

Comments
 (0)