knitli · bashandbone · Mar 10, 2026 · Mar 10, 2026 · Mar 10, 2026 · Mar 10, 2026
diff --git a/remaining_test_issues.md b/remaining_test_issues.md
@@ -1,3 +1,8 @@
+<!--
+SPDX-FileCopyrightText: 2026 Knitli Inc.
+
+SPDX-License-Identifier: MIT OR Apache-2.0
+-->
 # Remaining Test Issues & Proposed Solutions
 
 This document outlines the current state of the test suite after initial fixes, including specific issues that are blocking tests from passing and proposed solutions for each.

diff --git a/ruff.toml b/ruff.toml
@@ -268,6 +268,15 @@ convention = "google"
 "src/codeweaver/providers/vector_stores/inmemory.py" = [
   "C901",  # too complex - vector store initialization has many conditions
 ]
+# Provider config validation has complex asymmetric compatibility checking
+"src/codeweaver/providers/config/categories/embedding.py" = [
+  "C901",    # too complex - asymmetric validation has inherent complexity
+  "G201",    # logger.exception preferred - existing logging style
+  "SIM102",  # nested if - validates multiple independent conditions
+  "TRY300",  # else block - return-in-try pattern intentional
+  "TRY301",  # abstract raise - validation raises are intentional inline
+  "TRY401",  # redundant exception in logging - existing style
+]
 "tools/tests/**/*.py" = [
   "ANN201",  # missing return type annotation for public function
   "ANN002",

diff --git a/scripts/language-support/export-classifications.py b/scripts/language-support/export-classifications.py
@@ -346,7 +346,7 @@ def main() -> int:
                 "modification": profile["modification"],
                 "debugging": profile["debugging"],
                 "documentation": profile["documentation"],
-            }
+            },
         }
 
     with (output_dir / "_scoring.json").open("w", encoding="utf-8") as f:
@@ -442,7 +442,7 @@ def main() -> int:
     print(f"  High confidence:   {total_confident:,} ({meta['confident_pct']}%)")
     print(f"  Unclassified:      {len(unclassified_things):,}")
     print(
-        f"  Universal exact:   {len(universal_exact):,} thing names classified same in all languages"
+        f"  Universal exact:   {len(universal_exact):,} thing names classified same in all languages",
     )
     print(f"  Universal 75%+:    {len(universal_majority):,} thing names with majority agreement")
     print("\n  Tier distribution:")

diff --git a/scripts/language-support/generate-overrides.py b/scripts/language-support/generate-overrides.py
@@ -189,11 +189,11 @@ def max_value_len(entries: list[dict], *, is_doc: bool = False) -> int:
     lines.append(f"# {language.capitalize()} language classification overrides")
     lines.append(
         f"# Holdout evaluation: {baseline_pct}% overall"
-        f" \u2192 expected {expected_label} with overrides"
+        f" \u2192 expected {expected_label} with overrides",
     )
     lines.append(
         f"# {len(misclassifications)} items to override"
-        f" ({n_unclassified} unclassified + {n_misclassified} misclassified)"
+        f" ({n_unclassified} unclassified + {n_misclassified} misclassified)",
     )
 
     # [overrides] section
@@ -219,7 +219,7 @@ def max_value_len(entries: list[dict], *, is_doc: bool = False) -> int:
 
 
 def _emit_entries(
-    lines: list[str], entries: list[dict], key_width: int, val_width: int, *, is_doc: bool
+    lines: list[str], entries: list[dict], key_width: int, val_width: int, *, is_doc: bool,
 ) -> None:
     """Emit sorted, tier-grouped TOML entries with aligned columns."""
     current_tier: int | None = None
@@ -264,15 +264,15 @@ def _emit_doc_entries(lines: list[str], entries: list[dict], key_width: int) ->
 
 def main() -> None:  # sourcery skip: low-code-quality
     parser = argparse.ArgumentParser(
-        description="Generate TOML override files from holdout evaluation data."
+        description="Generate TOML override files from holdout evaluation data.",
     )
     parser.add_argument(
         "--all",
         action="store_true",
         help="Regenerate ALL languages, even those with existing overrides.",
     )
     parser.add_argument(
-        "--lang", nargs="+", metavar="LANG", help="Generate overrides for specific languages only."
+        "--lang", nargs="+", metavar="LANG", help="Generate overrides for specific languages only.",
     )
     parser.add_argument(
         "--dry-run",

diff --git a/scripts/language-support/holdout-evaluation.py b/scripts/language-support/holdout-evaluation.py
@@ -79,7 +79,7 @@ def load_overrides(overrides_dir: Path, lang: str) -> dict[str, str]:
 
 
 def build_universal_rules(
-    classifications_dir: Path, holdout_lang: str
+    classifications_dir: Path, holdout_lang: str,
 ) -> tuple[dict[str, str], dict[str, str], dict[str, str]]:
     """Build universal rules excluding the holdout language.
 
@@ -183,7 +183,7 @@ def classify_thing_universal(
 
 
 def evaluate_holdout(
-    classifications_dir: Path, holdout_lang: str, *, use_overrides: bool = False
+    classifications_dir: Path, holdout_lang: str, *, use_overrides: bool = False,
 ) -> dict[str, Any]:  # sourcery skip: low-code-quality
     """Run holdout evaluation for a single language."""
 
@@ -197,7 +197,7 @@ def evaluate_holdout(
 
     # Build universal rules excluding this language
     exact_rules, majority_rules, category_rules = build_universal_rules(
-        classifications_dir, holdout_lang
+        classifications_dir, holdout_lang,
     )
 
     # Load overrides if requested
@@ -226,7 +226,7 @@ def evaluate_holdout(
                 continue
 
             predicted, method = classify_thing_universal(
-                entry, exact_rules, majority_rules, category_rules, overrides
+                entry, exact_rules, majority_rules, category_rules, overrides,
             )
             method_counts[method] += 1
 
@@ -303,20 +303,20 @@ def evaluate_holdout(
 
 
 def print_summary_table(
-    label: str, results: list[dict[str, Any]]
+    label: str, results: list[dict[str, Any]],
 ) -> tuple[float, float, float, float]:
     """Print a summary table and return averages."""
     w = max(12, max((len(r["language"]) for r in results), default=12) + 2)
     print(
-        f"\n  {'Language':<{w}s}  {'Coverage':>8s}  {'Accuracy':>8s}  {'Overall':>8s}  {'Tier-Wtd':>8s}  {'Uncls':>5s}  {'Wrong':>5s}"
+        f"\n  {'Language':<{w}s}  {'Coverage':>8s}  {'Accuracy':>8s}  {'Overall':>8s}  {'Tier-Wtd':>8s}  {'Uncls':>5s}  {'Wrong':>5s}",
     )
     print(f"  {'─' * w}  {'─' * 8}  {'─' * 8}  {'─' * 8}  {'─' * 8}  {'─' * 5}  {'─' * 5}")
     for r in results:
         ovr = f" (+{r['override_count']})" if r.get("override_count") else ""
         print(
             f"  {r['language']:<{w}s}  {r['coverage_pct']:>7.1f}%  {r['accuracy_pct']:>7.1f}%  "
             f"{r['overall_accuracy_pct']:>7.1f}%  {r['tier_weighted_accuracy_pct']:>7.1f}%  "
-            f"{r['unclassified']:>5d}  {r['incorrect']:>5d}{ovr}"
+            f"{r['unclassified']:>5d}  {r['incorrect']:>5d}{ovr}",
         )
 
     avg_cov = sum(r["coverage_pct"] for r in results) / len(results)
@@ -326,7 +326,7 @@ def print_summary_table(
     print(f"  {'─' * w}  {'─' * 8}  {'─' * 8}  {'─' * 8}  {'─' * 8}  {'─' * 5}  {'─' * 5}")
     print(
         f"  {'AVERAGE':<{w}s}  {avg_cov:>7.1f}%  {avg_acc:>7.1f}%  "
-        f"{avg_ovr:>7.1f}%  {avg_tier:>7.1f}%"
+        f"{avg_ovr:>7.1f}%  {avg_tier:>7.1f}%",
     )
     return avg_cov, avg_acc, avg_ovr, avg_tier
 
@@ -343,7 +343,7 @@ def main() -> int:  # sourcery skip: low-code-quality
 
     parser = argparse.ArgumentParser(description="Holdout evaluation for language classifications")
     parser.add_argument(
-        "--all", action="store_true", help="Evaluate ALL languages (not just holdout set)"
+        "--all", action="store_true", help="Evaluate ALL languages (not just holdout set)",
     )
     parser.add_argument("--lang", nargs="+", help="Evaluate specific language(s)")
     args = parser.parse_args()
@@ -406,7 +406,7 @@ def main() -> int:  # sourcery skip: low-code-quality
         print(
             f"  {lang:<14s}  overall={result['overall_accuracy_pct']:5.1f}%  "
             f"({result['correct']}/{result['total_things']} correct, "
-            f"{result['unclassified']} uncls, {result['incorrect']} wrong)"
+            f"{result['unclassified']} uncls, {result['incorrect']} wrong)",
         )
 
     _display_phase_intro("\n", "PHASE 1 SUMMARY")
@@ -442,7 +442,7 @@ def main() -> int:  # sourcery skip: low-code-quality
                 f"  {lang:<14s}  overall={result['overall_accuracy_pct']:5.1f}%  "
                 f"({result['correct']}/{result['total_things']} correct, "
                 f"{result['unclassified']} uncls, {result['incorrect']} wrong)"
-                f"{marker}"
+                f"{marker}",
             )
 
         _display_phase_intro("\n", "PHASE 2 SUMMARY")
@@ -456,7 +456,7 @@ def main() -> int:  # sourcery skip: low-code-quality
         _display_phase_intro("\n\n", "COMPARISON: Baseline vs With Overrides")
         w = max(12, max((len(r["language"]) for r in baseline_results), default=12) + 2)
         print(
-            f"\n  {'Language':<{w}s}  {'Baseline':>8s}  {'Override':>8s}  {'Delta':>7s}  {'Override Lines':>14s}"
+            f"\n  {'Language':<{w}s}  {'Baseline':>8s}  {'Override':>8s}  {'Delta':>7s}  {'Override Lines':>14s}",
         )
         print(f"  {'─' * w}  {'─' * 8}  {'─' * 8}  {'─' * 7}  {'─' * 14}")
 
@@ -469,7 +469,7 @@ def main() -> int:  # sourcery skip: low-code-quality
             print(
                 f"  {b['language']:<{w}s}  {b['overall_accuracy_pct']:>7.1f}%  "
                 f"{o['overall_accuracy_pct']:>7.1f}%  {sign}{delta:>5.1f}%  "
-                f"{ovr_count:>14d}"
+                f"{ovr_count:>14d}",
             )
 
         delta_overall = avg_o[2] - avg_b[2]
@@ -493,7 +493,7 @@ def main() -> int:  # sourcery skip: low-code-quality
         total_overrides = sum(r["override_count"] for r in override_results)
         langs_with_overrides = sum(r["override_count"] > 0 for r in override_results)
         print(
-            f"  Override cost:               {total_overrides} lines across {langs_with_overrides} files"
+            f"  Override cost:               {total_overrides} lines across {langs_with_overrides} files",
         )
 
     if final_avg[2] >= 95:

diff --git a/scripts/model_data/hf-models.json.license b/scripts/model_data/hf-models.json.license
diff --git a/scripts/model_data/secondary_providers.json b/scripts/model_data/secondary_providers.json
diff --git a/scripts/model_data/secondary_providers.json.license b/scripts/model_data/secondary_providers.json.license
diff --git a/scripts/performance_baseline.py b/scripts/performance_baseline.py
diff --git a/src/codeweaver/providers/__init__.py b/src/codeweaver/providers/__init__.py
@@ -1766,8 +1766,8 @@
     "SnowflakeEmbeddingCapabilities",
     "SnowflakeProvider",
     "SparseCapabilities",
-    "SparseEmbedding",
     "SparseCapabilityResolverDep",
+    "SparseEmbedding",
     "SparseEmbeddingCapabilityResolver",
     "SparseEmbeddingConfigT",
     "SparseEmbeddingModelCapabilities",