robusta-dev · aantn · Oct 27, 2025 · Oct 27, 2025
diff --git a/docs/development/evaluations/history/.nav.yml b/docs/development/evaluations/history/.nav.yml
@@ -1,4 +1,5 @@
+sort:
+  direction: desc
 nav:
   - index.md
-  - Weekly: weekly/
-  - Special: special/
+  - "*"
diff --git a/...istory/special/results_20250930_153753.md → .../custom_claude_results_20250930_153753.md b/...istory/special/results_20250930_153753.md → .../custom_claude_results_20250930_153753.md
diff --git a/...istory/special/results_20251008_053744.md → ...om_self_hosted_results_20251008_053744.md b/...istory/special/results_20251008_053744.md → ...om_self_hosted_results_20251008_053744.md
diff --git a/docs/development/evaluations/history/index.md b/docs/development/evaluations/history/index.md
@@ -1,18 +1,9 @@
 # Historical Evaluation Results
 
-## Weekly Runs
+Browse through our past benchmark runs to track performance trends over time.
 
-Weekly benchmark runs with a standard set of models.
+## Weekly Results
+Regular weekly benchmark runs that track model performance over time.
 
-See the **Weekly** section in the navigation sidebar for all weekly benchmark results.
-
-## Special Benchmark Runs
-
-One-off benchmark runs for specific purposes such as:
-
-- Comparing self-hosted models
-- Testing new model versions
-- Performance analysis for specific scenarios
-- Custom model comparisons
-
-See the **Special** section in the navigation sidebar for all special benchmark runs.
+## Extended Comparisons
+Special benchmark runs comparing multiple models and configurations.
diff --git a/...history/weekly/results_20250928_001434.md → ...ations/history/results_20250928_001434.md b/...history/weekly/results_20250928_001434.md → ...ations/history/results_20250928_001434.md
diff --git a/...history/weekly/results_20250930_085923.md → ...ations/history/results_20250930_085923.md b/...history/weekly/results_20250930_085923.md → ...ations/history/results_20250930_085923.md
diff --git a/...history/weekly/results_20251012_170303.md → ...ations/history/results_20251012_170303.md b/...history/weekly/results_20251012_170303.md → ...ations/history/results_20251012_170303.md
diff --git a/docs/development/evaluations/history/special/.nav.yml b/docs/development/evaluations/history/special/.nav.yml
diff --git a/docs/development/evaluations/history/special/index.md b/docs/development/evaluations/history/special/index.md
diff --git a/docs/development/evaluations/history/weekly/.nav.yml b/docs/development/evaluations/history/weekly/.nav.yml
diff --git a/docs/development/evaluations/history/weekly/index.md b/docs/development/evaluations/history/weekly/index.md
diff --git a/run_benchmarks_local.sh b/run_benchmarks_local.sh
@@ -154,10 +154,10 @@ if [ -f "scripts/generate_eval_report.py" ]; then
         --models "$MODELS"
     echo "✅ Report generated: docs/development/evaluations/latest-results.md"
 
-    # Also generate timestamped version for history (always in weekly/)
-    mkdir -p docs/development/evaluations/history/weekly
+    # Also write a timestamped copy of the report into the history directory
+    mkdir -p docs/development/evaluations/history
     TIMESTAMP=$(date +%Y%m%d_%H%M%S)
-    HISTORY_FILE="docs/development/evaluations/history/weekly/results_${TIMESTAMP}.md"
+    HISTORY_FILE="docs/development/evaluations/history/results_${TIMESTAMP}.md"
     poetry run python scripts/generate_eval_report.py \
         --json-file eval_results.json \
         --output-file "$HISTORY_FILE" \