diff --git a/docs/development/evaluations/history/.nav.yml b/docs/development/evaluations/history/.nav.yml
index 8c83753ad..d71105aeb 100644
--- a/docs/development/evaluations/history/.nav.yml
+++ b/docs/development/evaluations/history/.nav.yml
@@ -1,4 +1,5 @@
+sort:
+  direction: desc
 nav:
   - index.md
-  - Weekly: weekly/
-  - Special: special/
+  - "*"
diff --git a/docs/development/evaluations/history/special/results_20250930_153753.md b/docs/development/evaluations/history/custom_claude_results_20250930_153753.md
similarity index 100%
rename from docs/development/evaluations/history/special/results_20250930_153753.md
rename to docs/development/evaluations/history/custom_claude_results_20250930_153753.md
diff --git a/docs/development/evaluations/history/special/results_20251008_053744.md b/docs/development/evaluations/history/custom_self_hosted_results_20251008_053744.md
similarity index 100%
rename from docs/development/evaluations/history/special/results_20251008_053744.md
rename to docs/development/evaluations/history/custom_self_hosted_results_20251008_053744.md
diff --git a/docs/development/evaluations/history/index.md b/docs/development/evaluations/history/index.md
index 22fb6f63c..52ad48e0f 100644
--- a/docs/development/evaluations/history/index.md
+++ b/docs/development/evaluations/history/index.md
@@ -1,18 +1,9 @@
 # Historical Evaluation Results
 
-## Weekly Runs
+Browse through our past benchmark runs to track performance trends over time.
 
-Weekly benchmark runs with a standard set of models.
+## Weekly Results
+Regular weekly benchmark runs that track model performance over time, stored in this directory as `results_<timestamp>.md`.
 
-See the **Weekly** section in the navigation sidebar for all weekly benchmark results.
-
-## Special Benchmark Runs
-
-One-off benchmark runs for specific purposes such as:
-
-- Comparing self-hosted models
-- Testing new model versions
-- Performance analysis for specific scenarios
-- Custom model comparisons
-
-See the **Special** section in the navigation sidebar for all special benchmark runs.
+## Extended Comparisons
+One-off benchmark runs comparing multiple models and configurations, stored in this directory as `custom_<name>_results_<timestamp>.md`.
diff --git a/docs/development/evaluations/history/weekly/results_20250928_001434.md b/docs/development/evaluations/history/results_20250928_001434.md
similarity index 100%
rename from docs/development/evaluations/history/weekly/results_20250928_001434.md
rename to docs/development/evaluations/history/results_20250928_001434.md
diff --git a/docs/development/evaluations/history/weekly/results_20250930_085923.md b/docs/development/evaluations/history/results_20250930_085923.md
similarity index 100%
rename from docs/development/evaluations/history/weekly/results_20250930_085923.md
rename to docs/development/evaluations/history/results_20250930_085923.md
diff --git a/docs/development/evaluations/history/weekly/results_20251012_170303.md b/docs/development/evaluations/history/results_20251012_170303.md
similarity index 100%
rename from docs/development/evaluations/history/weekly/results_20251012_170303.md
rename to docs/development/evaluations/history/results_20251012_170303.md
diff --git a/docs/development/evaluations/history/special/.nav.yml b/docs/development/evaluations/history/special/.nav.yml
deleted file mode 100644
index d71105aeb..000000000
--- a/docs/development/evaluations/history/special/.nav.yml
+++ /dev/null
@@ -1,5 +0,0 @@
-sort:
-  direction: desc
-nav:
-  - index.md
-  - "*"
diff --git a/docs/development/evaluations/history/special/index.md b/docs/development/evaluations/history/special/index.md
deleted file mode 100644
index c55826838..000000000
--- a/docs/development/evaluations/history/special/index.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# Special Benchmark Runs
-
-One-off benchmark runs for specific purposes such as comparing self-hosted models, testing new model versions, or custom performance analysis.
-
-## Available Results
-
-All special benchmark results are listed in the navigation sidebar.
diff --git a/docs/development/evaluations/history/weekly/.nav.yml b/docs/development/evaluations/history/weekly/.nav.yml
deleted file mode 100644
index d71105aeb..000000000
--- a/docs/development/evaluations/history/weekly/.nav.yml
+++ /dev/null
@@ -1,5 +0,0 @@
-sort:
-  direction: desc
-nav:
-  - index.md
-  - "*"
diff --git a/docs/development/evaluations/history/weekly/index.md b/docs/development/evaluations/history/weekly/index.md
deleted file mode 100644
index 7b87e1b61..000000000
--- a/docs/development/evaluations/history/weekly/index.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# Weekly Benchmark Runs
-
-Weekly benchmark runs with a standard set of models.
-
-## Available Results
-
-All weekly benchmark results are listed in the navigation sidebar.
diff --git a/run_benchmarks_local.sh b/run_benchmarks_local.sh
index 19e86c0a9..09ae7a167 100755
--- a/run_benchmarks_local.sh
+++ b/run_benchmarks_local.sh
@@ -154,10 +154,10 @@ if [ -f "scripts/generate_eval_report.py" ]; then
         --models "$MODELS"
     echo "✅ Report generated: docs/development/evaluations/latest-results.md"
 
-    # Also generate timestamped version for history (always in weekly/)
-    mkdir -p docs/development/evaluations/history/weekly
+    # Also generate timestamped version for history
+    mkdir -p docs/development/evaluations/history
     TIMESTAMP=$(date +%Y%m%d_%H%M%S)
-    HISTORY_FILE="docs/development/evaluations/history/weekly/results_${TIMESTAMP}.md"
+    HISTORY_FILE="docs/development/evaluations/history/results_${TIMESTAMP}.md"
     poetry run python scripts/generate_eval_report.py \
         --json-file eval_results.json \
         --output-file "$HISTORY_FILE" \