diff --git a/.gitignore b/.gitignore index d98fba6..44d8028 100644 --- a/.gitignore +++ b/.gitignore @@ -72,3 +72,4 @@ TEST/ # predictions historical_predictions/ forecasts/ +dynamic_eval_data/ diff --git a/app/app/evaluation/page.tsx b/app/app/evaluation/page.tsx index ad180ec..a5d8c88 100644 --- a/app/app/evaluation/page.tsx +++ b/app/app/evaluation/page.tsx @@ -6,6 +6,8 @@ import { TrendingUp, AlertCircle, Calendar, Activity } from "lucide-react"; import { LineChart, Line, + BarChart, + Bar, XAxis, YAxis, CartesianGrid, @@ -56,6 +58,19 @@ interface ChartDataPoint { MAE: number; } +interface MonthlyMetrics { + overall_rmse: number; + overall_mae: number; + samples: number; + by_horizon: Record; +} + +interface MonthlyAnalysis { + evaluation_period: { start: string; end: string }; + monthly_metrics: Record; + computed_at: string; +} + // ============================================================================ // Components // ============================================================================ @@ -202,10 +217,13 @@ const WarningMessage = ({ message }: { message: string }) => ( export default function EvaluationPage() { const [staticEval, setStaticEval] = useState(null); const [dynamicEval, setDynamicEval] = useState(null); + const [monthlyAnalysis, setMonthlyAnalysis] = useState(null); const [loadingStatic, setLoadingStatic] = useState(true); const [loadingDynamic, setLoadingDynamic] = useState(true); + const [loadingMonthly, setLoadingMonthly] = useState(true); const [errorStatic, setErrorStatic] = useState(null); const [errorDynamic, setErrorDynamic] = useState(null); + const [errorMonthly, setErrorMonthly] = useState(null); const hasFetched = useRef(false); useEffect(() => { @@ -240,8 +258,23 @@ export default function EvaluationPage() { } }; + const fetchMonthlyAnalysis = async () => { + try { + const response = await fetch(`${API_BASE_URL}/evaluation/static/monthly`); + if (!response.ok) throw new Error("Failed to fetch monthly analysis"); + const data = await response.json(); + setMonthlyAnalysis(data); + setErrorMonthly(null); + } catch (error) { + setErrorMonthly(error instanceof Error ? error.message : "Unknown error"); + } finally { + setLoadingMonthly(false); + } + }; + fetchStaticEvaluation(); fetchDynamicEvaluation(); + fetchMonthlyAnalysis(); }, []); const formatDate = (dateStr: string) => { @@ -337,11 +370,199 @@ export default function EvaluationPage() { data={prepareChartData(staticEval.metrics.by_horizon)} title="Error vs Forecast Horizon" /> + + {/* Monthly Error Analysis */} + {renderMonthlyAnalysis()} ); }; + const renderMonthlyAnalysis = () => { + if (loadingMonthly) { + return ( +
+
+
+ {[1, 2, 3].map((i) => ( +
+ ))} +
+
+ ); + } + + if (errorMonthly) { + return ( +
+ +
+      );
+    }
+
+    if (!monthlyAnalysis) return null;
+
+    // Sort months by overall RMSE to identify best and worst
+    const sortedMonths = Object.entries(monthlyAnalysis.monthly_metrics)
+      .map(([month, metrics]) => ({
+        month,
+        rmse: metrics.overall_rmse,
+        mae: metrics.overall_mae,
+        samples: metrics.samples,
+      }))
+      .sort((a, b) => b.rmse - a.rmse); // Highest error first
+
+    const worstMonth = sortedMonths[0];
+    const bestMonth = sortedMonths[sortedMonths.length - 1];
+
+    // Format month for display
+    const formatMonth = (monthStr: string) => {
+      // monthStr is in format "YYYY-MM"
+      // Parse directly to avoid timezone issues
+      const [year, month] = monthStr.split("-");
+      const date = new Date(parseInt(year), parseInt(month) - 1, 1);
+      return date.toLocaleDateString("en-US", { year: "numeric", month: "long" });
+    };
+
+    // Prepare chart data (sorted chronologically by month key)
+    const chartData = Object.entries(monthlyAnalysis.monthly_metrics)
+      .sort(([a], [b]) => a.localeCompare(b)) // Sort by YYYY-MM string
+      .map(([month, metrics]) => ({
+        month: formatMonth(month).split(" ")[0], // Short month name
+        fullMonth: formatMonth(month),
+        RMSE: parseFloat(metrics.overall_rmse.toFixed(2)),
+        MAE: parseFloat(metrics.overall_mae.toFixed(2)),
+      }));
+
+    // Calculate improvement percentage
+    const improvementPercent = (
+      ((worstMonth.rmse - bestMonth.rmse) / worstMonth.rmse) *
+      100
+    ).toFixed(1);
+
+    // Determine season text based on actual months
+    const getSeasonText = () => {
+      const months = Object.keys(monthlyAnalysis.monthly_metrics).sort();
+      const firstMonth = parseInt(months[0].split("-")[1]); // Get month number
+
+      if (firstMonth <= 2) return "Winter → Summer";
+      if (firstMonth <= 5) return "Spring → Summer";
+      return "Seasonal Variation";
+    };
+
+    return (
+
+

+ + Monthly Error Analysis +

+ + {/* Best and Worst Months Highlight */} +
+
+
+ Highest Error +
+
+ {formatMonth(worstMonth.month)} +
+
+ {worstMonth.rmse.toFixed(2)}°C RMSE +
+
+ +
+
+ Lowest Error +
+
+ {formatMonth(bestMonth.month)} +
+
+ {bestMonth.rmse.toFixed(2)}°C RMSE +
+
+ +
+
+ Trend +
+
+ ↓ {improvementPercent}% +
+
{getSeasonText()}
+
+
+ + {/* Simplified Line Chart */} + + + + + Math.ceil(dataMax * 1.15)]} + /> + [`${value.toFixed(2)}°C`, ""]} + labelFormatter={(label) => { + const data = chartData.find((d) => d.month === label); + return data?.fullMonth || label; + }} + /> + + + + + + +
+            Error metrics decrease from late winter through summer, reflecting more stable
+            atmospheric patterns and improved predictability in warmer months
+
+ ); + }; + const renderDynamicSection = () => { if (loadingDynamic) { return ( diff --git a/backend/app/main.py b/backend/app/main.py index ad14434..6d919e5 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -569,6 +569,64 @@ async def get_forecast_logs(limit: int = 100) -> dict[str, Any]: ) from e +@app.get("/evaluation/static/monthly") +async def get_static_monthly_analysis() -> dict[str, Any]: + """Get monthly error analysis for static evaluation period. + + Provides breakdown of RMSE/MAE by month to identify seasonal patterns + and months with highest/lowest errors. + + Returns + ------- + dict[str, Any] + Monthly metrics with per-horizon breakdowns + """ + logger = get_logger() + logger.info("=" * 80) + logger.info("GET /evaluation/static/monthly endpoint called") + + if not hasattr(app.state, "eval_storage"): + raise HTTPException( + status_code=503, + detail="Evaluation storage not available. BigQuery may not be configured.", + ) + + try: + # Use same static evaluation period as main endpoint + start_date = datetime(2024, 2, 6, 12, 0, 0) + end_date = datetime(2024, 7, 19, 17, 0, 0) + + logger.info(f"Computing monthly metrics for: {start_date} to {end_date}") + + # Compute monthly metrics using SQL aggregation in BigQuery + monthly_data = await asyncio.to_thread( + app.state.eval_storage.compute_monthly_metrics, start_date, end_date + ) + + logger.info(f"Monthly data computed for {len(monthly_data['by_month'])} months") + + response = { + "evaluation_period": { + "start": start_date.isoformat(), + "end": end_date.isoformat(), + }, + "monthly_metrics": monthly_data["by_month"], + "computed_at": datetime.now().isoformat(), + } + + logger.info("Monthly analysis endpoint completed successfully") + logger.info("=" * 80) + return response + + except Exception as e: + logger.error(f"Failed to compute monthly analysis: {str(e)}") + logger.info("=" * 80) + raise HTTPException( + status_code=500, + detail=f"Failed to compute monthly analysis: {str(e)}", + ) from e + + @app.get("/evaluation/dynamic") async def get_dynamic_evaluation() -> dict[str, Any]: """Get dynamic evaluation metrics (rolling 1-month window). diff --git a/scripts/clean_dynamic_eval.py b/scripts/clean_dynamic_eval.py new file mode 100755 index 0000000..1e6e8c3 --- /dev/null +++ b/scripts/clean_dynamic_eval.py @@ -0,0 +1,344 @@ +#!/usr/bin/env python +"""Clean dynamic evaluation data before repopulating. + +This script deletes predictions and ground truth data from the rolling 30-day +window to prepare for fresh data population. Use this when switching from +biased (24h interval) to unbiased (1h interval) predictions. + +Usage: + # Interactive mode (asks for confirmation) + GCP_PROJECT_ID=your-project python scripts/clean_dynamic_eval.py + + # Force mode (no confirmation) + python scripts/clean_dynamic_eval.py --force + + # Custom day range + python scripts/clean_dynamic_eval.py --days 30 --force +""" + +import argparse +import os +import sys +from datetime import datetime, timedelta + +from google.cloud import bigquery +from rich.console import Console +from rich.panel import Panel +from rich.prompt import Confirm +from rich.table import Table + + +console = Console() + + +def get_data_counts( + client: bigquery.Client, + project_id: str, + dataset_id: str, + start_date: datetime, + end_date: datetime, +) -> tuple[int, int]: + """Get counts of predictions and ground truth to be deleted. 
+ + Parameters + ---------- + client : bigquery.Client + BigQuery client + project_id : str + GCP project ID + dataset_id : str + BigQuery dataset ID + start_date : datetime + Start of deletion range + end_date : datetime + End of deletion range + + Returns + ------- + tuple[int, int] + (prediction_count, ground_truth_count) + """ + # Count predictions + pred_query = f""" + SELECT COUNT(*) as count + FROM `{project_id}.{dataset_id}.predictions` + WHERE run_timestamp >= @start_date + AND run_timestamp <= @end_date + """ + + job_config = bigquery.QueryJobConfig( + query_parameters=[ + bigquery.ScalarQueryParameter("start_date", "TIMESTAMP", start_date), + bigquery.ScalarQueryParameter("end_date", "TIMESTAMP", end_date), + ] + ) + + pred_job = client.query(pred_query, job_config=job_config) + pred_results = list(pred_job.result()) + pred_count = pred_results[0]["count"] if pred_results else 0 + + # Count ground truth (extended by 48h for forecast horizon) + gt_end = end_date + timedelta(hours=48) + gt_query = f""" + SELECT COUNT(*) as count + FROM `{project_id}.{dataset_id}.ground_truth` + WHERE timestamp >= @start_date + AND timestamp <= @end_date + """ + + job_config_gt = bigquery.QueryJobConfig( + query_parameters=[ + bigquery.ScalarQueryParameter("start_date", "TIMESTAMP", start_date), + bigquery.ScalarQueryParameter("end_date", "TIMESTAMP", gt_end), + ] + ) + + gt_job = client.query(gt_query, job_config=job_config_gt) + gt_results = list(gt_job.result()) + gt_count = gt_results[0]["count"] if gt_results else 0 + + return pred_count, gt_count + + +def delete_predictions( + client: bigquery.Client, + project_id: str, + dataset_id: str, + start_date: datetime, + end_date: datetime, +) -> int: + """Delete predictions in the specified date range. + + Parameters + ---------- + client : bigquery.Client + BigQuery client + project_id : str + GCP project ID + dataset_id : str + BigQuery dataset ID + start_date : datetime + Start of deletion range + end_date : datetime + End of deletion range + + Returns + ------- + int + Number of rows deleted + """ + query = f""" + DELETE FROM `{project_id}.{dataset_id}.predictions` + WHERE run_timestamp >= @start_date + AND run_timestamp <= @end_date + """ + + job_config = bigquery.QueryJobConfig( + query_parameters=[ + bigquery.ScalarQueryParameter("start_date", "TIMESTAMP", start_date), + bigquery.ScalarQueryParameter("end_date", "TIMESTAMP", end_date), + ] + ) + + console.print("[cyan]Deleting predictions...[/cyan]") + job = client.query(query, job_config=job_config) + job.result() + + console.print("[green]✓[/green] Deleted predictions") + return job.num_dml_affected_rows or 0 + + +def delete_ground_truth( + client: bigquery.Client, + project_id: str, + dataset_id: str, + start_date: datetime, + end_date: datetime, +) -> int: + """Delete ground truth in the specified date range. + + Extends end_date by 48h to include ground truth for forecast horizons. 
+ + Parameters + ---------- + client : bigquery.Client + BigQuery client + project_id : str + GCP project ID + dataset_id : str + BigQuery dataset ID + start_date : datetime + Start of deletion range + end_date : datetime + End of deletion range + + Returns + ------- + int + Number of rows deleted + """ + # Extend by 48h to cover forecast ground truth + gt_end = end_date + timedelta(hours=48) + + query = f""" + DELETE FROM `{project_id}.{dataset_id}.ground_truth` + WHERE timestamp >= @start_date + AND timestamp <= @end_date + """ + + job_config = bigquery.QueryJobConfig( + query_parameters=[ + bigquery.ScalarQueryParameter("start_date", "TIMESTAMP", start_date), + bigquery.ScalarQueryParameter("end_date", "TIMESTAMP", gt_end), + ] + ) + + console.print("[cyan]Deleting ground truth...[/cyan]") + job = client.query(query, job_config=job_config) + job.result() + + console.print("[green]✓[/green] Deleted ground truth") + return job.num_dml_affected_rows or 0 + + +def main() -> None: + """Clean dynamic evaluation data.""" + parser = argparse.ArgumentParser( + description="Clean dynamic evaluation data before repopulating" + ) + parser.add_argument( + "--days", + type=int, + default=30, + help="Number of days to clean (default: 30)", + ) + parser.add_argument( + "--force", + "-f", + action="store_true", + help="Skip confirmation prompt", + ) + parser.add_argument( + "--predictions-only", + action="store_true", + help="Delete only predictions, keep ground truth", + ) + + args = parser.parse_args() + + # Get project ID + project_id = os.getenv("GCP_PROJECT_ID") + if not project_id: + console.print("[red]✗ GCP_PROJECT_ID environment variable not set[/red]") + console.print("Set it with: export GCP_PROJECT_ID=your-project-id") + sys.exit(1) + + dataset_id = os.getenv("BIGQUERY_DATASET", "gaca_evaluation") + + # Calculate date range + end_date = datetime.utcnow() + start_date = end_date - timedelta(days=args.days) + + console.print() + console.print( + Panel.fit( + "[bold red]Clean Dynamic Evaluation Data[/bold red]\n" + "[dim]This will permanently delete data from BigQuery[/dim]", + border_style="red", + ) + ) + console.print() + + # Display deletion info + info_table = Table(show_header=False, box=None, padding=(0, 2)) + info_table.add_column(style="cyan") + info_table.add_column() + info_table.add_row("Project", project_id) + info_table.add_row("Dataset", dataset_id) + info_table.add_row("Start Date", start_date.strftime("%Y-%m-%d %H:%M UTC")) + info_table.add_row("End Date", end_date.strftime("%Y-%m-%d %H:%M UTC")) + info_table.add_row("Days", str(args.days)) + console.print(info_table) + console.print() + + try: + # Initialize BigQuery client + client = bigquery.Client(project=project_id) + + # Get counts before deletion + console.print("[cyan]Checking existing data...[/cyan]") + pred_count, gt_count = get_data_counts( + client, project_id, dataset_id, start_date, end_date + ) + + if pred_count == 0 and gt_count == 0: + console.print("[yellow]⚠ No data found in the specified range[/yellow]") + console.print( + f"Date range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}" + ) + sys.exit(0) + + # Display what will be deleted + console.print() + delete_table = Table(title="Data to be Deleted", box=None) + delete_table.add_column("Table", style="cyan") + delete_table.add_column("Rows", justify="right", style="yellow") + delete_table.add_row("Predictions", f"{pred_count:,}") + if not args.predictions_only: + delete_table.add_row( + "Ground Truth", f"{gt_count:,} (includes +48h 
forecast data)" + ) + console.print(delete_table) + console.print() + + # Confirmation + if not args.force: + confirmed = Confirm.ask( + "[bold red]⚠ This will permanently delete the data above. Continue?[/bold red]" + ) + if not confirmed: + console.print("[yellow]⊘ Cancelled by user[/yellow]") + sys.exit(0) + + console.print() + console.print("=" * 70) + console.print("[bold]Deleting data...[/bold]") + console.print("=" * 70) + console.print() + + # Delete predictions + deleted_pred = delete_predictions( + client, project_id, dataset_id, start_date, end_date + ) + + # Delete ground truth (unless predictions-only) + deleted_gt = 0 + if not args.predictions_only: + deleted_gt = delete_ground_truth( + client, project_id, dataset_id, start_date, end_date + ) + + # Summary + console.print() + console.print("=" * 70) + console.print("[bold green]✓ Deletion Complete![/bold green]") + console.print(f" Predictions deleted: {deleted_pred:,}") + if not args.predictions_only: + console.print(f" Ground truth deleted: {deleted_gt:,}") + console.print("=" * 70) + console.print() + + console.print("[green]✓ Ready to repopulate with unbiased data![/green]\n") + console.print("[dim]Next steps:[/dim]") + console.print( + f" python scripts/populate_dynamic_evaluation.py --days {args.days} --interval 1 --verbose\n" + ) + + except Exception as e: + console.print(f"\n[red]✗ Error: {e}[/red]") + console.print_exception() + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/scripts/populate_dynamic_evaluation.py b/scripts/populate_dynamic_evaluation.py index e25f67e..86aff04 100644 --- a/scripts/populate_dynamic_evaluation.py +++ b/scripts/populate_dynamic_evaluation.py @@ -241,8 +241,8 @@ def main() -> None: parser.add_argument( "--interval", type=int, - default=24, - help="Interval between predictions in hours (default: 24)", + default=1, + help="Interval between predictions in hours (default: 1 for unbiased evaluation)", ) parser.add_argument( "--output", @@ -285,6 +285,21 @@ def main() -> None: "(+48h)[/dim]\n" ) + # Warn about temporal bias + if args.interval > 1: + console.print( + f"[bold yellow]⚠ WARNING: Using interval={args.interval}h may introduce temporal bias![/bold yellow]" + ) + console.print( + "[yellow] Each forecast horizon will be evaluated at only specific times of day,[/yellow]" + ) + console.print( + "[yellow] causing diurnal effects to bias error metrics (e.g., 48h < 36h errors).[/yellow]" + ) + console.print( + "[yellow] For unbiased evaluation, use --interval 1 (hourly predictions).[/yellow]\n" + ) + # Step 1: Run batch predictions (unless skipped) if not args.skip_inference: console.print("=" * 70) diff --git a/src/gaca_ews/evaluation/storage.py b/src/gaca_ews/evaluation/storage.py index 69a05b5..5c851bb 100644 --- a/src/gaca_ews/evaluation/storage.py +++ b/src/gaca_ews/evaluation/storage.py @@ -615,6 +615,120 @@ def get_last_forecast_timestamp(self) -> str | None: return results[0]["last_run"].isoformat() + def compute_monthly_metrics( + self, start_date: datetime, end_date: datetime + ) -> dict[str, Any]: + """Compute error metrics grouped by month for error analysis. 
+ + Parameters + ---------- + start_date : datetime + Start of the range (inclusive) + end_date : datetime + End of the range (inclusive) + + Returns + ------- + dict[str, Any] + Monthly metrics with overall and per-horizon breakdowns + """ + query = f""" + WITH matched AS ( + SELECT + p.forecast_time, + p.horizon_hours, + p.predicted_temp, + g.actual_temp + FROM `{self.project_id}.{self.dataset_id}.predictions` p + JOIN `{self.project_id}.{self.dataset_id}.ground_truth` g + ON p.forecast_time = g.timestamp + AND ROUND(p.lat, 2) = ROUND(g.lat, 2) + AND ROUND(p.lon, 2) = ROUND(g.lon, 2) + WHERE p.run_timestamp BETWEEN @start_date AND @end_date + ), + monthly AS ( + SELECT + FORMAT_TIMESTAMP('%Y-%m', forecast_time) AS month, + horizon_hours, + predicted_temp, + actual_temp + FROM matched + WHERE forecast_time >= @start_date + AND forecast_time <= @end_date + ) + SELECT + month, + horizon_hours, + SQRT(AVG(POW(predicted_temp - actual_temp, 2))) AS rmse, + AVG(ABS(predicted_temp - actual_temp)) AS mae, + COUNT(*) AS sample_count + FROM monthly + GROUP BY month, horizon_hours + ORDER BY month, horizon_hours + """ + + job_config = bigquery.QueryJobConfig( + query_parameters=[ + bigquery.ScalarQueryParameter("start_date", "TIMESTAMP", start_date), + bigquery.ScalarQueryParameter("end_date", "TIMESTAMP", end_date), + ] + ) + + console.print( + f"[cyan]Computing monthly metrics for {start_date} to {end_date}...[/cyan]" + ) + + query_job = self.client.query(query, job_config=job_config) + results = query_job.result() + + # Parse results into nested structure + by_month: dict[str, dict[str, Any]] = {} + + for row in results: + month = str(row["month"]) + horizon = int(row["horizon_hours"]) + rmse = float(row["rmse"]) + mae = float(row["mae"]) + count = int(row["sample_count"]) + + if month not in by_month: + by_month[month] = {"by_horizon": {}, "samples": 0} + + by_month[month]["by_horizon"][horizon] = { + "rmse": rmse, + "mae": mae, + "sample_count": count, + } + by_month[month]["samples"] += count + + # Compute overall metrics per month (across all horizons) + for _month, month_data in by_month.items(): + total_samples = 0 + sum_squared_errors = 0.0 + sum_abs_errors = 0.0 + + for horizon_data in month_data["by_horizon"].values(): + count = horizon_data["sample_count"] + rmse = horizon_data["rmse"] + mae = horizon_data["mae"] + + total_samples += count + sum_squared_errors += (rmse**2) * count + sum_abs_errors += mae * count + + if total_samples > 0: + month_data["overall_rmse"] = (sum_squared_errors / total_samples) ** 0.5 + month_data["overall_mae"] = sum_abs_errors / total_samples + else: + month_data["overall_rmse"] = 0.0 + month_data["overall_mae"] = 0.0 + + console.print( + f"[green]✓[/green] Computed monthly metrics for {len(by_month)} months" + ) + + return {"by_month": by_month} + def get_last_forecast_run_info(self) -> dict[str, Any] | None: """Get information about the last forecast run.
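
A note on the aggregation above: compute_monthly_metrics pools the per-horizon rows into a single figure per month by sample-weighting the mean squared errors before taking the root, so the monthly overall_rmse is not a plain average of horizon RMSEs. Below is a minimal standalone sketch of that pooling; the horizon numbers are made up for illustration and are not real evaluation output.

import math

# Hypothetical per-horizon metrics for one month, shaped like the rows the
# BigQuery query returns: {horizon_hours: {"rmse", "mae", "sample_count"}}.
by_horizon = {
    6: {"rmse": 1.1, "mae": 0.8, "sample_count": 1200},
    24: {"rmse": 1.6, "mae": 1.2, "sample_count": 1200},
    48: {"rmse": 2.3, "mae": 1.7, "sample_count": 1150},
}

# Weight each horizon's mean squared error (rmse**2) and mean absolute error
# by its sample count, then normalise; this mirrors the loop at the end of
# compute_monthly_metrics.
total_samples = sum(h["sample_count"] for h in by_horizon.values())
sum_squared_errors = sum(h["rmse"] ** 2 * h["sample_count"] for h in by_horizon.values())
sum_abs_errors = sum(h["mae"] * h["sample_count"] for h in by_horizon.values())

overall_rmse = math.sqrt(sum_squared_errors / total_samples) if total_samples else 0.0
overall_mae = sum_abs_errors / total_samples if total_samples else 0.0

print(f"overall_rmse={overall_rmse:.3f}  overall_mae={overall_mae:.3f}  samples={total_samples}")

The frontend reads overall_rmse and overall_mae per month straight from the /evaluation/static/monthly response, so the chart and the highest/lowest-error cards never re-derive them from by_horizon.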
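
For a quick smoke test of the new endpoint, something like the following works against a locally running backend. The base URL and the use of the requests library are assumptions for the example; the web app itself fetches through its own API_BASE_URL setting.

import requests

resp = requests.get("http://localhost:8000/evaluation/static/monthly", timeout=60)
resp.raise_for_status()
payload = resp.json()

# Shape mirrors the MonthlyAnalysis interface added to page.tsx:
# evaluation_period {start, end}, monthly_metrics keyed by "YYYY-MM", computed_at.
for month, metrics in sorted(payload["monthly_metrics"].items()):
    print(month, round(metrics["overall_rmse"], 2), round(metrics["overall_mae"], 2), metrics["samples"])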
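
On the populate script's default interval dropping from 24 to 1 hour: with one prediction run per day, every forecast horizon is verified at the same clock time, so diurnal temperature swings fold straight into the per-horizon metrics (which is how 48h errors can come out lower than 36h). A small illustration, assuming runs start at 12:00 UTC; the start hour and horizon list are made up for the example.

from datetime import datetime, timedelta

def verification_hours(run_interval_h: int, horizons=(12, 24, 36, 48), days: int = 30):
    """Return the set of UTC hours-of-day at which each horizon gets verified."""
    start = datetime(2024, 1, 1, 12)  # assumed first run time, illustration only
    hours = {h: set() for h in horizons}
    run, end = start, start + timedelta(days=days)
    while run < end:
        for h in horizons:
            hours[h].add((run + timedelta(hours=h)).hour)
        run += timedelta(hours=run_interval_h)
    return hours

print(verification_hours(24))                                 # each horizon hits a single hour of day
print({h: len(v) for h, v in verification_hours(1).items()})  # hourly runs cover all 24 hours

With hourly runs each horizon is averaged over the full diurnal cycle, which is the condition the new warning in populate_dynamic_evaluation.py points users toward.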