Graph-and-Geometric-Learning
diff --git a/‎app/projects/mtbench/assets/QA_sample.png‎
629 KB b/‎app/projects/mtbench/assets/QA_sample.png‎
629 KB
diff --git a/‎app/projects/mtbench/assets/diagram.png‎
183 KB b/‎app/projects/mtbench/assets/diagram.png‎
183 KB
diff --git a/‎app/projects/mtbench/assets/map.png‎
3.6 MB b/‎app/projects/mtbench/assets/map.png‎
3.6 MB
diff --git a/‎app/projects/mtbench/page.mdx‎
Lines changed: 37 additions & 7 deletions b/‎app/projects/mtbench/page.mdx‎
Lines changed: 37 additions & 7 deletions
diff --git a/‎components/sortable-table.tsx‎
Lines changed: 122 additions & 1 deletion b/‎components/sortable-table.tsx‎
Lines changed: 122 additions & 1 deletion
diff --git a/‎components/table.tsx‎
Lines changed: 38 additions & 2 deletions b/‎components/table.tsx‎
Lines changed: 38 additions & 2 deletions
diff --git a/‎config/publications.ts‎
Lines changed: 1 addition & 1 deletion b/‎config/publications.ts‎
Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 
 import { Authors, Badges} from '@/components/utils'
-import Table from '@/components/table'
+import { Table, Table1, Table2, Table3, Table4}from '@/components/table'
 
 # MTBench: A Multimodal Time Series Benchmark for Temporal Reasoning and Question Answering
 
@@ -11,8 +11,8 @@ import Table from '@/components/table'
 <Badges
   venue=""
   github="https://github.com/Graph-and-Geometric-Learning/MTBench"
-  arxiv=""
-  pdf=""
+  arxiv="https://arxiv.org/abs/2503.16858"
+  pdf="https://arxiv.org/pdf/2503.16858"
 />
 
 
@@ -22,11 +22,41 @@ News influences the world around us—from stock markets reacting to financial r
 
 To address this, we introduce  **MTBench** (**M**ultimodal **T**ime Series **Bench**mark), a dataset designed to evaluate how well AI models understand the relationship between text and time-series data. MTBench pairs financial news with stock market movements and weather reports with historical temperature changes. Unlike existing benchmarks that focus on text or numbers separately, MTBench challenges models to analyze both together, helping to assess their ability to detect trends, interpret news, and make predictions.
 
-- **Finance**: 200K+ news articles with stock movements from 2021–2023.
-- **Weather**: Historical temperature trends covering nearly two decades with reports of extreme events.
+- **Finance**: Two datasets, each with 20K news articles paired with stock time-series data.
+- **Weather**: 2K news and time-series pairs from 50 weather stations across the U.S. (see Figure 1).
 
-We evaluate state-of-the-art large language models (LLMs) on MTBench to measure their ability to link news with data trends (see our **Leaderboard**). The results reveal key challenges—models struggle with long-term pattern recognition, cause-and-effect relationships, and seamlessly combining insights from text and numbers.
+![Figure 1. Geographical distribution of weather stations |scale=0.4](./assets/map.png)
+
+As shown in Figure 2, MTBench enables a range of complex reasoning tasks beyond simple forecasting, including semantic trend analysis, technical indicator prediction, and news-driven question answering. These tasks challenge LLMs to integrate numerical patterns with contextual information.
+
+![Figure 2. An overview of tasks in MTBench |scale=0.4](./assets/diagram.png)
+
+The news-driven QA task includes two sub-tasks: correlation prediction and multi-choice QA. As shown in Figure 3, this task requires models to analyze both text and time-series data, understanding the news content while predicting its potential impact on future trends based on historical time-series.
+
+![Figure 3. An example of multi-choice QA and correlation prediction on the finance dataset |scale=0.8](./assets/QA_sample.png)
+
+Various state-of-the-art large language models (LLMs) were evaluated on MTBench to measure their ability to link news with time-series trends (see **Leaderboard**). The results reveal key challenges—models struggle with long-term pattern recognition, cause-and-effect relationships, and seamlessly combining insights from text and numbers.
 
 ## Leaderboard
 
-<Table/>
+<Table/>
+
+<details>
+<summary>Leaderboard for Time-Series Forecasting</summary>
+<Table1/>
+</details>
+
+<details>
+<summary>Leaderboard for Trend Prediction</summary>
+<Table2/>
+</details>
+
+<details>
+<summary>Leaderboard for Technical Indicator Calculation</summary>
+<Table3/>
+</details>
+
+<details>
+<summary>Leaderboard for News-driven Question Answering</summary>
+<Table4/>
+</details>
@@ -230,4 +230,125 @@ function SortableTable({ data }: { data: Data }) {
   );
 }
 
-export default SortableTable;
+// Factory: returns a sortable <table> component bound to a fixed, ordered column list; rows are keyed by model_name and each cell renders model[col.key].
+function createSortableTable(headers: { key: SortKeys; label: string }[]) { // NOTE(review): headers[0] is read below, so a non-empty list is assumed — confirm
+  return function SortableTableComponent({ data }: { data: Data }) {
+    const [sortKey, setSortKey] = useState<SortKeys>(headers[0].key); // initial sort column is the first header
+    const [sortOrder, setSortOrder] = useState<SortOrder>("ascn"); // initial order; toggled to "desc" by changeSort
+    // useCallback memoizes the sorting function, not its result: sortData runs whenever sortedData() is called in the render below.
+    const sortedData = useCallback(() => sortData({ tableData: data, sortKey, reverse: sortOrder === "desc" }), [data, sortKey, sortOrder]);
+    // Click handler shared by every column's SortButton.
+    function changeSort(key: SortKeys) {
+      setSortOrder(sortOrder === "ascn" ? "desc" : "ascn"); // order flips on every click, including when a different column is selected
+      setSortKey(key);
+    }
+
+    return (
+      <table>
+        <thead>
+          <tr>
+            {headers.map((row) => (
+              <th key={row.key}>
+                {row.label}{" "}
+                <SortButton columnKey={row.key} onClick={() => changeSort(row.key)} sortOrder={sortOrder} sortKey={sortKey} />
+              </th>
+            ))}
+          </tr>
+        </thead>
+        <tbody>
+          {sortedData().map((model) => (
+            <tr key={model.model_name}>
+              {/* <td className="headcol">{model.model_name}</td> */}
+              {headers.map((col) => (
+                <td key={col.key}>{model[col.key]}</td>
+              ))}
+            </tr>
+          ))}
+        </tbody>
+      </table>
+    );
+  };
+}
+
+// Columns for SortableTable1: stock-price and temperature forecasting errors (TS vs. TS+Text); each key is a SortKeys value, each label is the text rendered in the <th>.
+const headers1: { key: SortKeys; label: string }[] = [
+    { key: "model_name", label: "Model" },
+    { key: "stock_price_forecast_7_day_mae_ts", label: "Stock price predict. \n for 7 days under TS (MAE)" }, // NOTE(review): only label containing a "\n" escape — confirm whether the line break is intentional
+    { key: "stock_price_forecast_7_day_mae_ts_w_text", label: "Stock price predict. for 7 days under TS+Text (MAE)" },
+    { key: "stock_price_forecast_7_day_mape_ts", label: "Stock price predict. for 7 days under TS (MAPE)" },
+    { key: "stock_price_forecast_7_day_mape_ts_w_text", label: "Stock price predict. for 7 days under TS+Text (MAPE)" },
+    { key: "stock_price_forecast_30_day_mae_ts", label: "Stock price predict. for 30 days under TS (MAE)" },
+    { key: "stock_price_forecast_30_day_mae_ts_w_text", label: "Stock price predict. for 30 days under TS+Text (MAE)" },
+    { key: "stock_price_forecast_30_day_mape_ts", label: "Stock price predict. for 30 days under TS (MAPE)" },
+    { key: "stock_price_forecast_30_day_mape_ts_w_text", label: "Stock price predict. for 30 days under TS+Text (MAPE)" },
+    { key: "temp_forecast_7_day_mse_ts", label: "Temp. predict. for 7 days under TS (MSE)" },
+    { key: "temp_forecast_7_day_mse_ts_w_text", label: "Temp. predict. for 7 days under TS+Text (MSE)" },
+    { key: "temp_forecast_7_day_mae_ts", label: "Temp. predict. for 7 days under TS (MAE)" },
+    { key: "temp_forecast_7_day_mae_ts_w_text", label: "Temp. predict. for 7 days under TS+Text (MAE)" },
+    { key: "temp_forecast_14_day_mse_ts", label: "Temp. predict. for 14 days under TS (MSE)" },
+    { key: "temp_forecast_14_day_mse_ts_w_text", label: "Temp. predict. for 14 days under TS+Text (MSE)" },
+    { key: "temp_forecast_14_day_mae_ts", label: "Temp. predict. for 14 days under TS (MAE)" },
+    { key: "temp_forecast_14_day_mae_ts_w_text", label: "Temp. predict. for 14 days under TS+Text (MAE)" },
+];
+
+const headers2: { key: SortKeys; label: string }[] = [ // Columns for SortableTable2: trend-prediction accuracy for stock (7/30 days, 3-way/5-way) and temperature (past/future), TS vs. TS+Text
+  { key: "model_name", label: "Model" },
+  { key: "stock_trend_predict_acc_7_day_3_way_ts", label: "Stock trend predict. for 7 days 3-way under TS (Acc)"},
+  { key: "stock_trend_predict_acc_7_day_3_way_ts_w_text", label: "Stock trend predict. for 7 days 3-way under TS+Text (Acc)"},
+  { key: "stock_trend_predict_acc_7_day_5_way_ts", label: "Stock trend predict. for 7 days 5-way under TS (Acc)"},
+  { key: "stock_trend_predict_acc_7_day_5_way_ts_w_text", label: "Stock trend predict. for 7 days 5-way under TS+Text (Acc)"},
+  { key: "stock_trend_predict_acc_30_day_3_way_ts", label: "Stock trend predict. for 30 days 3-way under TS (Acc)"},
+  { key: "stock_trend_predict_acc_30_day_3_way_ts_w_text", label: "Stock trend predict. for 30 days 3-way under TS+Text (Acc)"},
+  { key: "stock_trend_predict_acc_30_day_5_way_ts", label: "Stock trend predict. for 30 days 5-way under TS (Acc)"},
+  { key: "stock_trend_predict_acc_30_day_5_way_ts_w_text", label: "Stock trend predict. for 30 days 5-way under TS+Text (Acc)"},
+  { key: "temp_trend_predict_acc_past_ts", label: "Temp. trend predict. past under TS (Acc)"},
+  { key: "temp_trend_predict_acc_past_ts_w_text", label: "Temp. trend predict. past under TS+Text (Acc)"},
+  { key: "temp_trend_predict_acc_future_ts", label: "Temp. trend predict. future under TS (Acc)"},
+  { key: "temp_trend_predict_acc_future_ts_w_text", label: "Temp. trend predict. future under TS+Text (Acc)"},
+];
+
+const headers3: { key: SortKeys; label: string }[] = [ // Columns for SortableTable3: errors for stock indicators (MACD, Bollinger Bands; MSE) and temperature max/min/diff predictions (MSE, MAE), TS vs. TS+Text
+  { key: "model_name", label: "Model" },
+  { key: "stock_indicator_predict_mse_7_day_macd_ts", label: "MACD predict. for 7 days under TS (MSE)"}, 
+  { key: "stock_indicator_predict_mse_7_day_macd_ts_w_text", label: "MACD predict. for 7 days under TS+Text (MSE)"},
+  { key: "stock_indicator_predict_mse_7_day_bb_ts", label: "Bollinger Bands predict. for 7 days under TS (MSE)"}, 
+  { key: "stock_indicator_predict_mse_7_day_bb_ts_w_text", label: "Bollinger Bands predict. for 7 days under TS+Text (MSE)"},
+  { key: "stock_indicator_predict_mse_30_day_macd_ts", label: "MACD predict. for 30 days under TS (MSE)"}, 
+  { key: "stock_indicator_predict_mse_30_day_macd_ts_w_text", label: "MACD predict. for 30 days under TS+Text (MSE)"},
+  { key: "stock_indicator_predict_mse_30_day_bb_ts", label: "Bollinger Bands predict. for 30 days under TS (MSE)"}, 
+  { key: "stock_indicator_predict_mse_30_day_bb_ts_w_text", label: "Bollinger Bands predict. for 30 days under TS+Text (MSE)"},
+  { key: "temp_predict_max_mse_ts", label: "Temp. predict. max under TS (MSE)"},
+  { key: "temp_predict_max_mse_ts_w_text", label: "Temp. predict. max under TS+Text (MSE)"},
+  { key: "temp_predict_max_mae_ts", label: "Temp. predict. max under TS (MAE)"},
+  { key: "temp_predict_max_mae_ts_w_text", label: "Temp. predict. max under TS+Text (MAE)"},
+  { key: "temp_predict_min_mse_ts", label: "Temp. predict. min under TS (MSE)"},
+  { key: "temp_predict_min_mse_ts_w_text", label: "Temp. predict. min under TS+Text (MSE)"},
+  { key: "temp_predict_min_mae_ts", label: "Temp. predict. min under TS (MAE)"},
+  { key: "temp_predict_min_mae_ts_w_text", label: "Temp. predict. min under TS+Text (MAE)"},
+  { key: "temp_predict_diff_mse_ts", label: "Temp. predict. diff. under TS (MSE)"},
+  { key: "temp_predict_diff_mse_ts_w_text", label: "Temp. predict. diff. under TS+Text (MSE)"},
+  { key: "temp_predict_diff_mae_ts", label: "Temp. predict. diff. under TS (MAE)"},
+  { key: "temp_predict_diff_mae_ts_w_text", label: "Temp. predict. diff. under TS+Text (MAE)"},
+];
+
+const headers4: { key: SortKeys; label: string }[] = [ // Columns for SortableTable4: accuracy for news–stock correlation (3-way/5-way) and news-driven multi-choice QA (finance/weather) at 7 and 30 days
+  { key: "model_name", label: "Model" },
+  { key: "news_stock_corr_acc_7_day_3_way", label: "News stock corr. for 7 days 3-way (Acc)"},
+  { key: "news_stock_corr_acc_7_day_5_way", label: "News stock corr. for 7 days 5-way (Acc)"},
+  { key: "news_stock_corr_acc_30_day_3_way", label: "News stock corr. for 30 days 3-way (Acc)"},
+  { key: "news_stock_corr_acc_30_day_5_way", label: "News stock corr. for 30 days 5-way (Acc)"},
+  { key: "news_driven_mcqa_acc_7_day_fin", label: "News driven MCQA for 7 days for Finance data (Acc)"},
+  { key: "news_driven_mcqa_acc_7_day_weather", label: "News driven MCQA for 7 days for Weather data (Acc)"},
+  { key: "news_driven_mcqa_acc_30_day_fin", label: "News driven MCQA for 30 days for Finance data (Acc)"},
+  { key: "news_driven_mcqa_acc_30_day_weather", label: "News driven MCQA for 30 days for Weather data (Acc)"}
+];
+
+// Build one component per header set; each call returns a distinct component whose instances hold their own sort key/order state.
+const SortableTable1 = createSortableTable(headers1);
+const SortableTable2 = createSortableTable(headers2);
+const SortableTable3 = createSortableTable(headers3);
+const SortableTable4 = createSortableTable(headers4);
+
+// Named exports replace the former `export default SortableTable`,
+// so importers must use `import { SortableTable, ... } from "./sortable-table"`.
+export { SortableTable ,SortableTable1, SortableTable2, SortableTable3, SortableTable4 };
@@ -1,5 +1,5 @@
 import { useState } from "react";
-import SortableTable from "./sortable-table";
+import {SortableTable, SortableTable1, SortableTable2, SortableTable3, SortableTable4} from "./sortable-table";
 import data from "../app/projects/mtbench/data/data_leaderboard.json";
 import "../styles/table.css";
 
@@ -11,4 +11,40 @@ function Table() {
   );
 }
 
-export default Table;
+function Table1() { // Renders SortableTable1 over the imported leaderboard JSON, inside a "table-wrapper" div
+  return (
+    <div className="table-wrapper">
+      <SortableTable1 data={data} />
+    </div>
+  );
+}
+
+function Table2() { // Renders SortableTable2 over the imported leaderboard JSON, inside a "table-wrapper" div
+  return (
+    <div className="table-wrapper">
+      <SortableTable2 data={data} />
+    </div>
+  );
+}
+
+function Table3() { // Renders SortableTable3 over the imported leaderboard JSON, inside a "table-wrapper" div
+  return (
+    <div className="table-wrapper">
+      <SortableTable3 data={data} />
+    </div>
+  );
+}
+
+function Table4() { // Renders SortableTable4 over the imported leaderboard JSON, inside a "table-wrapper" div
+  return (
+    <div className="table-wrapper">
+      <SortableTable4 data={data} />
+    </div>
+  );
+}
+
+// Exporting the original Table plus the four task-specific tables (five components in total)
+export { Table, Table1, Table2, Table3, Table4 };
+
+// Named exports replace the previous `export default Table`; importers must use `import { Table, ... }`.
+
@@ -25,7 +25,7 @@ export const publications: Publication[] = [
     venue: "",
     page: "mtbench",
     code: "https://github.com/Graph-and-Geometric-Learning/MTBencht",
-    paper: "",
+    paper: "https://arxiv.org/abs/2503.16858",
     abstract: "We introduce MTBench, a large-scale benchmark designed to evaluate large language models (LLMs) on time series and text understanding across financial and weather domains. MTBench comprises of paired time-series and textual data, including financial news with corresponding stock price movements and weather reports aligned with historical temperature records.", 
     impact: "We evaluate state-of-the-art LLMs on MTBench, analyzing their effectiveness in modeling the complex relationships between news narratives and temporal patterns. Our findings reveal significant challenges in current models, including difficulties in capturing long-term dependencies, interpreting causality in financial and weather trends, and effectively fusing multimodal information.",
     tags: [Tag.Benchmark, Tag.MultiModalFoundationModel],