
Commit 6a549ae

Moirai expts

1 parent 7db2be8 commit 6a549ae

4 files changed: +61 -62 lines changed

comp.py

Lines changed: 2 additions & 7 deletions

@@ -1,15 +1,10 @@
 import gc
 import os
-import sys
 
 import numpy as np
 import pandas as pd
 import torch
 
-src_path = os.path.abspath(os.path.join("src"))
-if src_path not in sys.path:
-    sys.path.insert(0, src_path)
-
 from samay.dataset import MoiraiDataset
 from samay.model import MoiraiTSModel
 from samay.utils import load_args
@@ -27,7 +22,7 @@ def update_leaderboard(dataset_name, model_name, metrics, leaderboard_path):
     """
     if not os.path.exists(leaderboard_path):
         # Create the leaderboard with appropriate columns if it doesn't exist
-        columns = ["Dataset"] + [
+        columns: list[str] = ["Dataset"] + [
             f"{model}_{metric}"
             for model in ["TimesFM", "Chronos", "Moirai"]
             for metric in metrics.keys()
@@ -131,6 +126,6 @@ def update_leaderboard(dataset_name, model_name, metrics, leaderboard_path):
         eval_results, _, _, _ = moirai.evaluate(val_dataset, metrics=["MSE", "MASE"])
         metrics = {"MSE": eval_results["MSE"], "MASE": eval_results["MASE"]}
         update_leaderboard(dataset, model_name, metrics, leaderboard_path)
-        del chronos
+        del moirai
         torch.cuda.empty_cache()
         gc.collect()
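
Note on the "del chronos" -> "del moirai" fix above: the old name was a leftover from a copy-pasted Chronos block, so the Moirai model was never the object being deleted and its CUDA memory was not released between datasets (or a NameError was raised if no chronos existed). A minimal sketch of the cleanup pattern, assuming an illustrative make_model/evaluate interface that is not part of samay:

import gc

import torch


def evaluate_sequentially(model_factories, val_dataset):
    # Evaluate several large models one at a time on a single GPU.
    results = {}
    for name, make_model in model_factories.items():
        model = make_model()  # load this model's weights onto the GPU
        results[name] = model.evaluate(val_dataset)
        del model                 # drop the last live reference to the model
        torch.cuda.empty_cache()  # hand cached CUDA blocks back to the driver
        gc.collect()              # sweep any lingering reference cycles
    return results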

download_data.py

Lines changed: 36 additions & 36 deletions

@@ -1,46 +1,46 @@
-from datasets import load_dataset
 import os
 
+from datasets import Dataset, load_dataset
 
 if __name__ == "__main__":
     save_dir = "data/monash"
     if not os.path.exists(save_dir):
         os.makedirs(save_dir)
     dataset_names = [
-            "weather",
-            "tourism_yearly",
-            "tourism_quarterly",
-            "tourism_monthly",
-            "cif_2016",
-            "london_smart_meters",
-            "australian_electricity_demand",
-            "wind_farms_minutely",
-            "bitcoin",
-            "pedestrian_counts",
-            "vehicle_trips",
-            "kdd_cup_2018",
-            "nn5_daily",
-            "nn5_weekly",
-            "kaggle_web_traffic",
-            "kaggle_web_traffic_weekly",
-            "solar_10_minutes",
-            "solar_weekly",
-            "car_parts",
-            "fred_md",
-            "traffic_hourly",
-            "traffic_weekly",
-            "hospital",
-            "covid_deaths",
-            "sunspot",
-            "saugeenday",
-            "us_births",
-            "solar_4_seconds",
-            "wind_4_seconds",
-            "rideshare",
-            "oikolab_weather",
-            "temperature_rain"
-        ]
+        "weather",
+        "tourism_yearly",
+        "tourism_quarterly",
+        "tourism_monthly",
+        "cif_2016",
+        "london_smart_meters",
+        "australian_electricity_demand",
+        "wind_farms_minutely",
+        "bitcoin",
+        "pedestrian_counts",
+        "vehicle_trips",
+        "kdd_cup_2018",
+        "nn5_daily",
+        "nn5_weekly",
+        "kaggle_web_traffic",
+        "kaggle_web_traffic_weekly",
+        "solar_10_minutes",
+        "solar_weekly",
+        "car_parts",
+        "fred_md",
+        "traffic_hourly",
+        "traffic_weekly",
+        "hospital",
+        "covid_deaths",
+        "sunspot",
+        "saugeenday",
+        "us_births",
+        "solar_4_seconds",
+        "wind_4_seconds",
+        "rideshare",
+        "oikolab_weather",
+        "temperature_rain",
+    ]
     for dataset_name in dataset_names:
-        dataset = load_dataset("monash_tsf", dataset_name)
+        dataset: Dataset = load_dataset("monash_tsf", dataset_name)  # type: ignore
         dataset.save_to_disk(f"{save_dir}/{dataset_name}")
-    print(f"Downloaded {dataset_name} dataset")
+        print(f"Downloaded {dataset_name} dataset")

src/samay/models/moment/momentfm/utils/masking.py

Lines changed: 7 additions & 3 deletions

@@ -42,7 +42,7 @@ def convert_patch_to_seq_view(
         """
         return mask.repeat_interleave(patch_len, dim=-1)
 
-    def generate_mask(self, x: torch.Tensor, input_mask: Optional[torch.Tensor] = None):
+    def generate_mask(self, x: torch.Tensor, input_mask: torch.Tensor):
         """
         Input:
         x : torch.Tensor of shape
@@ -57,8 +57,12 @@ def generate_mask(self, x: torch.Tensor, input_mask: Optional[torch.Tensor] = None):
             return self._mask_patch_view(x, input_mask=input_mask)
         elif x.ndim == 3:
             return self._mask_seq_view(x, input_mask=input_mask)
+        else:
+            raise ValueError(
+                f"Invalid input shape: {x.shape}. Expected 3D or 4D tensor."
+            )
 
-    def _mask_patch_view(self, x, input_mask=None):
+    def _mask_patch_view(self, x, input_mask: torch.Tensor):
         """
         Input:
         x : torch.Tensor of shape
@@ -101,7 +105,7 @@ def _mask_patch_view(self, x, input_mask=None):
 
         return mask.long()
 
-    def _mask_seq_view(self, x, input_mask=None):
+    def _mask_seq_view(self, x, input_mask: torch.Tensor):
         """
         Input:
         x : torch.Tensor of shape
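
Before this change, an input that was neither 3-D nor 4-D fell off the end of the if/elif chain and generate_mask silently returned None; the new else branch turns that into an immediate ValueError. A toy version of the hardened dispatch, assuming the shape convention suggested by the method names ([batch, channels, n_patches, patch_len] for the patched view, [batch, channels, seq_len] for the sequence view); the function name and mask_ratio are illustrative:

import torch


def dispatch_mask(x: torch.Tensor, mask_ratio: float = 0.3) -> torch.Tensor:
    if x.ndim == 4:
        n = x.shape[2]   # patched view: mask whole patches
    elif x.ndim == 3:
        n = x.shape[-1]  # sequence view: mask individual timesteps
    else:
        raise ValueError(f"Invalid input shape: {x.shape}. Expected 3D or 4D tensor.")
    # 1 = keep, 0 = masked, matching the long() masks returned above
    return (torch.rand(x.shape[0], n) > mask_ratio).long()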

transform_ILI.py

Lines changed: 16 additions & 16 deletions

@@ -1,39 +1,39 @@
 import pandas as pd
-from datetime import datetime
-
 
 if __name__ == "__main__":
     df = pd.read_csv("data/Flu_USA/ILINet.csv")
 
-    df = df[df['REGION TYPE'] == 'National']
-    df = df[df['WEEK'] != 53]
+    df = df[df["REGION TYPE"] == "National"]
+    df = df[df["WEEK"] != 53]
 
-    df['date'] = pd.to_datetime(df['YEAR'].astype(str) + '-W' + df['WEEK'].astype(str) + '-1', format='%Y-W%U-%w')
+    df["date"] = pd.to_datetime(
+        df["YEAR"].astype(str) + "-W" + df["WEEK"].astype(str) + "-1",
+        format="%Y-W%U-%w",
+    )
 
     result = []
     for i in range(len(df) - 1):
-      result.append(df.iloc[i])
+        result.append(df.iloc[i])
 
-        if (df.iloc[i + 1]['date'] - df.iloc[i]['date']).days == 14:
+        if (df.iloc[i + 1]["date"] - df.iloc[i]["date"]).days == 14:
             new_row = df.iloc[i].copy()
-            new_row['date'] = df.iloc[i]['date'] + pd.Timedelta(days=7)
+            new_row["date"] = df.iloc[i]["date"] + pd.Timedelta(days=7)
             result.append(new_row)
-
+
     result.append(df.iloc[-1])
     df = pd.DataFrame(result)
 
-    df = df.drop(columns=['YEAR', 'WEEK'])
-    gaps = df['date'].diff().dropna().unique()
+    df = df.drop(columns=["YEAR", "WEEK"])
+    gaps = df["date"].diff().dropna().unique()
     print("Unique time intervals:", gaps)
-    df['time_diff'] = df['date'].diff()
+    df["time_diff"] = df["date"].diff()
 
-    rows_with_14_days = df[df['time_diff'] == pd.Timedelta(days=14)]
+    rows_with_14_days = df[df["time_diff"] == pd.Timedelta(days=14)]
     print(rows_with_14_days)
-    df = df.drop(columns=['time_diff'])
-    infered_freq = pd.infer_freq(df['date'])
+    df = df.drop(columns=["time_diff"])
+    infered_freq = pd.infer_freq(df["date"])
     print(f"Infered frequency: {infered_freq}")
 
     df.to_csv("data/Flu_USA/Flu_USA.csv", index=False)
 
     print("Data saved to output.csv")
-
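
The heart of this script is the gap-filling loop: dropping the WEEK == 53 rows leaves 14-day holes in an otherwise weekly series, and wherever two consecutive rows are 14 days apart the loop inserts a copy of the earlier row dated 7 days later, restoring the strict weekly cadence that pd.infer_freq can recognize. A self-contained run of that same pass on toy data:

import pandas as pd

df = pd.DataFrame({
    "date": pd.to_datetime(["2020-01-06", "2020-01-13", "2020-01-27"]),  # 14-day gap at the end
    "value": [1.0, 2.0, 3.0],
})

result = []
for i in range(len(df) - 1):
    result.append(df.iloc[i])
    if (df.iloc[i + 1]["date"] - df.iloc[i]["date"]).days == 14:
        new_row = df.iloc[i].copy()                             # duplicate the earlier week
        new_row["date"] = df.iloc[i]["date"] + pd.Timedelta(days=7)
        result.append(new_row)
result.append(df.iloc[-1])

weekly = pd.DataFrame(result)
print(weekly["date"].tolist())        # 2020-01-06, -13, -20, -27: strictly weekly
print(pd.infer_freq(weekly["date"]))  # "W-MON" for this Monday-anchored example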
