
Commit 8c20edc

Merge pull request #33 from joshhan619/ltsm-stack
Pretrained weight loading and prompt data generation added to API
2 parents 4cbea68 + 7ab243a commit 8c20edc

File tree: 12 files changed, +499 −45 lines

ltsm/common/base_training_pipeline.py

Lines changed: 2 additions & 1 deletion
@@ -52,6 +52,7 @@ class TrainingConfig:
         "data": "ETTh1",
         "features": "MS",
         "prompt_data_path": "./weather.csv",
+        "hf_hub_model": None
     }
 
     def __init__(self, model_config: PretrainedConfig, **kwargs):
@@ -126,7 +127,7 @@ def __init__(self,
         self.config = config
 
         if not model:
-            self.model = get_model(config.model_config, config.train_params["model"], config.train_params["local_pretrain"])
+            self.model = get_model(config.model_config, config.train_params["model"], config.train_params["local_pretrain"], config.train_params["hf_hub_model"])
 
         if self.config.train_params["lora"]:
             peft_config = LoraConfig(
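With this change the training pipeline picks up an optional "hf_hub_model" entry from train_params and forwards it to get_model. A minimal usage sketch, assuming TrainingConfig folds keyword arguments into train_params (as the **kwargs signature above suggests); the Hub repo id is a placeholder, not one shipped with this PR:

# Sketch only: the Hub repo id below is hypothetical, and passing overrides as
# keyword arguments assumes TrainingConfig merges **kwargs into train_params.
from transformers import PretrainedConfig
from ltsm.common.base_training_pipeline import TrainingConfig

model_config = PretrainedConfig()          # stand-in; use the config of the registered model
config = TrainingConfig(
    model_config,
    model="LTSM",
    local_pretrain="None",                 # skip the local checkpoint branch
    hf_hub_model="your-org/ltsm-weights",  # placeholder: load pretrained weights from the Hub
)
# The pipeline then builds the model via:
#   get_model(config.model_config, train_params["model"],
#             train_params["local_pretrain"], train_params["hf_hub_model"])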

ltsm/data_provider/__init__.py

Lines changed: 4 additions & 1 deletion
@@ -13,6 +13,7 @@
 )
 from .data_splitter import SplitterByTimestamp
 from .dataset import TSDataset, TSPromptDataset, TSTokenDataset
+from .prompt_generator import prompt_generate_split, prompt_normalization_split
 
 __all__ = {
     DatasetFactory,
@@ -29,5 +30,7 @@
     SplitterByTimestamp,
     TSDataset,
     TSPromptDataset,
-    TSTokenDataset
+    TSTokenDataset,
+    prompt_generate_split,
+    prompt_normalization_split
 }
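With the new re-exports, the prompt helpers can be imported from the package itself rather than the module path; a minimal sketch of the import surface (the function signatures live in the unrendered prompt_generator.py diff):

# Sketch only: shows the new package-level imports added by this PR.
from ltsm.data_provider import prompt_generate_split, prompt_normalization_split

Note that __all__ here is a set of the objects themselves rather than the conventional list of name strings; the diff keeps that pre-existing style unchanged.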

ltsm/data_provider/prompt_generator.py

Lines changed: 404 additions & 0 deletions
Large diffs are not rendered by default.
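The 404-line module itself is collapsed above, but the renamed test file further down exercises two of its entry points, prompt_save and save_data. A hedged sketch of the call shapes, inferred only from those tests (paths, data, and the final boolean flag are illustrative):

# Sketch only: argument order mirrors the calls in prompt_generator_test.py;
# the DataFrames, output directory, and dataset name are placeholders.
import pandas as pd
from ltsm.data_provider.prompt_generator import prompt_save, save_data

prompt_buf = {
    "train": pd.DataFrame({"f0": [1.0, 2.0]}),
    "val":   pd.DataFrame({"f0": [3.0]}),
    "test":  pd.DataFrame({"f0": [4.0]}),
}
# Persist one buffer per split in any format the tests cover: "pth.tar", "csv", or "npz".
prompt_save(prompt_buf, "./prompt_out", "weather", "csv", False)

# Lower-level helper: write a single DataFrame to a path in the chosen format.
save_data(pd.DataFrame([range(10)]), "./prompt_out/sample.npz", "npz")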

ltsm/models/__init__.py

Lines changed: 6 additions & 1 deletion
@@ -29,14 +29,15 @@ def register_model(module, module_name: str):
 register_model(DLinear, 'DLinear')
 register_model(Informer, 'Informer')
 
-def get_model(config: PretrainedConfig, model_name: str, local_pretrain: str = None) -> PreTrainedModel:
+def get_model(config: PretrainedConfig, model_name: str, local_pretrain: str = None, hf_hub_model: str = None) -> PreTrainedModel:
     """
     Factory method to create a model by name.
 
     Args:
         config (PreTrainedConfig): The configuration for the model.
         model_name (str): The name of the model to instantiate.
         local_pretrain (bool): If True, load the model from a local pretraining path.
+        hf_hub_model (str): The Hugging Face Hub model name.
 
     Returns:
         torch.nn.Module: Instantiated model.
@@ -47,6 +48,10 @@ def get_model(config: PretrainedConfig, model_name: str, local_pretrain: str = N
     if model_name not in model_dict:
         raise ValueError(f"Model {model_name} is not registered. Available models: {list(model_dict.keys())}")
 
+    # Load pretrained weights if hf_hub_model is provided
+    if hf_hub_model is not None:
+        return model_dict[model_name].from_pretrained(hf_hub_model, config)
+
     # Check for local pretraining
     if local_pretrain is None or local_pretrain == "None":
         return model_dict[model_name](config)
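The factory now resolves weights in a fixed order: a Hugging Face Hub repo id wins if given, then a local pretraining path, and otherwise a fresh model is built from the config. A sketch of the three call shapes (the repo id and checkpoint path are placeholders):

# Sketch only: the repo id and checkpoint path are hypothetical.
from ltsm.models import get_model

model = get_model(config, "LTSM")                                       # fresh weights from config
model = get_model(config, "LTSM", local_pretrain="./checkpoints/ltsm")  # local checkpoint branch
model = get_model(config, "LTSM", hf_hub_model="your-org/ltsm-weights") # Hub weights via from_pretrained

One detail worth noting: from_pretrained receives config as a positional argument here; with the standard transformers PreTrainedModel.from_pretrained that lands in *model_args rather than the config keyword, so this relies on the registered model classes accepting it in that position.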

tests/common/base_training_pipeline_test.py

Lines changed: 4 additions & 2 deletions
@@ -22,7 +22,8 @@ def mock_pipeline(mocker):
         "train_ratio": 0.7,
         "val_ratio": 0.1,
         "downsample_rate": 1,
-        "do_anomaly": False
+        "do_anomaly": False,
+        "hf_hub_model": None
     }
     config.model_config = mocker.MagicMock()
     config.train_params["lora"] = False
@@ -50,7 +51,8 @@ def test_create_model_lora_enabled(mocker):
         "tmax": 10,
         "learning_rate": 1e-3,
         "model": "LTSM",
-        "local_pretrain": "None"
+        "local_pretrain": "None",
+        "hf_hub_model": None
     }
     config.model_config = mocker.MagicMock()
     config.train_params["lora"] = True

tests/prompt_reader/prompt_generate_split_test.py renamed to tests/data_provider/prompt_generator_test.py

Lines changed: 35 additions & 4 deletions
@@ -3,12 +3,12 @@
 import pandas as pd
 import numpy as np
 import torch
+from ltsm.data_provider.prompt_generator import save_data, prompt_save
 
 @pytest.fixture
-def setup(mocker, tmp_path):
+def setup_prompt(mocker, tmp_path):
     """set up the test environment"""
     mocker.patch.dict('sys.modules', {'tsfel': mocker.MagicMock()})
-    from ltsm.prompt_reader.stat_prompt.prompt_generate_split import prompt_save
 
     sample_prompt_buf = {
         'train': pd.DataFrame({
@@ -36,10 +36,10 @@ def setup(mocker, tmp_path):
     return prompt_save, sample_prompt_buf, output_path, data_name, ifTest
 
 @pytest.mark.parametrize("save_format", ["pth.tar", "csv", "npz"])
-def test_prompt_save(setup, save_format):
+def test_prompt_save(setup_prompt, save_format):
     """test if the prompt data is saved correctly in different formats and loaded back correctly
     """
-    prompt_save, sample_prompt_buf, output_path, data_name, ifTest = setup
+    prompt_save, sample_prompt_buf, output_path, data_name, ifTest = setup_prompt
     prompt_save(sample_prompt_buf, output_path, data_name, save_format, ifTest)
 
     for split in ["train", "val", "test"]:
@@ -75,3 +75,34 @@ def test_prompt_save(setup, save_format):
         if save_format != "csv":
             assert load_data.equals(prompt_data), f"Data mismatch: {load_data} vs {prompt_data}"
     print(f"All tests passed for {file_path}")
+
+
+@pytest.fixture
+def setup_save():
+    """input data for testing"""
+    data = pd.DataFrame([range(133)])
+    print(data.shape)
+    return data
+
+@pytest.mark.parametrize("save_format", ["pth.tar", "csv", "npz"])
+def test_save_data(tmpdir, setup_save, save_format):
+    """test save_data function: save data in different formats and load it back to check if the data is saved correctly"""
+    data_path = os.path.join(tmpdir, f"test_data.{save_format}")
+
+    save_data(setup_save, data_path, save_format)
+
+    if save_format == "pth.tar":
+        loaded_data = torch.load(data_path)
+    elif save_format == "csv":
+        loaded_data = pd.read_csv(data_path)
+        loaded_data.columns = loaded_data.columns.astype(int)
+    elif save_format == "npz":
+        loaded = np.load(data_path)
+        loaded_data = pd.DataFrame(data=loaded["data"])
+
+    assert isinstance(loaded_data, pd.DataFrame), "Loaded data should be a DataFrame"
+    assert loaded_data.shape == setup_save.shape, f"Shape mismatch: {loaded_data.shape} vs {setup_save.shape}"
+    assert loaded_data.columns.equals(setup_save.columns), "Columns mismatch"
+    assert np.allclose(loaded_data.values, setup_save.values, rtol=1e-8, atol=1e-8), "Data values mismatch"
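The round-trip the new test asserts is pure format dispatch: a DataFrame written as pth.tar, csv, or npz must reload with the same shape, columns, and values. The PR's actual save_data sits in the unrendered prompt_generator.py diff; the sketch below is only a minimal implementation consistent with what the test checks, not the merged code.

# Sketch only: a minimal save_data consistent with prompt_generator_test.py,
# not the implementation merged in this PR.
import numpy as np
import pandas as pd
import torch

def save_data(data: pd.DataFrame, data_path: str, save_format: str) -> None:
    """Persist a DataFrame so the test's loader (torch.load / read_csv / np.load) can read it back."""
    if save_format == "pth.tar":
        torch.save(data, data_path)              # test reloads with torch.load, which returns the DataFrame
    elif save_format == "csv":
        data.to_csv(data_path, index=False)      # test reloads with pd.read_csv and casts columns back to int
    elif save_format == "npz":
        np.savez(data_path, data=data.values)    # test reloads loaded["data"] into a DataFrame
    else:
        raise ValueError(f"Unsupported save format: {save_format}")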
