CESNET
diff --git a/‎tutorial_notebooks/annotations.ipynb‎
Lines changed: 1912 additions & 0 deletions b/‎tutorial_notebooks/annotations.ipynb‎
Lines changed: 1912 additions & 0 deletions
diff --git a/‎tutorial_notebooks/benchmarks.ipynb‎
Lines changed: 1499 additions & 0 deletions b/‎tutorial_notebooks/benchmarks.ipynb‎
Lines changed: 1499 additions & 0 deletions
diff --git a/‎tutorial_notebooks/handling_missing_data.ipynb‎
Lines changed: 3766 additions & 0 deletions b/‎tutorial_notebooks/handling_missing_data.ipynb‎
Lines changed: 3766 additions & 0 deletions
diff --git a/‎tutorial_notebooks/series_based_choosing_data.ipynb‎
Lines changed: 1150 additions & 0 deletions b/‎tutorial_notebooks/series_based_choosing_data.ipynb‎
Lines changed: 1150 additions & 0 deletions
diff --git a/‎tutorial_notebooks/series_based_loading_data.ipynb‎
Lines changed: 14116 additions & 0 deletions b/‎tutorial_notebooks/series_based_loading_data.ipynb‎
Lines changed: 14116 additions & 0 deletions
diff --git a/‎tutorial_notebooks/series_based_using_scalers.ipynb‎
Lines changed: 1404 additions & 0 deletions b/‎tutorial_notebooks/series_based_using_scalers.ipynb‎
Lines changed: 1404 additions & 0 deletions
diff --git a/‎tutorial_notebooks/simple_forecasting.ipynb‎
Lines changed: 394 additions & 0 deletions b/‎tutorial_notebooks/simple_forecasting.ipynb‎
Lines changed: 394 additions & 0 deletions
@@ -0,0 +1,394 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Example of usage for simple forecasting"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Import"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tqdm\n",
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.optim as optim\n",
+    "import numpy as np\n",
+    "from sklearn.metrics import mean_squared_error\n",
+    "\n",
+    "from cesnet_tszoo.utils.enums import FillerType, ScalerType\n",
+    "from cesnet_tszoo.benchmarks import load_benchmark"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Preparing dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "File size: 0.01GB\n",
+      "Remaining: 0.01GB\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 9.59M/9.59M [00:00<00:00, 25.2MB/s]\n",
+      "100%|██████████| 283/283 [00:03<00:00, 71.15it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Config Details\n",
+      "    Used for database: CESNET-TimeSeries24\n",
+      "    Aggregation: AgreggationType.AGG_1_DAY\n",
+      "    Source: SourceType.INSTITUTIONS\n",
+      "\n",
+      "    Time series\n",
+      "        Time series IDS: [ 30 222 276  48 243 ... 112  19  15 101 117], Length=283\n",
+      "        Test time series IDS: None\n",
+      "    Time periods\n",
+      "        Train time periods: range(0, 168)\n",
+      "        Val time periods: range(161, 196)\n",
+      "        Test time periods: range(189, 280)\n",
+      "        All time periods: range(0, 280)\n",
+      "    Features\n",
+      "        Taken features: ['n_bytes']\n",
+      "        Default values: [nan]\n",
+      "        Time series ID included: False\n",
+      "        Time included: False\n",
+      "    Sliding window\n",
+      "        Sliding window size: 7\n",
+      "        Sliding window prediction size: 1\n",
+      "        Sliding window step size: 1\n",
+      "        Set shared size: 7\n",
+      "    Fillers\n",
+      "        Filler type: None\n",
+      "    Scalers\n",
+      "        Scaler type: None\n",
+      "    Batch sizes\n",
+      "        Train batch size: 32\n",
+      "        Val batch size: 64\n",
+      "        Test batch size: 128\n",
+      "        All batch size: 128\n",
+      "    Default workers\n",
+      "        Init worker count: 4\n",
+      "        Train worker count: 4\n",
+      "        Val worker count: 3\n",
+      "        Test worker count: 2\n",
+      "        All worker count: 4\n",
+      "    Other\n",
+      "        Nan threshold: 1.0\n",
+      "        Random state: None\n",
+      "                \n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "benchmark = load_benchmark(identifier=\"0d523e69c328\",  data_root=\"/some_directory/\")\n",
+    "dataset = benchmark.get_initialized_dataset()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Changing used config values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 283/283 [00:03<00:00, 72.20it/s]\n",
+      "100%|██████████| 283/283 [00:04<00:00, 70.63it/s]\n",
+      "100%|██████████| 283/283 [00:03<00:00, 70.90it/s]\n",
+      "100%|██████████| 283/283 [00:03<00:00, 72.05it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Config Details\n",
+      "    Used for database: CESNET-TimeSeries24\n",
+      "    Aggregation: AgreggationType.AGG_1_DAY\n",
+      "    Source: SourceType.INSTITUTIONS\n",
+      "\n",
+      "    Time series\n",
+      "        Time series IDS: [ 30 222 276  48 243 ... 112  19  15 101 117], Length=283\n",
+      "        Test time series IDS: None\n",
+      "    Time periods\n",
+      "        Train time periods: range(0, 168)\n",
+      "        Val time periods: range(144, 196)\n",
+      "        Test time periods: range(172, 280)\n",
+      "        All time periods: range(0, 280)\n",
+      "    Features\n",
+      "        Taken features: ['n_bytes']\n",
+      "        Default values: [0.]\n",
+      "        Time series ID included: False\n",
+      "        Time included: False\n",
+      "    Sliding window\n",
+      "        Sliding window size: 24\n",
+      "        Sliding window prediction size: 1\n",
+      "        Sliding window step size: 1\n",
+      "        Set shared size: 24\n",
+      "    Fillers\n",
+      "        Filler type: mean_filler\n",
+      "    Scalers\n",
+      "        Scaler type: min_max_scaler\n",
+      "        Is scaler per Time series: True\n",
+      "        Are scalers premade: False\n",
+      "        Are premade scalers partial_fitted: False\n",
+      "    Batch sizes\n",
+      "        Train batch size: 32\n",
+      "        Val batch size: 64\n",
+      "        Test batch size: 128\n",
+      "        All batch size: 128\n",
+      "    Default workers\n",
+      "        Init worker count: 4\n",
+      "        Train worker count: 4\n",
+      "        Val worker count: 3\n",
+      "        Test worker count: 2\n",
+      "        All worker count: 4\n",
+      "    Other\n",
+      "        Nan threshold: 1.0\n",
+      "        Random state: None\n",
+      "                \n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# (optional) Set default value for missing data \n",
+    "dataset.set_default_values(0)\n",
+    "\n",
+    "# (optional) Set filler for filling missing data \n",
+    "dataset.apply_filler(FillerType.MEAN_FILLER)\n",
+    "\n",
+    "# (optional) Set scaller for data\n",
+    "dataset.apply_scaler(ScalerType.MIN_MAX_SCALER)\n",
+    "\n",
+    "# (optional) Change sliding window setting\n",
+    "dataset.set_sliding_window(sliding_window_size=24, sliding_window_prediction_size=1, sliding_window_step=1, set_shared_size=24)\n",
+    "\n",
+    "# (optional) Change batch sizes\n",
+    "dataset.set_batch_sizes()\n",
+    "\n",
+    "# Display final config\n",
+    "dataset.display_config()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Using simple LSTM model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Creating class for model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+    "\n",
+    "class SimpleLSTM(nn.Module):\n",
+    "    def __init__(self, input_size=1, hidden_size=8, output_size=1):\n",
+    "        super(SimpleLSTM, self).__init__()\n",
+    "        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)\n",
+    "        self.fc = nn.Linear(hidden_size, output_size)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        _, (h_n, _) = self.lstm(x)  # h_n: (1, batch, hidden)\n",
+    "        out = self.fc(h_n[-1])      # (batch, output_size)\n",
+    "        return out.unsqueeze(1)     # (batch, 1, output_size)\n",
+    "    \n",
+    "    def fit(self, train_dataloader, val_dataloader, n_epochs, device):\n",
+    "        self.train()\n",
+    "        criterion = nn.MSELoss()\n",
+    "        optimizer = optim.Adam(self.parameters(), lr=0.01)\n",
+    "        for epoch in range(n_epochs):\n",
+    "            train_losses = []\n",
+    "            val_losses = []\n",
+    "            for (batch_train, batch_val) in zip(train_dataloader, val_dataloader):\n",
+    "                batch_x, batch_y = batch_train\n",
+    "                batch_x = torch.tensor(batch_x, dtype=torch.float32).to(device)\n",
+    "                batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)\n",
+    "\n",
+    "                optimizer.zero_grad()\n",
+    "                output = self(batch_x)\n",
+    "                loss = criterion(output, batch_y)\n",
+    "                loss.backward()\n",
+    "                optimizer.step()\n",
+    "                train_losses.append(loss.item())\n",
+    "\n",
+    "                # validation loss\n",
+    "                with torch.no_grad():\n",
+    "                    batch_x_val, batch_y_val = batch_val\n",
+    "                    batch_x_val = torch.tensor(batch_x_val, dtype=torch.float32).to(device)\n",
+    "                    batch_y_val = torch.tensor(batch_y_val, dtype=torch.float32).to(device)\n",
+    "                    val_output = self(batch_x_val)\n",
+    "                    val_loss = criterion(val_output, batch_y_val)\n",
+    "                    val_losses.append(val_loss.item())\n",
+    "\n",
+    "    \n",
+    "    def predict(self, test_dataloader, device):\n",
+    "        self.eval()\n",
+    "        all_preds = []\n",
+    "        all_targets = []\n",
+    "\n",
+    "        with torch.no_grad():\n",
+    "            for batch_x_test, batch_y_test in test_dataloader:\n",
+    "                batch_x_test = torch.tensor(batch_x_test, dtype=torch.float32).to(device)\n",
+    "                batch_y_test = torch.tensor(batch_y_test, dtype=torch.float32).to(device)\n",
+    "\n",
+    "                output = self(batch_x_test)\n",
+    "                all_preds.append(output.cpu().numpy().flatten())\n",
+    "                all_targets.append(batch_y_test.cpu().numpy().flatten())\n",
+    "\n",
+    "        y_pred = np.concatenate(all_preds)\n",
+    "        y_true = np.concatenate(all_targets)\n",
+    "        return y_pred, y_true"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Training model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 283/283 [02:08<00:00,  2.21it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "results = []\n",
+    "for ts_id in tqdm.tqdm(dataset.get_data_about_set(about='train')['ts_ids']):\n",
+    "    model = SimpleLSTM().to(device)\n",
+    "    model.fit(\n",
+    "        dataset.get_train_dataloader(ts_id), \n",
+    "        dataset.get_val_dataloader(ts_id), \n",
+    "        n_epochs=5, \n",
+    "        device=device,\n",
+    "    )\n",
+    "    y_pred, y_true = model.predict(\n",
+    "        dataset.get_test_dataloader(ts_id), \n",
+    "        device=device,\n",
+    "    )\n",
+    "    \n",
+    "    rmse = mean_squared_error(y_true, y_pred)\n",
+    "    results.append(rmse)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Final prediction scores on test set"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Mean RMSE: 0.082187\n",
+      "Std RMSE: 0.146893\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(f\"Mean RMSE: {np.mean(results):.6f}\")\n",
+    "print(f\"Std RMSE: {np.std(results):.6f}\") "
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}