moved mlflow into run and added tutorial

qwertd105 · qwertd105 · commit 18ef5ab62abb · 2024-08-21T14:12:08.000+03:00
diff --git a/cool_graph/runners.py b/cool_graph/runners.py
@@ -362,9 +362,6 @@ def __init__(
         for k, v in kwargs.items():
             setattr(self, k, v)
 
-        if self.cfg["logging"].get("use_mlflow", False):
-            setup_mlflow_from_config(cfg["logging"]["mlflow"])
-
     def sample_data(self) -> None:
         """
         Sampling data into batches and sampling data with NeighborLoader into list loaders.
@@ -521,7 +518,10 @@ def run(self, train_loader=None, test_loader=None) -> Dict[str, float]:
         """
         self.train_loader = train_loader
         self.test_loader = test_loader
-
+        
+        if self.cfg["logging"].get("use_mlflow", False):
+            setup_mlflow_from_config(self.cfg["logging"]["mlflow"])
+        
         if (self.train_loader is None) and (self.test_loader is None):
             self.sample_data()
         elif "index" not in self.data.keys:
@@ -800,7 +800,10 @@ def optimize_run(
         Returns:
             trials_dataset (pd.DataFrame): Result dataframe with trial params.
         """
-
+        
+        if self.cfg["logging"].get("use_mlflow", False):
+            setup_mlflow_from_config(self.cfg["logging"]["mlflow"])
+        
         self.train_loader = train_loader
         self.test_loader = test_loader
 
diff --git a/cool_graph/train/trainer.py b/cool_graph/train/trainer.py
@@ -251,9 +251,10 @@ def train(self, start_epoch: int = 0, end_epoch: Optional[int] = None) -> Dict[
                     embedding_data=self.embedding_data,
                 )
                 test_tasks = test_metric["tasks"]
-                self.mlflow_log_metrics(
-                    metrics=add_prefix_to_dict_keys(test_metric, "test_"), step=epoch
-                )
+                for task in test_tasks:
+                    self.mlflow_log_metrics(
+                        metrics=add_prefix_to_dict_keys(test_tasks[task], f"{task}_test_"), step=epoch
+                    )
                 test_metric["epoch"] = epoch
                 self._test_metric_lst.append(test_metric)
                 with open(
@@ -280,9 +281,10 @@ def train(self, start_epoch: int = 0, end_epoch: Optional[int] = None) -> Dict[
                     embedding_data=self.embedding_data,
                 )
                 train_tasks = train_metric["tasks"]
-                self.mlflow_log_metrics(
-                    metrics=add_prefix_to_dict_keys(train_metric, "train_"), step=epoch
-                )
+                for task in train_tasks:
+                    self.mlflow_log_metrics(
+                        metrics=add_prefix_to_dict_keys(train_tasks[task], f"{task}_train_"), step=epoch
+                    )
                 train_metric["epoch"] = epoch
                 self._train_metric_lst.append(train_metric)
                 with open(
@@ -339,9 +341,10 @@ def train(self, start_epoch: int = 0, end_epoch: Optional[int] = None) -> Dict[
         test_metric = pd.DataFrame(self._test_metric_lst)
         train_metric = pd.DataFrame(self._train_metric_lst)
 
-        self.mlflow_log_metrics(
-            metrics=add_prefix_to_dict_keys(self._best_loss, "best_")
-        )
+        for task in self._best_loss["tasks"]:  
+            self.mlflow_log_metrics(
+                metrics=add_prefix_to_dict_keys(self._best_loss["tasks"][task], f"{task}_best_")
+            )
         self.mlflow_log_metrics({"global_calc_time": self.global_calc_time})
 
         if self.use_mlflow:
diff --git a/notebooks/Integration_with_mlflow.ipynb b/notebooks/Integration_with_mlflow.ipynb
@@ -0,0 +1,248 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "e81c847e",
+   "metadata": {},
+   "source": [
+    "# Integration with mlflow"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b6aa588c",
+   "metadata": {},
+   "source": [
+    "### Summary: \n",
+    "##### Let's change config parameters to track training with mlflow"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "35d5f728",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-08-21T10:36:36.576223Z",
+     "iopub.status.busy": "2024-08-21T10:36:36.575851Z",
+     "iopub.status.idle": "2024-08-21T10:36:36.579517Z",
+     "shell.execute_reply": "2024-08-21T10:36:36.579010Z",
+     "shell.execute_reply.started": "2024-08-21T10:36:36.576205Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# imports for loading the dataset\n",
+    "from torch_geometric import datasets\n",
+    "import torch\n",
+    "import pandas as pd\n",
+    "from torch_geometric.data import Data\n",
+    "# importing Runner\n",
+    "from cool_graph.runners import Runner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "d82a914c",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-08-21T10:37:07.692657Z",
+     "iopub.status.busy": "2024-08-21T10:37:07.692267Z",
+     "iopub.status.idle": "2024-08-21T10:37:29.043296Z",
+     "shell.execute_reply": "2024-08-21T10:37:29.042582Z",
+     "shell.execute_reply.started": "2024-08-21T10:37:07.692635Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Downloading https://github.com/shchur/gnn-benchmark/raw/master/data/npz/amazon_electronics_computers.npz\n",
+      "Processing...\n",
+      "Done!\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "Data(x=[13752, 767], edge_index=[2, 491722], y=[13752])"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# use simple Amazon dataset with Computers\n",
+    "dataset = datasets.Amazon(root='./data/Amazon', name='Computers')\n",
+    "data = dataset.data\n",
+    "data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "3faa80be",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-08-21T10:39:13.803082Z",
+     "iopub.status.busy": "2024-08-21T10:39:13.802561Z",
+     "iopub.status.idle": "2024-08-21T10:39:13.951912Z",
+     "shell.execute_reply": "2024-08-21T10:39:13.950993Z",
+     "shell.execute_reply.started": "2024-08-21T10:39:13.803052Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# initializing Runner\n",
+    "runner = Runner(data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "c0c84614",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-08-21T10:55:27.607989Z",
+     "iopub.status.busy": "2024-08-21T10:55:27.607408Z",
+     "iopub.status.idle": "2024-08-21T10:55:27.612779Z",
+     "shell.execute_reply": "2024-08-21T10:55:27.612280Z",
+     "shell.execute_reply.started": "2024-08-21T10:55:27.607965Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import urllib3\n",
+    "\n",
+    "runner.cfg[\"logging\"][\"use_mlflow\"] = True # making flag True to use mlflow\n",
+    "runner.cfg[\"logging\"][\"mlflow\"] = {\n",
+    "     \"MLFLOW_TRACKING_URI\": \"https://ml-flow.msk.bd-cloud.mts.ru/\", # uri of \n",
+    "     \"MLFLOW_TRACKING_USERNAME\": \"username\",\n",
+    "     \"MLFLOW_TRACKING_PASSWORD\": \"password\",\n",
+    "     \"MLFLOW_S3_ENDPOINT_URL\": \"https://s3.mts-corp.ru\", # to save artifacts\n",
+    "     \"AWS_ACCESS_KEY_ID\": \"access_key\", # to save artifacts\n",
+    "     \"AWS_SECRET_ACCESS_KEY\": \"secret_access_key\", # to save artifacts\n",
+    "     \"MLFLOW_TRACKING_INSECURE_TLS\": \"true\", # to ignore the TLS certificate verification\n",
+    "     \"MLFLOW_S3_IGNORE_TLS\": \"true\", # to ignore the TLS certificate verification\n",
+    "     \"MLFLOW_DISABLE_INSECURE_REQUEST_WARNING\": True # to disable warnings\n",
+    "    }\n",
+    "runner.cfg[\"logging\"][\"mlflow_experiment_name\"] = \"coolgraph_example\" # name of experiment\n",
+    "\n",
+    "urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) # disabling request warning"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "f154a32f",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-08-21T10:55:36.670982Z",
+     "iopub.status.busy": "2024-08-21T10:55:36.670511Z",
+     "iopub.status.idle": "2024-08-21T10:56:53.936843Z",
+     "shell.execute_reply": "2024-08-21T10:56:53.935934Z",
+     "shell.execute_reply.started": "2024-08-21T10:55:36.670959Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Sample data: 100%|██████████| 42/42 [00:04<00:00, 10.12it/s]\n",
+      "Sample data: 100%|██████████| 14/14 [00:00<00:00, 14.57it/s]\n",
+      "2024/08/21 13:55:44 INFO mlflow.tracking.fluent: Experiment with name 'coolgraph_example' does not exist. Creating a new experiment.\n",
+      "2024-08-21 13:55:51 - epoch 0 test:            \n",
+      " {'accuracy': 0.505, 'cross_entropy': 1.31, 'f1_weighted': 0.451, 'calc_time': 0.006, 'main_metric': 0.505}\n",
+      "2024-08-21 13:55:53 - epoch 0 train:           \n",
+      " {'accuracy': 0.497, 'cross_entropy': 1.313, 'f1_weighted': 0.442, 'calc_time': 0.016, 'main_metric': 0.497}\n",
+      "2024-08-21 13:56:06 - epoch 5 test:            \n",
+      " {'accuracy': 0.9, 'cross_entropy': 0.299, 'f1_weighted': 0.899, 'calc_time': 0.006, 'main_metric': 0.9}\n",
+      "2024-08-21 13:56:07 - epoch 5 train:           \n",
+      " {'accuracy': 0.919, 'cross_entropy': 0.246, 'f1_weighted': 0.918, 'calc_time': 0.012, 'main_metric': 0.919}\n",
+      "2024-08-21 13:56:20 - epoch 10 test:           \n",
+      " {'accuracy': 0.918, 'cross_entropy': 0.268, 'f1_weighted': 0.918, 'calc_time': 0.006, 'main_metric': 0.918}\n",
+      "2024-08-21 13:56:21 - epoch 10 train:          \n",
+      " {'accuracy': 0.953, 'cross_entropy': 0.156, 'f1_weighted': 0.953, 'calc_time': 0.014, 'main_metric': 0.953}\n",
+      "2024-08-21 13:56:34 - epoch 15 test:           \n",
+      " {'accuracy': 0.92, 'cross_entropy': 0.277, 'f1_weighted': 0.92, 'calc_time': 0.005, 'main_metric': 0.92}\n",
+      "2024-08-21 13:56:35 - epoch 15 train:          \n",
+      " {'accuracy': 0.961, 'cross_entropy': 0.124, 'f1_weighted': 0.961, 'calc_time': 0.011, 'main_metric': 0.961}\n",
+      "2024-08-21 13:56:48 - epoch 20 test:           \n",
+      " {'accuracy': 0.921, 'cross_entropy': 0.296, 'f1_weighted': 0.92, 'calc_time': 0.008, 'main_metric': 0.921}\n",
+      "2024-08-21 13:56:50 - epoch 20 train:          \n",
+      " {'accuracy': 0.969, 'cross_entropy': 0.098, 'f1_weighted': 0.969, 'calc_time': 0.012, 'main_metric': 0.969}\n"
+     ]
+    }
+   ],
+   "source": [
+    "result = runner.run()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c835b041",
+   "metadata": {},
+   "source": [
+    "### Let's see the results on Mlflow tracker"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c67c3798",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-08-21T11:08:43.663318Z",
+     "iopub.status.busy": "2024-08-21T11:08:43.662778Z",
+     "iopub.status.idle": "2024-08-21T11:08:43.939813Z",
+     "shell.execute_reply": "2024-08-21T11:08:43.938751Z",
+     "shell.execute_reply.started": "2024-08-21T11:08:43.663287Z"
+    }
+   },
+   "source": [
+    "![mlflow_result](./src/image_2024-08-21_13-59-13.png) "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cbac9ca5",
+   "metadata": {},
+   "source": [
+    "### Success!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "686924be",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "CGKerner",
+   "language": "python",
+   "name": "cgkerner"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/src/image_2024-08-21_13-59-13.png b/notebooks/src/image_2024-08-21_13-59-13.png