From 73c0bba3e6f2bbb846311c7f8917fd880af984a5 Mon Sep 17 00:00:00 2001
From: Jirka B
Date: Fri, 13 Dec 2024 21:42:48 +0900
Subject: [PATCH 1/5] pkg: re-enable readme as PyPI's package description

---
 .github/workflows/release-pypi.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml
index fe73818..08d1119 100644
--- a/.github/workflows/release-pypi.yml
+++ b/.github/workflows/release-pypi.yml
@@ -21,8 +21,6 @@ jobs:
       - name: Install dependencies
         run: pip install -U build twine
-      - name: Overview Readme for release
-        run: echo "# Lit Models" > README.md
       - name: Build package
         run: python -m build
       - name: Check package

From 3619f3b9e09b96a7a6e5d6f0c254a9dcb0381c12 Mon Sep 17 00:00:00 2001
From: Jirka B
Date: Fri, 13 Dec 2024 22:15:27 +0900
Subject: [PATCH 2/5] callback example

---
 README.md | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index f597204..ce4d41c 100644
--- a/README.md
+++ b/README.md
@@ -83,6 +83,10 @@ from lightning import Trainer
 from litmodels import download_model
 from litmodels.demos import BoringModel
 
+# Define the model name - this should be unique to your model
+# The format is <organization>/<teamspace>/<model-name>:<version>
+MY_MODEL_NAME = "jirka/kaggle/lit-boring-model:latest"
+
 
 class LitModel(BoringModel):
     def training_step(self, batch, batch_idx: int):
@@ -92,22 +96,63 @@ class LitModel(BoringModel):
         return {"loss": loss}
 
 
-# Define the model name - this should be unique to your model
-# The format is <organization>/<teamspace>/<model-name>:<version>
-MY_MODEL_NAME = "jirka/kaggle/lit-boring-model:latest"
-
 # Load the model from cloud storage
 model_path = download_model(name=MY_MODEL_NAME, download_dir="my_models")
 print(f"model: {model_path}")
 
 # Train the model with extended training period
 trainer = Trainer(max_epochs=4)
+trainer.fit(LitModel(), ckpt_path=model_path)
+```
+
+YOu can also enhance your training with simple Checkpointing callback which would always save the best model to the cloud storage and continue training.
+This can be handy especially with long trainings or using interruptible machines so you would always resume/recover from the best model.
+
+```python
+import torch.utils.data as data
+import torchvision as tv
+from lightning import Callback, Trainer
+from litmodels import upload_model
+from litmodels.demos import BoringModel
+
+# Define the model name - this should be unique to your model
+# The format is <organization>/<teamspace>/<model-name>
+MY_MODEL_NAME = "jirka/kaggle/lit-auto-encoder-callback"
+
+
+class LitModel(BoringModel):
+    def training_step(self, batch, batch_idx: int):
+        loss = self.step(batch)
+        # logging the computed loss
+        self.log("train_loss", loss)
+        return {"loss": loss}
+
+
+class UploadModelCallback(Callback):
+    def on_train_epoch_end(self, trainer, pl_module):
+        # Get the best model path from the checkpoint callback
+        best_model_path = trainer.checkpoint_callback.best_model_path
+        if best_model_path:
+            print(f"Uploading model: {best_model_path}")
+            upload_model(model=best_model_path, name=MY_MODEL_NAME)
+
+
+dataset = tv.datasets.MNIST(".", download=True, transform=tv.transforms.ToTensor())
+train, val = data.random_split(dataset, [55000, 5000])
+
+trainer = Trainer(
+    max_epochs=2,
+    callbacks=[UploadModelCallback()],
+)
 trainer.fit(
     LitModel(),
-    ckpt_path=model_path,
+    data.DataLoader(train, batch_size=256),
+    data.DataLoader(val, batch_size=256),
 )
 ```
 
+## Logging Models
+
 You can also use model store together with [LitLogger](https://github.com/gridai/lit-logger) to log your model to the cloud storage.
 
 ```python

From 9a86071faada5731753ba674b5b38da65399f1de Mon Sep 17 00:00:00 2001
From: Jirka B
Date: Fri, 13 Dec 2024 22:21:19 +0900
Subject: [PATCH 3/5] update

---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index ce4d41c..1f9c565 100644
--- a/README.md
+++ b/README.md
@@ -109,6 +109,7 @@ YOu can also enhance your training with simple Checkpointing callback which woul
 This can be handy especially with long trainings or using interruptible machines so you would always resume/recover from the best model.
 
 ```python
+import os
 import torch.utils.data as data
 import torchvision as tv
 from lightning import Callback, Trainer
 from litmodels import upload_model
 from litmodels.demos import BoringModel
@@ -131,10 +132,9 @@ class LitModel(BoringModel):
 class UploadModelCallback(Callback):
     def on_train_epoch_end(self, trainer, pl_module):
         # Get the best model path from the checkpoint callback
-        best_model_path = trainer.checkpoint_callback.best_model_path
-        if best_model_path:
-            print(f"Uploading model: {best_model_path}")
-            upload_model(model=best_model_path, name=MY_MODEL_NAME)
+        checkpoint_path = getattr(trainer.checkpoint_callback, "best_model_path")
+        if checkpoint_path and os.path.exists(checkpoint_path):
+            upload_model(model=checkpoint_path, name=MY_MODEL_NAME)
 
 
 dataset = tv.datasets.MNIST(".", download=True, transform=tv.transforms.ToTensor())

From 44f12e636fbf4afcfd59eafa25d6199787db09ca Mon Sep 17 00:00:00 2001
From: Jirka Borovec <6035284+Borda@users.noreply.github.com>
Date: Sat, 14 Dec 2024 02:12:24 +0900
Subject: [PATCH 4/5] Update release-pypi.yml

---
 .github/workflows/release-pypi.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml
index 08d1119..fe73818 100644
--- a/.github/workflows/release-pypi.yml
+++ b/.github/workflows/release-pypi.yml
@@ -21,6 +21,8 @@ jobs:
       - name: Install dependencies
         run: pip install -U build twine
+      - name: Overview Readme for release
+        run: echo "# Lit Models" > README.md
       - name: Build package
         run: python -m build
       - name: Check package

From 330e0e519499a168290659a6ac820162d97a6533 Mon Sep 17 00:00:00 2001
From: Jirka Borovec <6035284+Borda@users.noreply.github.com>
Date: Mon, 16 Dec 2024 20:00:01 +0900
Subject: [PATCH 5/5] Apply suggestions from code review

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 64b3acf..3614e97 100644
--- a/README.md
+++ b/README.md
@@ -91,7 +91,7 @@ trainer = Trainer(max_epochs=4)
 trainer.fit(LitModel(), ckpt_path=checkpoint_path)
 ```
 
-YOu can also enhance your training with simple Checkpointing callback which would always save the best model to the cloud storage and continue training.
+You can also enhance your training with a simple Checkpointing callback which would always save the best model to the cloud storage and continue training.
 This can be handy especially with long trainings or using interruptible machines so you would always resume/recover from the best model.
 
 ```python