diff --git a/docs/source-pytorch/extensions/logging.rst b/docs/source-pytorch/extensions/logging.rst
index f0c12464e6db2..6f3daa1fa7d43 100644
--- a/docs/source-pytorch/extensions/logging.rst
+++ b/docs/source-pytorch/extensions/logging.rst
@@ -31,6 +31,7 @@ The following are loggers we support:
CSVLogger
MLFlowLogger
NeptuneLogger
+ NeptuneScaleLogger
TensorBoardLogger
WandbLogger
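The hunk above adds ``NeptuneScaleLogger`` to the list of supported loggers. For context, a minimal sketch of selecting it, assuming ``neptune-scale`` is installed and the ``NEPTUNE_API_TOKEN`` / ``NEPTUNE_PROJECT`` environment variables are set so no credentials appear in code:

```python
# Minimal sketch: use the newly listed NeptuneScaleLogger with a Trainer.
# Assumes `neptune-scale` is installed and NEPTUNE_API_TOKEN / NEPTUNE_PROJECT
# are exported in the environment.
from lightning.pytorch import Trainer
from lightning.pytorch.loggers import NeptuneScaleLogger

logger = NeptuneScaleLogger()  # credentials and project resolved from the environment
trainer = Trainer(logger=logger, max_epochs=1)
```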
diff --git a/docs/source-pytorch/visualize/supported_exp_managers.rst b/docs/source-pytorch/visualize/supported_exp_managers.rst
index 79c15f1c1309e..90bd3b7d0c56d 100644
--- a/docs/source-pytorch/visualize/supported_exp_managers.rst
+++ b/docs/source-pytorch/visualize/supported_exp_managers.rst
@@ -60,9 +60,9 @@ Here's the full documentation for the :class:`~lightning.pytorch.loggers.MLFlowL
----
-Neptune.ai
+Neptune 2.x
-==========
+===========
-To use `Neptune.ai `_ first install the neptune package:
+To use `Neptune 2.x `_ first install the neptune package:
.. code-block:: bash
@@ -101,9 +101,9 @@ Here's the full documentation for the :class:`~lightning.pytorch.loggers.Neptune
----
-Neptune Scale
+Neptune 3.x (Neptune Scale)
-==========
+===========================
-To use `Neptune Scale `_ first install the neptune-scale package:
+To use `Neptune 3.x `_ first install the neptune-scale package:
.. code-block:: bash
@@ -119,8 +119,8 @@ Configure the logger and pass it to the :class:`~lightning.pytorch.trainer.train
from lightning.pytorch.loggers import NeptuneScaleLogger
neptune_scale_logger = NeptuneScaleLogger(
-        api_key=<neptune-api-token>,  # replace with your own
-        project="common/pytorch-lightning-integration",  # format "<WORKSPACE/PROJECT>"
+ api_key="", # replace with your own
+ project="/", # replace with your own
)
trainer = Trainer(logger=neptune_scale_logger)
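As an alternative to hard-coding credentials in the snippet above, the docstrings in this diff recommend the ``NEPTUNE_API_TOKEN`` environment variable; a hedged sketch of that route, assuming ``neptune-scale`` also resolves ``NEPTUNE_PROJECT`` from the environment:

```python
# Hedged sketch: keep the token out of code by relying on environment variables.
# NEPTUNE_API_TOKEN is documented in the logger docstring; NEPTUNE_PROJECT is
# assumed to be resolved the same way by neptune-scale.
import os

from lightning.pytorch import Trainer
from lightning.pytorch.loggers import NeptuneScaleLogger

os.environ.setdefault("NEPTUNE_API_TOKEN", "...")  # normally exported in the shell, not set in code
os.environ.setdefault("NEPTUNE_PROJECT", "workspace-name/project-name")

neptune_scale_logger = NeptuneScaleLogger()
trainer = Trainer(logger=neptune_scale_logger)
```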
diff --git a/requirements/pytorch/loggers.info b/requirements/pytorch/loggers.info
index 35f0126fcd629..ca0c8369935ab 100644
--- a/requirements/pytorch/loggers.info
+++ b/requirements/pytorch/loggers.info
@@ -1,7 +1,7 @@
# all supported loggers. this list is here as a reference, but they are not installed in CI
neptune >=1.0.0
-neptune-scale
+neptune-scale >=0.12.0
comet-ml >=3.31.0
mlflow >=1.0.0
wandb >=0.12.10
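A quick local check that an installed ``neptune-scale`` satisfies the new floor — hedged, and not part of CI, since the comment above notes these loggers are not installed there:

```python
# Sketch: verify the locally installed neptune-scale against the ">=0.12.0" pin.
from importlib.metadata import PackageNotFoundError, version

try:
    installed = version("neptune-scale")
except PackageNotFoundError:
    installed = None

if installed is None:
    print("neptune-scale is not installed")
else:
    major, minor = (int(part) for part in installed.split(".")[:2])
    ok = (major, minor) >= (0, 12)
    print(f"neptune-scale {installed}: {'OK' if ok else 'too old, need >=0.12.0'}")
```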
diff --git a/src/lightning/pytorch/loggers/neptune.py b/src/lightning/pytorch/loggers/neptune.py
index c0bd5bf3839b0..802e59a0fc254 100644
--- a/src/lightning/pytorch/loggers/neptune.py
+++ b/src/lightning/pytorch/loggers/neptune.py
@@ -69,7 +69,7 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:
class NeptuneLogger(Logger):
- r"""Log using `Neptune `_.
+ r"""Log using `Neptune `_.
Install it with pip:
@@ -129,7 +129,7 @@ def any_lightning_module_function_or_hook(self):
Note that the syntax ``self.logger.experiment["your/metadata/structure"].append(metadata)`` is specific to
Neptune and extends the logger capabilities. It lets you log various types of metadata, such as
scores, files, images, interactive visuals, and CSVs.
- Refer to the `Neptune docs `_
+ Refer to the `Neptune docs `_
for details.
You can also use the regular logger methods ``log_metrics()``, and ``log_hyperparams()`` with NeptuneLogger.
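A hedged sketch of the Neptune-specific syntax described above, inside a LightningModule hook; it assumes the Trainer was created with a ``NeptuneLogger``:

```python
# Sketch of the Neptune-specific append() syntax from inside a LightningModule hook.
# Assumes the Trainer was created with a NeptuneLogger, so self.logger.experiment is
# the underlying neptune run handle.
from lightning.pytorch import LightningModule


class LitModel(LightningModule):
    def any_lightning_module_function_or_hook(self):
        metadata = 0.95  # e.g. a score computed in this hook
        # append() extends a series under the given namespace; the generic
        # log_metrics()/log_hyperparams() logger methods keep working alongside it.
        self.logger.experiment["your/metadata/structure"].append(metadata)
```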
@@ -184,7 +184,7 @@ def any_lightning_module_function_or_hook(self):
)
trainer = Trainer(max_epochs=3, logger=neptune_logger)
- Check `run documentation `_
+ Check `run documentation `_
for more info about additional run parameters.
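A hedged sketch of what "additional run parameters" could look like in practice; ``name``, ``description``, and ``tags`` are assumed here as typical run-creation arguments forwarded by the logger, not confirmed by this diff:

```python
# Hedged sketch: keyword arguments not consumed by NeptuneLogger itself are forwarded
# to the underlying neptune run; name/description/tags are assumed examples.
from lightning.pytorch import Trainer
from lightning.pytorch.loggers import NeptuneLogger

neptune_logger = NeptuneLogger(
    project="workspace-name/project-name",  # placeholder, replace with your own
    name="lightning-run",
    description="Neptune 2.x integration example",
    tags=["pytorch-lightning", "docs"],
)
trainer = Trainer(max_epochs=3, logger=neptune_logger)
```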
**Details about Neptune run structure**
@@ -196,18 +196,18 @@ def any_lightning_module_function_or_hook(self):
See also:
- Read about
- `what objects you can log to Neptune `_.
+ `what objects you can log to Neptune `_.
- Check out an `example run `_
with multiple types of metadata logged.
- For more detailed examples, see the
- `user guide `_.
+ `user guide `_.
Args:
api_key: Optional.
Neptune API token, found on https://www.neptune.ai upon registration.
You should save your token to the `NEPTUNE_API_TOKEN`
environment variable and leave the api_key argument out of your code.
- Instructions: `Setting your API token `_.
+ Instructions: `Setting your API token `_.
project: Optional.
Name of a project in the form "workspace-name/project-name", for example "tom/mask-rcnn".
If ``None``, the value of `NEPTUNE_PROJECT` environment variable is used.
@@ -377,7 +377,7 @@ def training_step(self, batch, batch_idx):
is specific to Neptune and extends the logger capabilities.
It lets you log various types of metadata, such as scores, files,
images, interactive visuals, and CSVs. Refer to the
- `Neptune docs `_
+ `Neptune docs `_
for more detailed explanations.
You can also use the regular logger methods ``log_metrics()``, and ``log_hyperparams()``
with NeptuneLogger.
@@ -600,7 +600,7 @@ def version(self) -> Optional[str]:
class NeptuneScaleLogger(Logger):
- r"""Log using `Neptune Scale `_.
+ r"""Log using `Neptune Scale `_.
Install it with pip:
@@ -630,7 +630,6 @@ class NeptuneScaleLogger(Logger):
.. code-block:: python
- from neptune.types import File
from lightning.pytorch import LightningModule
@@ -647,7 +646,7 @@ def any_lightning_module_function_or_hook(self):
Note that the syntax ``self.logger.run.log_metrics(data={"your/metadata/structure": metadata}, step=step)``
is specific to Neptune Scale.
- Refer to the `Neptune Scale docs `_ for details.
+ Refer to the `Neptune Scale docs `_ for details.
You can also use the regular logger methods ``log_metrics()``, and ``log_hyperparams()`` with NeptuneScaleLogger.
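A hedged sketch of the Neptune Scale-specific call described above, from inside a hook; it assumes the Trainer was built with a ``NeptuneScaleLogger``:

```python
# Sketch of run.log_metrics() from inside a LightningModule hook. Assumes the Trainer
# was built with a NeptuneScaleLogger, so self.logger.run is the neptune_scale Run.
from lightning.pytorch import LightningModule


class LitModel(LightningModule):
    def any_lightning_module_function_or_hook(self):
        metadata = 0.95
        self.logger.run.log_metrics(data={"your/metadata/structure": metadata}, step=self.global_step)
```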
**Log after fitting or testing is finished**
@@ -670,6 +669,18 @@ def any_lightning_module_function_or_hook(self):
neptune_logger.run.log_configs(data={"your/metadata/structure": metadata})
neptune_logger.run.add_tags(["tag1", "tag2"])
+ **Log model checkpoint paths**
+
+ If you have :class:`~lightning.pytorch.callbacks.ModelCheckpoint` configured,
+ the Neptune logger can log model checkpoint paths.
+ Paths will be logged to the "model/checkpoints" namespace in the Neptune run.
+ You can disable this option with:
+
+ .. code-block:: python
+
+ neptune_logger = NeptuneScaleLogger(log_model_checkpoints=False)
+
+ Note: All model checkpoint paths will be logged. ``save_last`` and ``save_top_k`` are currently not supported.
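A hedged sketch connecting the note above to a ``ModelCheckpoint`` callback; the checkpoint directory is a placeholder:

```python
# Hedged sketch: with a ModelCheckpoint callback configured, every checkpoint path it
# produces is logged under "model/checkpoints". Per the note above, paths only
# accumulate; save_last / save_top_k pruning is not reflected in the run yet.
from lightning.pytorch import Trainer
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.loggers import NeptuneScaleLogger

checkpoint_callback = ModelCheckpoint(dirpath="checkpoints/")  # placeholder directory
neptune_scale_logger = NeptuneScaleLogger()  # pass log_model_checkpoints=False to opt out
trainer = Trainer(callbacks=[checkpoint_callback], logger=neptune_scale_logger)
```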
**Pass additional parameters to the Neptune run**
@@ -688,7 +699,7 @@ def any_lightning_module_function_or_hook(self):
)
trainer = Trainer(max_epochs=3, logger=neptune_scale_logger)
- Check `run documentation `_ for more info about additional run
+ Check `run documentation `_ for more info about additional run
parameters.
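A hedged sketch of passing run parameters to ``NeptuneScaleLogger``: ``description``, ``tags``, and ``group_tags`` are handled by the logger itself (they are popped in ``__init__`` below), while remaining keyword arguments such as ``fork_run_id``/``fork_step`` are forwarded when the run is created:

```python
# Hedged sketch of additional run parameters for NeptuneScaleLogger. description/tags
# are consumed by the logger (popped in __init__); other kwargs go to the created run.
from lightning.pytorch import Trainer
from lightning.pytorch.loggers import NeptuneScaleLogger

neptune_scale_logger = NeptuneScaleLogger(
    experiment_name="lightning-experiment",  # placeholder name
    description="Neptune 3.x integration example",
    tags=["pytorch-lightning", "neptune-scale"],
)
trainer = Trainer(max_epochs=3, logger=neptune_scale_logger)
```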
**Details about Neptune run structure**
@@ -712,26 +723,30 @@ def any_lightning_module_function_or_hook(self):
Neptune API token, found on https://scale.neptune.ai upon registration.
You should save your token to the `NEPTUNE_API_TOKEN` environment variable and leave
the api_token argument out of your code.
- Instructions: `Setting your API token `_.
+ Instructions: `Setting your API token `_.
resume: Optional.
If `False`, creates a new run.
To continue an existing run, set to `True` and pass the ID of an existing run to the `run_id` argument.
In this case, omit the `experiment_name` parameter.
To fork a run, use `fork_run_id` and `fork_step` instead.
mode: Optional.
- `Mode `_ of operation.
+ `Mode `_ of operation.
If "disabled", the run doesn't log any metadata.
- If "offline", the run is only stored locally. For details, see `Offline logging `_.
+ If "offline", the run is only stored locally. For details, see `Offline logging `_.
If this parameter and the
- `NEPTUNE_MODE `_
+ `NEPTUNE_MODE `_
environment variable are not set, the default is "async".
experiment_name: Optional.
- Name of the experiment to associate the run with.
+ Name of the experiment to associate the run with.
Can't be used together with the `resume` parameter.
To make the name easy to read in the app, ensure that it's at most 190 characters long.
run: Optional. Default is ``None``. A Neptune ``Run`` object.
If specified, this existing run will be used for logging, instead of a new run being created.
prefix: Optional. Default is ``"training"``. Root namespace for all metadata logging.
+ log_model_checkpoints: Optional. Default is ``True``. Log model checkpoint paths to Neptune.
+ Works only if ``ModelCheckpoint`` is passed to the ``Trainer``.
+ NOTE: All model checkpoint paths will be logged.
+ ``save_last`` and ``save_top_k`` are currently not supported.
neptune_run_kwargs: Additional arguments like ``creation_time``, ``log_directory``,
``fork_run_id``, ``fork_step``, ``*_callback``, etc. used when a run is created.
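To make the ``resume`` and ``mode`` arguments above concrete, a hedged sketch with a placeholder run ID:

```python
# Hedged sketches of two Args combinations documented above.
from lightning.pytorch.loggers import NeptuneScaleLogger

# Continue an existing run: set resume=True, pass its ID, and omit experiment_name.
resumed_logger = NeptuneScaleLogger(resume=True, run_id="existing-run-id")

# Offline mode: metadata is only stored locally, nothing is sent to Neptune.
offline_logger = NeptuneScaleLogger(mode="offline")
```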
@@ -757,6 +772,7 @@ def __init__(
experiment_name: Optional[str] = None,
run: Optional["Run"] = None,
prefix: str = "training",
+ log_model_checkpoints: Optional[bool] = True,
**neptune_run_kwargs: Any,
):
if not _NEPTUNE_SCALE_AVAILABLE:
@@ -778,16 +794,12 @@ def __init__(
self._run_id = run_id
self._experiment_name = experiment_name
self._prefix = prefix
+ self._log_model_checkpoints = log_model_checkpoints
self._neptune_run_kwargs = neptune_run_kwargs
self._description = self._neptune_run_kwargs.pop("description", None)
self._tags = self._neptune_run_kwargs.pop("tags", None)
self._group_tags = self._neptune_run_kwargs.pop("group_tags", None)
- if "log_model_checkpoints" in self._neptune_run_kwargs:
- log.warning("Neptune Scale does not support logging model checkpoints.")
- del self._neptune_run_kwargs["log_model_checkpoints"]
- self._log_model_checkpoints = False
-
if self._run_instance is not None:
self._retrieve_run_data()
@@ -887,7 +899,7 @@ def training_step(self, batch, batch_idx):
Note that the syntax ``self.logger.run.log_metrics(data={"your/metadata/structure": metadata}, step=step)``
is specific to Neptune Scale. Refer to the
- `Neptune Scale docs `_
+ `Neptune Scale docs `_
for more detailed explanations.
You can also use the regular logger methods ``log_metrics()``, and ``log_hyperparams()``
with NeptuneScaleLogger.
@@ -1004,7 +1016,7 @@ def finalize(self, status: str) -> None:
# initialized there
return
if status:
- self.run._status = status
+ self.run.log_configs({self._construct_path_with_prefix("status"): status})
super().finalize(status)
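After this change the trainer status ends up as an ordinary config value in the run; with the default ``"training"`` prefix the key is ``training/status``. A hedged sketch:

```python
# Hedged sketch: finalize() now records the status as a config value instead of
# assigning to a private attribute on the neptune_scale Run.
from lightning.pytorch.loggers import NeptuneScaleLogger

logger = NeptuneScaleLogger()  # default prefix is "training"
logger.finalize("success")     # ends up as run.log_configs({"training/status": "success"})
```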
@@ -1025,25 +1037,100 @@ def save_dir(self) -> Optional[str]:
@rank_zero_only
def log_model_summary(self, model: "pl.LightningModule", max_depth: int = -1) -> None:
- """Not implemented for Neptune Scale."""
- log.warning("Neptune Scale does not support logging model summaries.")
- return
+ """Logs a summary of all layers in the model to Neptune as a text file."""
+ from neptune_scale.types import File
+
+ model_str = str(ModelSummary(model=model, max_depth=max_depth))
+ self.run.assign_files({
+ self._construct_path_with_prefix("model/summary"): File(
+ source=model_str.encode("utf-8"), mime_type="text/plain"
+ )
+ })
@override
@rank_zero_only
def after_save_checkpoint(self, checkpoint_callback: Checkpoint) -> None:
- """Not implemented for Neptune Scale."""
- return
+ """Automatically log checkpointed model's path. Called after model checkpoint callback saves a new checkpoint.
+
+ Args:
+ checkpoint_callback: the model checkpoint callback instance
+
+ """
+ if not self._log_model_checkpoints:
+ return
+
+ file_names = set()
+ checkpoints_namespace = self._construct_path_with_prefix("model/checkpoints")
+
+ # save last model
+ if hasattr(checkpoint_callback, "last_model_path") and checkpoint_callback.last_model_path:
+ model_last_name = self._get_full_model_name(checkpoint_callback.last_model_path, checkpoint_callback)
+ file_names.add(model_last_name)
+ self.run.log_configs({
+ f"{checkpoints_namespace}/{model_last_name}": checkpoint_callback.last_model_path,
+ })
+
+ # save best k models
+ if hasattr(checkpoint_callback, "best_k_models"):
+ for key in checkpoint_callback.best_k_models:
+ model_name = self._get_full_model_name(key, checkpoint_callback)
+ file_names.add(model_name)
+ self.run.log_configs({
+ f"{checkpoints_namespace}/{model_name}": key,
+ })
+
+ # log best model path and checkpoint
+ if hasattr(checkpoint_callback, "best_model_path") and checkpoint_callback.best_model_path:
+ self.run.log_configs({
+ self._construct_path_with_prefix("model/best_model_path"): checkpoint_callback.best_model_path,
+ })
+
+ model_name = self._get_full_model_name(checkpoint_callback.best_model_path, checkpoint_callback)
+ file_names.add(model_name)
+ self.run.log_configs({
+ f"{checkpoints_namespace}/{model_name}": checkpoint_callback.best_model_path,
+ })
+
+ # remove old models logged to experiment if they are not part of best k models at this point
+ # TODO: Implement after Neptune Scale supports `del`
+ # if self.run.exists(checkpoints_namespace):
+ # exp_structure = self.run.get_structure()
+ # uploaded_model_names = self._get_full_model_names_from_exp_structure(
+ # exp_structure, checkpoints_namespace
+ # )
+
+ # for file_to_drop in list(uploaded_model_names - file_names):
+ # del self.run[f"{checkpoints_namespace}/{file_to_drop}"]
+
+ # log best model score
+ if hasattr(checkpoint_callback, "best_model_score") and checkpoint_callback.best_model_score:
+ self.run.log_configs({
+ self._construct_path_with_prefix("model/best_model_score"): float(
+ checkpoint_callback.best_model_score.cpu().detach().numpy()
+ ),
+ })
@staticmethod
- def _get_full_model_name(model_path: str, checkpoint_callback: Checkpoint) -> None:
+ def _get_full_model_name(model_path: str, checkpoint_callback: Checkpoint) -> str:
"""Returns model name which is string `model_path` appended to `checkpoint_callback.dirpath`."""
- return
+ if hasattr(checkpoint_callback, "dirpath"):
+ model_path = os.path.normpath(model_path)
+ expected_model_path = os.path.normpath(checkpoint_callback.dirpath)
+ if not model_path.startswith(expected_model_path):
+ raise ValueError(f"{model_path} was expected to start with {expected_model_path}.")
+ # Remove extension from filepath
+ filepath, _ = os.path.splitext(model_path[len(expected_model_path) + 1 :])
+ return filepath.replace(os.sep, "/")
+ return model_path.replace(os.sep, "/")
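A worked example of the name construction above, with hypothetical paths and a ``MagicMock`` standing in for the checkpoint callback:

```python
# Worked example (hypothetical paths): the dirpath prefix and the file extension are
# stripped, and OS separators become "/" so the result is a valid Neptune attribute path.
import os
from unittest.mock import MagicMock

from lightning.pytorch.loggers import NeptuneScaleLogger

cb = MagicMock(dirpath=os.path.join("path", "to", "models"))
name = NeptuneScaleLogger._get_full_model_name(
    os.path.join("path", "to", "models", "epoch=3-step=100.ckpt"), cb
)
assert name == "epoch=3-step=100"
```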
@classmethod
- def _get_full_model_names_from_exp_structure(cls, exp_structure: dict[str, Any], namespace: str) -> set[None]:
+ def _get_full_model_names_from_exp_structure(cls, exp_structure: dict[str, Any], namespace: str) -> set[str]:
"""Returns all paths to properties which were already logged in `namespace`"""
- return set()
+ structure_keys: list[str] = namespace.split(cls.LOGGER_JOIN_CHAR)
+ for key in structure_keys:
+ exp_structure = exp_structure[key]
+ uploaded_models_dict = exp_structure
+ return set(cls._dict_paths(uploaded_models_dict))
@classmethod
def _dict_paths(cls, d: dict[str, Any], path_in_build: Optional[str] = None) -> Generator:
diff --git a/src/pytorch_lightning/README.md b/src/pytorch_lightning/README.md
index f3fb8cb2fd2b3..8e2bca207820c 100644
--- a/src/pytorch_lightning/README.md
+++ b/src/pytorch_lightning/README.md
@@ -252,9 +252,12 @@ trainer = Trainer(logger=loggers.CometLogger())
# mlflow
trainer = Trainer(logger=loggers.MLFlowLogger())
-# neptune
+# neptune 2.x
trainer = Trainer(logger=loggers.NeptuneLogger())
+# neptune 3.x
+trainer = Trainer(logger=loggers.NeptuneScaleLogger())
+
# ... and dozens more
```
diff --git a/tests/tests_pytorch/loggers/test_neptune.py b/tests/tests_pytorch/loggers/test_neptune.py
index 5fd82114f5a7a..a6b21d5ce2bc3 100644
--- a/tests/tests_pytorch/loggers/test_neptune.py
+++ b/tests/tests_pytorch/loggers/test_neptune.py
@@ -461,7 +461,8 @@ def test_neptune_scale_logger_finalize(neptune_scale_logger):
"""Test finalize method sets status correctly."""
logger, mock_run = neptune_scale_logger
logger.finalize("success")
- assert mock_run._status == "success"
+ expected_key = logger._construct_path_with_prefix("status")
+ mock_run.log_configs.assert_any_call({expected_key: "success"})
@pytest.mark.skipif(not _NEPTUNE_SCALE_AVAILABLE, reason="Neptune-Scale is required for this test.")
@@ -472,13 +473,20 @@ def test_neptune_scale_logger_invalid_run():
@pytest.mark.skipif(not _NEPTUNE_SCALE_AVAILABLE, reason="Neptune-Scale is required for this test.")
-def test_neptune_scale_logger_log_model_summary(neptune_scale_logger, caplog):
- """Test that log_model_summary shows warning."""
- logger = NeptuneScaleLogger(log_model_checkpoints=True)
- model = BoringModel()
+def test_neptune_scale_logger_log_model_summary(neptune_scale_logger, monkeypatch):
+ from neptune_scale.types import File
+ model = BoringModel()
+ logger, mock_run = neptune_scale_logger
+ # Patch assign_files to track calls
+ assign_files_mock = mock.MagicMock()
+ monkeypatch.setattr(mock_run, "assign_files", assign_files_mock)
logger.log_model_summary(model)
- assert "Neptune Scale does not support logging model summaries" in caplog.text
+ # Check that assign_files was called with the correct key and a File instance
+ called_args = assign_files_mock.call_args[0][0]
+ assert list(called_args.keys())[0].endswith("model/summary")
+ file_val = list(called_args.values())[0]
+ assert isinstance(file_val, File)
@pytest.mark.skipif(not _NEPTUNE_SCALE_AVAILABLE, reason="Neptune-Scale is required for this test.")
@@ -496,3 +504,32 @@ def test_neptune_scale_logger_with_prefix(neptune_scale_logger):
metrics = {"loss": 1.23}
logger.log_metrics(metrics, step=5)
mock_run.log_metrics.assert_called_once_with({"training/loss": 1.23}, step=5)
+
+
+@pytest.mark.skipif(not _NEPTUNE_SCALE_AVAILABLE, reason="Neptune-Scale is required for this test.")
+def test_neptune_scale_logger_after_save_checkpoint(neptune_scale_logger):
+ logger, mock_run = neptune_scale_logger
+ models_root_dir = os.path.join("path", "to", "models")
+ cb_mock = MagicMock(
+ dirpath=models_root_dir,
+ last_model_path=os.path.join(models_root_dir, "last"),
+ best_k_models={
+ f"{os.path.join(models_root_dir, 'model1')}": None,
+ f"{os.path.join(models_root_dir, 'model2/with/slashes')}": None,
+ },
+ best_model_path=os.path.join(models_root_dir, "best_model"),
+ best_model_score=None,
+ )
+ logger.after_save_checkpoint(cb_mock)
+ prefix = logger._prefix
+ model_key_prefix = f"{prefix}/model" if prefix else "model"
+ expected_calls = [
+ call.log_configs({f"{model_key_prefix}/checkpoints/model1": os.path.join(models_root_dir, "model1")}),
+ call.log_configs({
+ f"{model_key_prefix}/checkpoints/model2/with/slashes": os.path.join(models_root_dir, "model2/with/slashes")
+ }),
+ call.log_configs({f"{model_key_prefix}/checkpoints/last": os.path.join(models_root_dir, "last")}),
+ call.log_configs({f"{model_key_prefix}/checkpoints/best_model": os.path.join(models_root_dir, "best_model")}),
+ call.log_configs({f"{model_key_prefix}/best_model_path": os.path.join(models_root_dir, "best_model")}),
+ ]
+ mock_run.log_configs.assert_has_calls(expected_calls, any_order=True)