Bug description
I am using Anomalib 1.1.1 and I get the same error with different versions of Anomalib. The problem is that the save_dir property in src/lightning/pytorch/loggers/mlflow.py (quoted below) returns None for non-local tracking URIs (i.e. when I use a remote MLflow server). This creates an incompatibility in src/lightning/pytorch/cli.py, which receives None as log_dir and fails with an AssertionError.
def save_dir(self) -> Optional[str]:
    """The root file directory in which MLflow experiments are saved.

    Return:
        Local path to the root experiment directory if the tracking uri is local.
        Otherwise returns `None`.
    """
    if self._tracking_uri.startswith(LOCAL_FILE_URI_PREFIX):
        return self._tracking_uri.lstrip(LOCAL_FILE_URI_PREFIX)
    return None
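For illustration, a minimal sketch of the behaviour with plain Lightning (the tracking URI below is only a placeholder for any remote MLflow server):

from lightning.pytorch.loggers import MLFlowLogger

# Placeholder remote server; any tracking URI that does not start with "file:" behaves the same.
logger = MLFlowLogger(experiment_name="demo", tracking_uri="http://mlflow.example.com:5000")
print(logger.save_dir)  # prints None, because the tracking URI is not local

As far as I can tell, Trainer.log_dir falls back to the save_dir of the first logger, so it is also None here, and that is exactly the value SaveConfigCallback.setup asserts against (see the traceback below).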
What version are you seeing the problem on?
v2.4
How to reproduce the bug
import os
import torch
from lightning.pytorch import Trainer
from lightning.pytorch.loggers import MLFlowLogger
from anomalib.engine import Engine
from anomalib.data import get_datamodule
from anomalib.models import get_model
from anomalib.callbacks import LoadModelCallback, get_callbacks
from anomalib.models.image.fastflow.lightning_model import Fastflow
from anomalib.models.components import AnomalyModule
from anomalib.metrics import AUROC
import mlflow

os.environ["TORCH_CUDA_ARCH_LIST"] = "7.5"

CONFIG_PATH = "config.yml"

# Placeholders for values not shown here; any remote tracking URI reproduces the bug.
run_name = "run_name"
log_model = True

mlflow_configuration = {
    "trainer": {
        "logger": {
            "class_path": "anomalib.loggers.mlflow.AnomalibMLFlowLogger",
            "init_args": {
                "experiment_name": "experiment_name",
                "run_name": run_name,
                "tracking_uri": "tracking_uri",
                "log_model": log_model,
                "save_dir": "save_dir",
                "tags": {
                    "dataset": "dataset",
                    "test": "test",
                    "preTrained": "preTrained",
                    "patience": "patience",
                    # "transformations": transformations
                },
            },
        },
    },
}

# Load and parse the config
engine, model, datamodule = Engine.from_config(CONFIG_PATH, **mlflow_configuration)
engine.fit(
    model=model,
    datamodule=datamodule,
)
engine.test(datamodule=datamodule)
It is important to use a remote MLflow server (a non-local tracking URI)!
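A possible workaround until this is fixed is to subclass the logger so that save_dir never returns None. This is only an untested sketch, and the local fallback directory is arbitrary:

from anomalib.loggers.mlflow import AnomalibMLFlowLogger


class LocalSaveDirMLFlowLogger(AnomalibMLFlowLogger):
    """Sketch only: report a local save_dir even when the tracking URI is remote."""

    @property
    def save_dir(self) -> str:
        # Fall back to a local directory when the parent property returns None
        # (i.e. for non-"file:" tracking URIs), so trainer.log_dir is never None.
        return super().save_dir or "./mlruns"

In principle, using this subclass in place of AnomalibMLFlowLogger should avoid the AssertionError, since the failing code in cli.py only needs log_dir to be non-None.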
Error messages and logs
╭─────────────────────── Traceback (most recent call last) ────────────────────────╮
│ /home/user/faceswaps/anomalib/anomalib_test.py:63 in <module> │
│ │
│ 60 │
│ 61 │
│ 62 │
│ ❱ 63 engine.fit( │
│ 64 │ model=model, │
│ 65 │ datamodule=datamodule │
│ 66 ) │
│ │
│ /home/user/.local/lib/python3.11/site-packages/anomalib/engine/engine.py:541 │
│ in fit │
│ │
│ 538 │ │ │ # if the model is zero-shot or few-shot, we only need to run │
│ 539 │ │ │ self.trainer.validate(model, val_dataloaders, datamodule=data │
│ 540 │ │ else: │
│ ❱ 541 │ │ │ self.trainer.fit(model, train_dataloaders, val_dataloaders, d │
│ 542 │ │
│ 543 │ def validate( │
│ 544 │ │ self, │
│ │
│ /home/user/.local/lib/python3.11/site-packages/lightning/pytorch/trainer/trai │
│ ner.py:538 in fit │
│ │
│ 535 │ │ self.state.fn = TrainerFn.FITTING │
│ 536 │ │ self.state.status = TrainerStatus.RUNNING │
│ 537 │ │ self.training = True │
│ ❱ 538 │ │ call._call_and_handle_interrupt( │
│ 539 │ │ │ self, self._fit_impl, model, train_dataloaders, val_dataloade │
│ 540 │ │ ) │
│ 541 │
│ │
│ /home/user/.local/lib/python3.11/site-packages/lightning/pytorch/trainer/call │
│ .py:47 in _call_and_handle_interrupt │
│ │
│ 44 │ try: │
│ 45 │ │ if trainer.strategy.launcher is not None: │
│ 46 │ │ │ return trainer.strategy.launcher.launch(trainer_fn, *args, tra │
│ ❱ 47 │ │ return trainer_fn(*args, **kwargs) │
│ 48 │ │
│ 49 │ except _TunerExitException: │
│ 50 │ │ _call_teardown_hook(trainer) │
│ │
│ /home/user/.local/lib/python3.11/site-packages/lightning/pytorch/trainer/trai │
│ ner.py:574 in _fit_impl │
│ │
│ 571 │ │ │ model_provided=True, │
│ 572 │ │ │ model_connected=self.lightning_module is not None, │
│ 573 │ │ ) │
│ ❱ 574 │ │ self._run(model, ckpt_path=ckpt_path) │
│ 575 │ │ │
│ 576 │ │ assert self.state.stopped │
│ 577 │ │ self.training = False │
│ │
│ /home/user/.local/lib/python3.11/site-packages/lightning/pytorch/trainer/trai │
│ ner.py:943 in _run │
│ │
│ 940 │ │ log.debug(f"{self.__class__.__name__}: preparing data") │
│ 941 │ │ self._data_connector.prepare_data() │
│ 942 │ │ │
│ ❱ 943 │ │ call._call_setup_hook(self) # allow user to set up LightningModu │
│ 944 │ │ log.debug(f"{self.__class__.__name__}: configuring model") │
│ 945 │ │ call._call_configure_model(self) │
│ 946 │
│ │
│ /home/user/.local/lib/python3.11/site-packages/lightning/pytorch/trainer/call │
│ .py:103 in _call_setup_hook │
│ │
│ 100 │ │
│ 101 │ if trainer.datamodule is not None: │
│ 102 │ │ _call_lightning_datamodule_hook(trainer, "setup", stage=fn) │
│ ❱ 103 │ _call_callback_hooks(trainer, "setup", stage=fn) │
│ 104 │ _call_lightning_module_hook(trainer, "setup", stage=fn) │
│ 105 │ │
│ 106 │ trainer.strategy.barrier("post_setup") │
│ │
│ /home/user/.local/lib/python3.11/site-packages/lightning/pytorch/trainer/call │
│ .py:218 in _call_callback_hooks │
│ │
│ 217 │ │ │ with trainer.profiler.profile(f"[Callback]{callback.state_key} │
│ ❱ 218 │ │ │ │ fn(trainer, trainer.lightning_module, *args, **kwargs) │
│ 219 │ │
│ 220 │ if pl_module: │
│ 221 │ │ # restore current_fx when nested context │
│ │
│ /home/user/.local/lib/python3.11/site-packages/lightning/pytorch/cli.py:252 │
│ in setup │
│ │
│ 249 │ │ │
│ 250 │ │ if self.save_to_log_dir: │
│ 251 │ │ │ log_dir = trainer.log_dir # this broadcasts the directory │
│ ❱ 252 │ │ │ assert log_dir is not None │
│ 253 │ │ │ config_path = os.path.join(log_dir, self.config_filename) │
│ 254 │ │ │ fs = get_filesystem(log_dir) │
│ 255 │ │
Environment
Current environment
- PyTorch Lightning Version: 2.4.0
- PyTorch Version: 2.2.0
- Python version: 3.11.4
- OS: Ubuntu 22.04 LTS
- CUDA/cuDNN version: 12.2
- GPU models and configuration: NVIDIA GeForce RTX 3090
- How you installed Lightning (`conda`, `pip`, source): pip
More info
No response