
Commit 4dd5e77

kprokofi and Copilot authored
Simplify OTXModel API (#5013)
Co-authored-by: Copilot <[email protected]>
1 parent 96f98f3 commit 4dd5e77

57 files changed: +343 −658 lines changed

library/src/otx/backend/native/cli/utils.py

Lines changed: 9 additions & 2 deletions
@@ -34,17 +34,21 @@ def get_otx_root_path() -> Path:
 RECIPE_PATH = get_otx_root_path() / "recipe"


-def list_models(task: OTXTaskType | None = None, pattern: str | None = None, print_table: bool = False) -> list[str]:
+def list_models(
+    task: OTXTaskType | None = None, pattern: str | None = None, print_table: bool = False, return_recipes: bool = False
+) -> list[str]:
     """Returns a list of available models for training.

     Args:
         task (OTXTaskType | None, optional): Recipe Filter by Task.
         pattern (Optional[str], optional): A string pattern to filter the list of available models. Defaults to None.
         print_table (bool, optional): Output the recipe information as a Rich.Table.
             This is primarily used for `otx find` in the CLI.
+        return_recipes (bool, optional): If True, return the recipe paths instead of model names.

     Returns:
-        list[str]: A list of available models for pretraining.
+        list[str]: A list of available models or recipes for fine-tuning.

     Example:
         # Return all available model list.
@@ -94,4 +98,7 @@ def list_models(task: OTXTaskType | None = None, pattern: str | None = None, pri
         )
         console.print(table, width=console.width, justify="center")

+    if return_recipes:
+        return recipe_list
+
     return list({Path(recipe).stem for recipe in recipe_list})
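
The flag slots into the existing CLI helper without changing its default behaviour. A small usage sketch (the import path is taken from the file header above; the pattern string is illustrative):

# Hypothetical usage of the new return_recipes flag.
from otx.backend.native.cli.utils import list_models

model_names = list_models(pattern="rtmdet")                         # model names (recipe stems), as before
recipe_paths = list_models(pattern="rtmdet", return_recipes=True)   # full recipe paths instead of names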

library/src/otx/backend/native/engine.py

Lines changed: 9 additions & 3 deletions
@@ -85,12 +85,13 @@ class OTXEngine(Engine):

     def __init__(
         self,
-        model: OTXModel | PathLike,
+        model: OTXModel | PathLike | str,
         data: OTXDataModule | PathLike,
         work_dir: PathLike = "./otx-workspace",
         checkpoint: PathLike | None = None,
         device: DeviceType = DeviceType.auto,
         num_devices: int = 1,
+        task: OTXTaskType | None = None,
         **kwargs,
     ):
         """Initializes the OTX Engine.
@@ -103,6 +104,8 @@ def __init__(
             checkpoint (PathLike | None, optional): Path to the checkpoint file (model weights). Defaults to None.
             device (DeviceType, optional): The device type to use. Defaults to DeviceType.auto.
             num_devices (int, optional): The number of devices to use. If it is 2 or more, it will behave as multi-gpu.
+            task (OTXTaskType | None, optional): The task type to use. Useful when you provide model name
+                and this model can be used for multiple tasks. Defaults to None.
             **kwargs: Additional keyword arguments for pl.Trainer.
         """
         self._cache = TrainerArgumentsCache(**kwargs)
@@ -112,10 +115,13 @@ def __init__(
         if not isinstance(data, (OTXDataModule, str, os.PathLike)):
             msg = f"data should be OTXDataModule or PathLike, but got {type(data)}"
             raise TypeError(msg)
+        if task is not None and isinstance(data, OTXDataModule) and task != data.task:
+            msg = f"task and data.task should be the same, but got {task} and {data.task}"
+            raise ValueError(msg)
         self._auto_configurator = AutoConfigurator(
             data_root=data if isinstance(data, (str, os.PathLike)) else None,
-            task=data.task if isinstance(data, OTXDataModule) else None,
-            model_config_path=None if isinstance(model, OTXModel) else model,
+            task=data.task if isinstance(data, OTXDataModule) else task,
+            model=None if isinstance(model, OTXModel) else model,
         )
         self._datamodule: OTXDataModule = (
             data if isinstance(data, OTXDataModule) else self._auto_configurator.get_datamodule()
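
Taken together, these changes let the Engine be built from a bare model name, with task disambiguating names that map to several tasks. A hedged sketch (the dataset path, model name, and the OTXTaskType import location are assumptions, not verified against the released package):

from otx.backend.native.engine import OTXEngine
from otx.types.task import OTXTaskType  # import location assumed

engine = OTXEngine(
    model="efficientnet_b0",             # a plain model name string is now accepted
    data="path/to/dataset",              # data root; the AutoConfigurator builds the datamodule
    task=OTXTaskType.MULTI_CLASS_CLS,    # disambiguates model names usable for multiple tasks
)
# Note: passing task together with an OTXDataModule whose .task differs now raises ValueError.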

library/src/otx/backend/native/models/__init__.py

Lines changed: 3 additions & 1 deletion
@@ -10,7 +10,7 @@
     TVModel,
     VisionTransformer,
 )
-from .detection import ATSS, RTDETR, SSD, DFine, RTMDet
+from .detection import ATSS, RTDETR, SSD, YOLOX, DEIMDFine, DFine, RTMDet
 from .instance_segmentation import MaskRCNN, MaskRCNNTV, RTMDetInst
 from .keypoint_detection import RTMPose
 from .segmentation import DinoV2Seg, LiteHRNet, SegNext
@@ -19,6 +19,8 @@
     "ATSS",
     "RTDETR",
     "SSD",
+    "YOLOX",
+    "DEIMDFine",
     "DFine",
     "DinoV2Seg",
     "EfficientNet",

library/src/otx/backend/native/models/base.py

Lines changed: 47 additions & 22 deletions
@@ -33,7 +33,6 @@
 from otx.backend.native.utils.utils import (
     ensure_callable,
     is_ckpt_for_finetuning,
-    is_ckpt_from_otx_v1,
     remove_state_dict_prefix,
 )
 from otx.config.data import TileConfig
@@ -102,8 +101,24 @@ def _default_scheduler_callable(
 class OTXModel(LightningModule):
     """Base class for the models used in OTX.

+    This class is a subclass of `LightningModule`. It is not intended to be used directly.
+
     Args:
-        num_classes: Number of classes this model can predict.
+        label_info (LabelInfoTypes | int | Sequence): Information about the labels used in the model.
+            If `int` is given, label info will be constructed from number of classes,
+            if `Sequence` is given, label info will be constructed from the sequence of label names.
+        model_name (str, optional): Name of the model. Defaults to "OTXModel".
+        optimizer (OptimizerCallable, optional): Optimizer callable. Defaults to DefaultOptimizerCallable.
+        scheduler (LRSchedulerCallable | LRSchedulerListCallable, optional): Scheduler callable.
+            Defaults to DefaultSchedulerCallable.
+        metric (MetricCallable, optional): Metric callable. Defaults to NullMetricCallable.
+        torch_compile (bool, optional): Whether to use torch compile. Defaults to False.
+        tile_config (TileConfig | dict, optional): Configuration for tiling. Defaults to TileConfig(enable_tiler=False).
+        data_input_params (DataInputParams | dict | None, optional): Parameters for image preprocessing.
+            This parameter contains image input size, mean, and std, that is used to preprocess the input image.
+            If None is given, default parameters for the specific model will be used.
+            In most cases you don't need to set this parameter unless you change the image size or pretrained weights.
+            Defaults to None.

     Attributes:
         explain_mode: If true, `self.predict_step()` will produce a XAI output as well
@@ -118,7 +133,7 @@ class OTXModel(LightningModule):
     def __init__(
         self,
         label_info: LabelInfoTypes | int | Sequence,
-        data_input_params: DataInputParams | dict,
+        data_input_params: DataInputParams | dict | None = None,
         task: OTXTaskType | None = None,
         model_name: str = "OTXModel",
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
@@ -133,7 +148,10 @@ def __init__(
             label_info (LabelInfoTypes | int | Sequence): Information about the labels used in the model.
                 If `int` is given, label info will be constructed from number of classes,
                 if `Sequence` is given, label info will be constructed from the sequence of label names.
-            data_input_params (DataInputParams | dict): Parameters of the input data such as input size, mean, and std.
+            data_input_params (DataInputParams | dict | None, optional): Parameters for image preprocessing.
+                This parameter contains image input size, mean, and std, that is used to preprocess the input image.
+                If None is given, default parameters for the specific model will be used.
+                Defaults to None.
             model_name (str, optional): Name of the model. Defaults to "OTXModel".
             optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
             scheduler (LRSchedulerCallable | LRSchedulerListCallable): Callable for the learning rate scheduler.
@@ -148,11 +166,17 @@ def __init__(
         super().__init__()

         self._label_info = self._dispatch_label_info(label_info)
+        self.model_name = model_name
         if isinstance(data_input_params, dict):
             data_input_params = DataInputParams(**data_input_params)
+        elif data_input_params is None:
+            data_input_params = (
+                self._default_preprocessing_params[self.model_name]
+                if isinstance(self._default_preprocessing_params, dict)
+                else self._default_preprocessing_params
+            )
         self._check_preprocessing_params(data_input_params)
         self.data_input_params = data_input_params
-        self.model_name = model_name
         self.model = self._create_model()
         self.optimizer_callable = ensure_callable(optimizer)
         self.scheduler_callable = ensure_callable(scheduler)
@@ -455,11 +479,7 @@ def on_load_checkpoint(self, checkpoint: dict[str, Any]) -> None:

     def load_state_dict_incrementally(self, ckpt: dict[str, Any], *args, **kwargs) -> None:
         """Load state dict incrementally."""
-        ckpt_label_info: LabelInfo | None = (
-            ckpt.get("hyper_parameters", {}).get("label_info")
-            if not is_ckpt_from_otx_v1(ckpt)
-            else self.get_ckpt_label_info_v1(ckpt)
-        )
+        ckpt_label_info: LabelInfo | None = ckpt.get("hyper_parameters", {}).get("label_info")

         if ckpt_label_info is None:
             msg = "Checkpoint should have `label_info`."
@@ -485,7 +505,7 @@ def load_state_dict_incrementally(self, ckpt: dict[str, Any], *args, **kwargs) -
         )

         # Model weights
-        state_dict: dict[str, Any] = ckpt.get("state_dict", {}) if not is_ckpt_from_otx_v1(ckpt) else ckpt
+        state_dict: dict[str, Any] = ckpt.get("state_dict", {})

         if state_dict is None or state_dict == {}:
             msg = "Checkpoint should have `state_dict`."
@@ -501,21 +521,13 @@ def load_state_dict(self, ckpt: dict[str, Any], *args, **kwargs) -> None:
         If checkpoint's label_info and OTXLitModule's label_info are different,
         load_state_pre_hook for smart weight loading will be registered.
         """
-        if is_ckpt_from_otx_v1(ckpt):
-            msg = "The checkpoint comes from OTXv1, checkpoint keys will be updated automatically."
-            warnings.warn(msg, stacklevel=2)
-            state_dict = self.load_from_otx_v1_ckpt(ckpt)
-        elif is_ckpt_for_finetuning(ckpt):
+        if is_ckpt_for_finetuning(ckpt):
             self.on_load_checkpoint(ckpt)
             state_dict = ckpt["state_dict"]
         else:
             state_dict = ckpt
         return super().load_state_dict(state_dict, *args, **kwargs)

-    def load_from_otx_v1_ckpt(self, ckpt: dict[str, Any]) -> dict:
-        """Load the previous OTX ckpt according to OTX2.0."""
-        raise NotImplementedError
-
     @staticmethod
     def get_ckpt_label_info_v1(ckpt: dict) -> LabelInfo:
         """Generate label info from OTX v1 checkpoint."""
@@ -561,6 +573,15 @@ def _set_label_info(self, label_info: LabelInfoTypes) -> None:

         self._label_info = new_label_info

+    @property
+    @abstractmethod
+    def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
+        """Parameters for image preprocessing.
+
+        Each model architecture must implement this property, returning a DataInputParams
+        containing the image input size, mean, and std, that is used to preprocess the input image.
+        """
+
     @property
     def num_classes(self) -> int:
         """Returns model's number of classes. Can be redefined at the model's level."""
@@ -594,9 +615,13 @@ def _customize_outputs(

     def forward(
         self,
-        inputs: OTXDataBatch,
-    ) -> OTXPredBatch | OTXBatchLossEntity:
+        inputs: OTXDataBatch | Tensor,
+    ) -> OTXPredBatch | OTXBatchLossEntity | Tensor:
         """Model forward function."""
+        # Simple forward
+        if isinstance(inputs, Tensor):
+            return self.forward_for_tracing(inputs)
+
         # If customize_inputs is overridden
         if isinstance(inputs, OTXTileBatchDataEntity):
             return self.forward_tiles(inputs)
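
Two behavioural points above are worth illustrating: data_input_params can now be omitted because each architecture supplies _default_preprocessing_params, and a raw Tensor passed to forward() is routed to forward_for_tracing(). A minimal, hypothetical subclass sketch (MyToyModel and its internals are invented; it assumes the remaining OTXModel hooks have workable defaults and that the DataInputParams import location is as shown):

import torch
from otx.backend.native.models.base import DataInputParams, OTXModel  # import locations assumed

class MyToyModel(OTXModel):  # invented subclass, for illustration only
    @property
    def _default_preprocessing_params(self) -> DataInputParams:
        # Consulted by OTXModel.__init__ when data_input_params is None.
        return DataInputParams(input_size=(224, 224), mean=(0.0, 0.0, 0.0), std=(255.0, 255.0, 255.0))

    def _create_model(self) -> torch.nn.Module:
        return torch.nn.Conv2d(3, 1, kernel_size=1)  # stand-in for a real architecture

    def forward_for_tracing(self, image: torch.Tensor) -> torch.Tensor:
        return self.model(image)

model = MyToyModel(label_info=2)              # no data_input_params: the defaults above are used
out = model(torch.rand(1, 3, 224, 224))       # Tensor input takes the new tracing path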

library/src/otx/backend/native/models/classification/factory.py

Lines changed: 5 additions & 5 deletions
@@ -50,7 +50,7 @@ class MobileNetV3:
     def __new__(
         cls,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | dict,
+        data_input_params: DataInputParams | None = None,
         task: Literal["multi_class", "multi_label", "h_label"] = "multi_class",
         freeze_backbone: bool = False,
         model_name: Literal["mobilenetv3_large", "mobilenetv3_small"] = "mobilenetv3_large",
@@ -120,7 +120,7 @@ class EfficientNet:
     def __new__(
         cls,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams,
+        data_input_params: DataInputParams | None = None,
         task: Literal["multi_class", "multi_label", "h_label"] = "multi_class",
         model_name: Literal[
             "efficientnet_b0",
@@ -192,7 +192,7 @@ class TimmModel:
     def __new__(
         cls,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams,
+        data_input_params: DataInputParams | None = None,
         task: Literal["multi_class", "multi_label", "h_label"] = "multi_class",
         model_name: str = "tf_efficientnetv2_s.in21k",
         freeze_backbone: bool = False,
@@ -279,7 +279,7 @@ class TVModel:
     def __new__(
         cls,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams,
+        data_input_params: DataInputParams | None = None,
         task: Literal["multi_class", "multi_label", "h_label"] = "multi_class",
         model_name: str = "efficientnet_v2_s",
         freeze_backbone: bool = False,
@@ -361,7 +361,7 @@ class VisionTransformer:
     def __new__(
         cls,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams,
+        data_input_params: DataInputParams | None = None,
         task: Literal["multi_class", "multi_label", "h_label"] = "multi_class",
         model_name: Literal[
             "vit-tiny",

library/src/otx/backend/native/models/classification/hlabel_models/base.py

Lines changed: 7 additions & 2 deletions
@@ -38,7 +38,8 @@ class OTXHlabelClsModel(OTXModel):

     Args:
         label_info (HLabelInfo): Information about the hierarchical labels.
-        data_input_params (DataInputParams): Parameters for data input.
+        data_input_params (DataInputParams | None, optional): Parameters for image data preprocessing. If None is given,
+            default parameters for the specific model will be used.
         model_name (str, optional): Name of the model. Defaults to "hlabel_classification_model".
         optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
         scheduler (LRSchedulerCallable | LRSchedulerListCallable, optional): Callable for the learning rate scheduler.
@@ -52,7 +53,7 @@ class OTXHlabelClsModel(OTXModel):
     def __init__(
         self,
         label_info: HLabelInfo,
-        data_input_params: DataInputParams,
+        data_input_params: DataInputParams | None = None,
         model_name: str = "hlabel_classification_model",
         freeze_backbone: bool = False,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
@@ -233,3 +234,7 @@ def forward_for_tracing(self, image: Tensor) -> Tensor | dict[str, Tensor]:
             return self.model(images=image, mode="explain")

         return self.model(images=image, mode="tensor")
+
+    @property
+    def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
+        return DataInputParams(input_size=(224, 224), mean=(123.675, 116.28, 103.53), std=(58.395, 57.12, 57.375))
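
As a quick sanity check of the new default (values taken from the property above), constructing an h-label classifier without data_input_params should pick up the 224x224 ImageNet-style preprocessing; hlabel_info below is a deliberate placeholder for an HLabelInfo built from your label tree:

from otx.backend.native.models.classification.hlabel_models.efficientnet import EfficientNetHLabelCls

hlabel_info = ...  # an HLabelInfo instance describing your hierarchy (construction omitted)
model = EfficientNetHLabelCls(label_info=hlabel_info)
assert model.data_input_params.input_size == (224, 224)  # falls back to _default_preprocessing_params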

library/src/otx/backend/native/models/classification/hlabel_models/efficientnet.py

Lines changed: 1 addition & 6 deletions
@@ -20,7 +20,6 @@
     AsymmetricAngularLossWithIgnore,
 )
 from otx.backend.native.models.classification.necks.gap import GlobalAveragePooling
-from otx.backend.native.models.utils.support_otx_v1 import OTXv1Helper
 from otx.backend.native.schedulers import LRSchedulerListCallable
 from otx.metrics.accuracy import HLabelClsMetricCallable
 from otx.types.label import HLabelInfo
@@ -37,7 +36,7 @@ class EfficientNetHLabelCls(OTXHlabelClsModel):
     def __init__(
         self,
         label_info: HLabelInfo,
-        data_input_params: DataInputParams,
+        data_input_params: DataInputParams | None = None,
         model_name: Literal[
             "efficientnet_b0",
             "efficientnet_b1",
@@ -86,7 +85,3 @@ def _create_model(self, head_config: dict | None = None) -> nn.Module: # type:
             multiclass_loss=nn.CrossEntropyLoss(),
             multilabel_loss=AsymmetricAngularLossWithIgnore(gamma_pos=0.0, gamma_neg=1.0, reduction="sum"),
         )
-
-    def load_from_otx_v1_ckpt(self, state_dict: dict, add_prefix: str = "model.") -> dict:
-        """Load the previous OTX ckpt according to OTX2.0."""
-        return OTXv1Helper.load_cls_effnet_b0_ckpt(state_dict, "hlabel", add_prefix)

library/src/otx/backend/native/models/classification/hlabel_models/mobilenet_v3.py

Lines changed: 1 addition & 6 deletions
@@ -21,7 +21,6 @@
     AsymmetricAngularLossWithIgnore,
 )
 from otx.backend.native.models.classification.necks.gap import GlobalAveragePooling
-from otx.backend.native.models.utils.support_otx_v1 import OTXv1Helper
 from otx.backend.native.schedulers import LRSchedulerListCallable
 from otx.data.entity.base import OTXBatchLossEntity
 from otx.data.entity.torch import OTXDataBatch, OTXPredBatch
@@ -41,7 +40,7 @@ class MobileNetV3HLabelCls(OTXHlabelClsModel):
     def __init__(
         self,
         label_info: HLabelInfo,
-        data_input_params: DataInputParams,
+        data_input_params: DataInputParams | None = None,
         model_name: Literal["mobilenetv3_large", "mobilenetv3_small"] = "mobilenetv3_large",
         freeze_backbone: bool = False,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
@@ -82,10 +81,6 @@ def _create_model(self, head_config: dict | None = None) -> nn.Module: # type:
             multilabel_loss=AsymmetricAngularLossWithIgnore(gamma_pos=0.0, gamma_neg=1.0, reduction="sum"),
         )

-    def load_from_otx_v1_ckpt(self, state_dict: dict, add_prefix: str = "model.") -> dict:
-        """Load the previous OTX ckpt according to OTX2.0."""
-        return OTXv1Helper.load_cls_mobilenet_v3_ckpt(state_dict, "hlabel", add_prefix)
-
     def _customize_inputs(self, inputs: OTXDataBatch) -> dict[str, Any]:
         if self.training:
             mode = "loss"

library/src/otx/backend/native/models/classification/hlabel_models/timm_model.py

Lines changed: 1 addition & 6 deletions
@@ -20,7 +20,6 @@
     AsymmetricAngularLossWithIgnore,
 )
 from otx.backend.native.models.classification.necks.gap import GlobalAveragePooling
-from otx.backend.native.models.utils.support_otx_v1 import OTXv1Helper
 from otx.backend.native.schedulers import LRSchedulerListCallable
 from otx.metrics.accuracy import HLabelClsMetricCallable
 from otx.types.label import HLabelInfo
@@ -51,7 +50,7 @@ class TimmModelHLabelCls(OTXHlabelClsModel):
     def __init__(
         self,
         label_info: HLabelInfo,
-        data_input_params: DataInputParams,
+        data_input_params: DataInputParams | None = None,
         model_name: str = "tf_efficientnetv2_s.in21k",
         freeze_backbone: bool = False,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
@@ -85,7 +84,3 @@ def _create_model(self, head_config: dict | None = None) -> nn.Module: # type:
             multiclass_loss=nn.CrossEntropyLoss(),
             multilabel_loss=AsymmetricAngularLossWithIgnore(gamma_pos=0.0, gamma_neg=1.0, reduction="sum"),
         )
-
-    def load_from_otx_v1_ckpt(self, state_dict: dict, add_prefix: str = "model.") -> dict:
-        """Load the previous OTX ckpt according to OTX2.0."""
-        return OTXv1Helper.load_cls_effnet_v2_ckpt(state_dict, "hlabel", add_prefix)

library/src/otx/backend/native/models/classification/hlabel_models/torchvision_model.py

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@ class TVModelHLabelCls(OTXHlabelClsModel):
     def __init__(
         self,
         label_info: HLabelInfo,
-        data_input_params: DataInputParams,
+        data_input_params: DataInputParams | None = None,
         model_name: str = "efficientnet_v2_s",
         freeze_backbone: bool = False,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
