diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index 9f22e12cd8..d15b34e2fd 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -20,18 +20,20 @@ jobs: uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 with: python-version: "3.12" + - name: Install build tools + run: python -m pip install build - name: Build sdist - run: python -m build --sdist + run: python -m build --sdist library/ - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: artifact-sdist - path: dist/*.tar.gz + path: library/dist/*.tar.gz - name: Build wheel - run: python -m build --wheel + run: python -m build --wheel library/ - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: artifact-wheel - path: dist/*.whl + path: library/dist/*.whl publish_package: name: Publish package @@ -45,7 +47,7 @@ jobs: - name: Download artifacts uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 with: - path: dist + path: library/dist pattern: artifact-* merge-multiple: true # to determine where to publish the package distribution to PyPI or TestPyPI @@ -60,7 +62,7 @@ jobs: uses: svenstaro/upload-release-action@81c65b7cd4de9b2570615ce3aad67a41de5b1a13 # v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: dist/* + file: library/dist/* tag: ${{ github.ref }} overwrite: true file_glob: true @@ -73,3 +75,4 @@ jobs: with: repository-url: https://test.pypi.org/legacy/ verbose: true + packages-dir: library/dist diff --git a/CHANGELOG.md b/CHANGELOG.md index 48e9ce748a..cd02051d45 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ All notable changes to this project will be documented in this file. -## \[Unreleased\] +## \[2.6.0\] ### New features @@ -13,6 +13,29 @@ All notable changes to this project will be documented in this file. 
- Add DEIM-DFine model for Object Detection
  ()
+
+### Bug fixes
+
+- Fix overriding train parameters
+  ()
+- Fix adaptive batch size to run on CPU
+  ()
+- Workaround for batch size search on XPU devices
+  ()
+- Fix UFlow configuration
+  ()
+- Fix cache args
+  ()
+- Fix finding task type in IR
+  ()
+- Fix loading checkpoint after the first round of training for the DFine-X model
+  ()
+- Fix input size configuration during validation for DFine model
+  ()
+- Fix training on CPU
+  (https://github.com/open-edge-platform/training_extensions/pull/4788)
+- Fix OOM bug on XPU
+  ()
+
 ## \[2.5.0\]

 ### Enhancements
diff --git a/library/pyproject.toml b/library/pyproject.toml
index c05a1d9b95..5d61f155bd 100644
--- a/library/pyproject.toml
+++ b/library/pyproject.toml
@@ -39,8 +39,7 @@ dependencies = [
     "einops==0.8.1",
     "decord==0.6.0",
     "typeguard>=4.3,<4.5",
-    # TODO(ashwinvaidya17): https://github.com/openvinotoolkit/anomalib/issues/2126
-    "setuptools<70",
+    "setuptools==78.1.1",
     "lightning==2.4.0",
     "torchmetrics==1.6.0",
     "pytorchcv==0.0.67",
@@ -48,8 +47,8 @@ dependencies = [
     "openvino==2025.2",
     "openvino-model-api==0.3.0.2",
     "onnx==1.17.0",
+    "onnxconverter-common==1.16.0",
     "onnxscript==0.5.3",
-    "onnxconverter-common==1.14.0",
     "nncf==2.17.0",
     "anomalib[core]==1.1.3",
     "numpy<2.0",
diff --git a/library/src/otx/__init__.py b/library/src/otx/__init__.py
index ab6963c0c1..117520f3c9 100644
--- a/library/src/otx/__init__.py
+++ b/library/src/otx/__init__.py
@@ -3,7 +3,7 @@

 """OpenVINO Training Extensions."""

-__version__ = "2.6.0dev"
+__version__ = "2.7.0dev"

 import os
 from pathlib import Path
diff --git a/library/src/otx/backend/native/callbacks/batchsize_finder.py b/library/src/otx/backend/native/callbacks/batchsize_finder.py
index a96e0898af..dc9ec21f7c 100644
--- a/library/src/otx/backend/native/callbacks/batchsize_finder.py
+++ b/library/src/otx/backend/native/callbacks/batchsize_finder.py
@@ -27,7 +27,7 @@ class BatchSizeFinder(Callback):

     def __init__(
         self,
-        steps_per_trial: int = 3,
+        steps_per_trial: int = 5,
     ) -> None:
         self._steps_per_trial = steps_per_trial

@@ -52,11 +52,12 @@ def _try_loop_run(trainer: Trainer) -> None:
     loop.run()


-def _scale_batch_reset_params(trainer: Trainer, steps_per_trial: int) -> None:
+def _scale_batch_reset_params(trainer: Trainer, steps_per_trial: int, max_epochs: int = 1) -> None:
     trainer.logger = DummyLogger() if trainer.logger is not None else None
     trainer.callbacks = []
-    # For XPU devices 1 epoch sometimes is not enough to catch an error
+    # For XPU devices, one epoch is sometimes not enough to catch an error.
+    # Empirically, enlarge this to 15 iterations (steps_per_trial * max_epochs).
+    max_epochs = 3 if is_xpu_available() else max_epochs

     loop = trainer._active_loop  # noqa: SLF001
     if loop is None:
diff --git a/library/src/otx/backend/native/engine.py b/library/src/otx/backend/native/engine.py
index c053c3c35b..1f566d70ed 100644
--- a/library/src/otx/backend/native/engine.py
+++ b/library/src/otx/backend/native/engine.py
@@ -41,7 +41,7 @@
 from otx.types.export import OTXExportFormatType
 from otx.types.precision import OTXPrecisionType
 from otx.types.task import OTXTaskType
-from otx.utils.device import is_xpu_available
+from otx.utils.device import get_available_device, is_xpu_available
 from otx.utils.utils import measure_flops

 if TYPE_CHECKING:
@@ -915,6 +915,8 @@ def configure_accelerator(self) -> None:
                 ],
             )
             self._cache.args["precision"] = None
+        elif (self._device.accelerator == DeviceType.cpu) or (get_available_device() == "cpu"):
+            self._cache.args["precision"] = "32"

     def configure_loggers(self, logger: Logger | Iterable[Logger] | bool | None = None) -> Logger | Iterable[Logger]:
         """Sets up the loggers for the trainer.
diff --git a/library/src/otx/backend/native/models/detection/d_fine.py b/library/src/otx/backend/native/models/detection/d_fine.py
index 2727ce8044..2c01388f79 100644
--- a/library/src/otx/backend/native/models/detection/d_fine.py
+++ b/library/src/otx/backend/native/models/detection/d_fine.py
@@ -92,6 +92,7 @@ def _create_model(self, num_classes: int | None = None) -> DETR:
         decoder = DFINETransformer(
             model_name=self.model_name,
             num_classes=num_classes,
+            eval_spatial_size=self.data_input_params.input_size,
         )
         criterion = DFINECriterion(
             weight_dict={
@@ -157,3 +158,17 @@ def _optimization_config(self) -> dict[str, Any]:
             },
         },
     }
+
+    def load_state_dict(self, ckpt: dict[str, Any], *args, **kwargs) -> None:
+        """Load the model state from a checkpoint state dictionary.
+
+        If a RuntimeError occurs due to a size mismatch, the non-trainable anchors and valid_mask
+        are removed from the checkpoint before retrying a non-strict load.
+        """
+        try:
+            return super().load_state_dict(ckpt, *args, **kwargs)
+        except RuntimeError:
+            # Remove non-trainable anchors and valid_mask from the checkpoint to avoid a size mismatch
+            ckpt.pop("model.decoder.anchors", None)
+            ckpt.pop("model.decoder.valid_mask", None)
+            return super().load_state_dict(ckpt, *args, strict=False, **kwargs)
diff --git a/library/src/otx/backend/native/models/detection/heads/dfine_decoder.py b/library/src/otx/backend/native/models/detection/heads/dfine_decoder.py
index 253190dedf..8e6e63cd6a 100644
--- a/library/src/otx/backend/native/models/detection/heads/dfine_decoder.py
+++ b/library/src/otx/backend/native/models/detection/heads/dfine_decoder.py
@@ -408,7 +408,7 @@ class DFINETransformerModule(nn.Module):
         num_denoising (int, optional): Number of denoising samples. Defaults to 100.
         label_noise_ratio (float, optional): Ratio of label noise. Defaults to 0.5.
         box_noise_scale (float, optional): Scale of box noise. Defaults to 1.0.
-        eval_spatial_size (list[int], optional): Spatial size for evaluation. Defaults to [640, 640].
+        eval_spatial_size (tuple[int, int], optional): Spatial size for evaluation. Defaults to (640, 640).
         eval_idx (int, optional): Evaluation index. Defaults to -1.
         reg_scale (float, optional): The weight curvature. Defaults to 4.0.
         reg_max (int, optional): The number of bins for box regression. Defaults to 32.
@@ -431,7 +431,7 @@ def __init__( num_denoising: int = 100, label_noise_ratio: float = 0.5, box_noise_scale: float = 1.0, - eval_spatial_size: list[int] = [640, 640], # noqa: B006 + eval_spatial_size: tuple[int, int] = (640, 640), eval_idx: int = -1, reg_scale: float = 4.0, reg_max: int = 32, @@ -693,7 +693,6 @@ def _get_decoder_input( if memory.shape[0] > 1: anchors = anchors.repeat(memory.shape[0], 1, 1) - memory = valid_mask.to(memory.dtype) * memory output_memory = self.enc_output(memory) @@ -933,26 +932,22 @@ class DFINETransformer: "num_decoder_layers": 3, "eval_idx": -1, "num_points_list": [6, 6], - "eval_spatial_size": [640, 640], }, "dfine_hgnetv2_s": { "feat_channels": [256, 256, 256], "num_decoder_layers": 3, "eval_idx": -1, - "eval_spatial_size": [640, 640], "num_points_list": [3, 6, 3], }, "dfine_hgnetv2_m": { "num_decoder_layers": 4, "eval_idx": -1, - "eval_spatial_size": [640, 640], }, "dfine_hgnetv2_l": {}, "dfine_hgnetv2_x": { "feat_channels": [384, 384, 384], "reg_scale": 8.0, "eval_idx": -1, - "eval_spatial_size": [640, 640], }, "deim_dfine_hgnetv2_n": { "feat_channels": [128, 128], @@ -963,21 +958,18 @@ class DFINETransformer: "num_decoder_layers": 3, "eval_idx": -1, "num_points_list": [6, 6], - "eval_spatial_size": [640, 640], "activation": nn.SiLU, }, "deim_dfine_hgnetv2_s": { "feat_channels": [256, 256, 256], "num_decoder_layers": 3, "eval_idx": -1, - "eval_spatial_size": [640, 640], "num_points_list": [3, 6, 3], "activation": nn.SiLU, }, "deim_dfine_hgnetv2_m": { "num_decoder_layers": 4, "eval_idx": -1, - "eval_spatial_size": [640, 640], "activation": nn.SiLU, }, "deim_dfine_hgnetv2_l": { @@ -987,12 +979,13 @@ class DFINETransformer: "feat_channels": [384, 384, 384], "reg_scale": 8.0, "eval_idx": -1, - "eval_spatial_size": [640, 640], "activation": nn.SiLU, }, } - def __new__(cls, model_name: str, num_classes: int) -> DFINETransformerModule: + def __new__( + cls, model_name: str, num_classes: int, eval_spatial_size: tuple[int, int] = (640, 640) + ) -> DFINETransformerModule: """Constructor for DFINETransformerModule.""" cfg = cls.decoder_cfg[model_name] - return DFINETransformerModule(num_classes=num_classes, **cfg) + return DFINETransformerModule(num_classes=num_classes, eval_spatial_size=eval_spatial_size, **cfg) diff --git a/library/src/otx/backend/native/tools/adaptive_bs/algorithm.py b/library/src/otx/backend/native/tools/adaptive_bs/algorithm.py index 0391756b07..6268d9e5d7 100644 --- a/library/src/otx/backend/native/tools/adaptive_bs/algorithm.py +++ b/library/src/otx/backend/native/tools/adaptive_bs/algorithm.py @@ -47,8 +47,8 @@ def __init__( self._max_bs = max_bs self._bs_try_history: dict[int, int] = {} self._total_mem = _get_total_memory_size() - self._mem_lower_bound = 0.8 * self._total_mem - self._mem_upper_bound = 0.85 * self._total_mem + self._mem_lower_bound = 0.75 * self._total_mem + self._mem_upper_bound = 0.9 * self._total_mem self._mp_ctx = mp.get_context("spawn") def _try_batch_size(self, bs: int) -> tuple[bool, int]: @@ -115,16 +115,16 @@ def auto_decrease_batch_size(self) -> int: if oom: logger.warning( "The auto batch size algorithm attempted to use a batch size of 2 but still " - "encountered a CUDA OOM error. OTX will proceed with training at batch size 2; " - "however, you will likely encounter a CUDA OOM error once training starts. " - "If the issue persists, please report it accordingly.", + "encountered a CUDA OOM error. 
OTX will proceed with training at batch size 1; "
+                "however, a CUDA OOM error may still occur during training.",
             )
-            return 2
+            return 1

         logger.warning(
             "Even with a batch size of 2, most of the memory is used, "
-            "which could cause the training to fail midway.",
+            "which could cause the training to fail midway. "
+            "For safety, the batch size is decreased to 1.",
         )
-        available_bs = 2
+        available_bs = 1

         return available_bs

@@ -157,9 +157,10 @@ def find_big_enough_batch_size(self, drop_last: bool = False) -> int:
                 raise RuntimeError(msg)
             logger.warning(
                 "Even with a batch size of 2, most of the memory is used, "
-                "which could cause the training to fail midway.",
+                "which could cause the training to fail midway. "
+                "For safety, the batch size is decreased to 1.",
             )
-            return 2
+            return 1

         return self.auto_decrease_batch_size()

@@ -270,6 +271,8 @@ def _run_trial(train_func: Callable[[int], Any], bs: int, trial_queue: mp.Queue)
             or "UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY" in str(e)
             or "UR error" in str(e)
             or "UR_RESULT_ERROR_UNKNOWN" in str(e)
+            or "UR_RESULT_ERROR_OUT_OF_HOST_MEMORY" in str(e)
+            or "UR_RESULT_ERROR" in str(e)
         ):  # XPU OOM
             oom = True
         else:
diff --git a/library/src/otx/backend/native/tools/adaptive_bs/runner.py b/library/src/otx/backend/native/tools/adaptive_bs/runner.py
index 3c149fec20..47cd817036 100644
--- a/library/src/otx/backend/native/tools/adaptive_bs/runner.py
+++ b/library/src/otx/backend/native/tools/adaptive_bs/runner.py
@@ -114,10 +114,6 @@ def _register_callback(callbacks: list[Callback] | Callback | None = None) -> li
 def _apply_new_batch_size(engine: OTXEngine, new_batch_size: int) -> None:
     origin_bs = engine.datamodule.train_subset.batch_size
-    if is_xpu_available() and new_batch_size != 1:
-        new_batch_size -= 1  # for safety reasons
-    if new_batch_size == origin_bs:
-        return
     engine.datamodule.train_subset.batch_size = new_batch_size
     engine.datamodule.val_subset.batch_size = new_batch_size
     engine.datamodule.test_subset.batch_size = new_batch_size
diff --git a/library/src/otx/data/transform_libs/torchvision.py b/library/src/otx/data/transform_libs/torchvision.py
index 16b25c6559..07eb4f3416 100644
--- a/library/src/otx/data/transform_libs/torchvision.py
+++ b/library/src/otx/data/transform_libs/torchvision.py
@@ -1159,7 +1159,6 @@ def __init__(
     ) -> None:
         super().__init__()
         self._validate_parameters(max_translate_ratio, scaling_ratio_range)
-
         self.max_rotate_degree = max_rotate_degree
         self.max_translate_ratio = max_translate_ratio
         self.scaling_ratio_range = scaling_ratio_range
@@ -1238,7 +1237,13 @@ def forward(self, *_inputs: OTXDataItem) -> OTXDataItem:
         homography_matrix = self._get_random_homography_matrix(height, width)
         output_shape = (height + self.border[0] * 2, width + self.border[1] * 2)

-        if hasattr(inputs, "bboxes") and inputs.bboxes is not None and len(inputs.bboxes) > 0:
+        transformed_img = self._warp_image(img, homography_matrix, output_shape)
+        inputs.image = transformed_img
+        inputs.img_info = _resize_image_info(inputs.img_info, transformed_img.shape[:2])
+        valid_index = None
+        valid_bboxes = hasattr(inputs, "bboxes") and inputs.bboxes is not None and len(inputs.bboxes) > 0
+
+        if valid_bboxes:
             # Test transform bboxes to see if any remain valid
             valid_index = self._transform_bboxes(inputs, homography_matrix, output_shape)
             # If no valid annotations will remain after transformation, skip entirely
@@ -1246,20 +1251,14 @@
                 inputs.image = img
                 return self.convert(inputs)  # type: ignore[return-value]
# If we reach here, transformation will produce valid results, so proceed - # Transform image - transformed_img = self._warp_image(img, homography_matrix, output_shape) - inputs.image = transformed_img - inputs.img_info = _resize_image_info(inputs.img_info, transformed_img.shape[:2]) - - if hasattr(inputs, "masks") and inputs.masks is not None and len(inputs.masks) > 0: - self._transform_masks(inputs, homography_matrix, output_shape, valid_index) + if hasattr(inputs, "masks") and inputs.masks is not None and len(inputs.masks) > 0: + self._transform_masks(inputs, homography_matrix, output_shape, valid_index) - if hasattr(inputs, "polygons") and inputs.polygons is not None and len(inputs.polygons) > 0: - self._transform_polygons(inputs, homography_matrix, output_shape, valid_index) + if hasattr(inputs, "polygons") and inputs.polygons is not None and len(inputs.polygons) > 0: + self._transform_polygons(inputs, homography_matrix, output_shape, valid_index) - if self.recompute_bbox: - self._recompute_bboxes(inputs, output_shape) + if valid_bboxes and self.recompute_bbox: + self._recompute_bboxes(inputs, output_shape) return self.convert(inputs) # type: ignore[return-value] @@ -1321,7 +1320,7 @@ def _transform_masks( inputs: OTXDataItem, warp_matrix: np.ndarray, output_size: tuple[int, int], - valid_index: np.ndarray, + valid_index: np.ndarray | None = None, ) -> None: """Transform masks using the warp matrix. @@ -1335,11 +1334,11 @@ def _transform_masks( return # Convert valid_index to numpy boolean array if it's a tensor - if hasattr(valid_index, "numpy"): + if valid_index is not None and hasattr(valid_index, "numpy"): valid_index = valid_index.numpy() # Filter masks using valid_index first - masks = inputs.masks[valid_index] + masks = inputs.masks[valid_index] if valid_index is not None else inputs.masks masks = masks.numpy() if not isinstance(masks, np.ndarray) else masks if masks.ndim == 3: @@ -1378,15 +1377,20 @@ def _warp_single_mask(self, mask: np.ndarray, warp_matrix: np.ndarray, output_si ) return warped_mask > 127 - msg = "Multi-class masks are not supported yet." - raise NotImplementedError(msg) + return cv2.warpPerspective( + mask.astype(np.uint8), + warp_matrix, + dsize=(width, height), + flags=cv2.INTER_NEAREST, + borderValue=0, + ) def _transform_polygons( self, inputs: OTXDataItem, warp_matrix: np.ndarray, output_shape: tuple[int, int], - valid_index: np.ndarray, + valid_index: np.ndarray | None = None, ) -> None: """Transform polygons using the warp matrix. 
@@ -1405,11 +1409,13 @@
             return

         # Convert valid_index to numpy boolean array if it's a tensor
-        if hasattr(valid_index, "numpy"):
+        if valid_index is not None and hasattr(valid_index, "numpy"):
             valid_index = valid_index.numpy()

-        # Filter polygons using valid_index
-        filtered_polygons = [p for p, keep in zip(inputs.polygons, valid_index) if keep]
+        # Filter polygons using valid_index if available
+        filtered_polygons = (
+            [p for p, keep in zip(inputs.polygons, valid_index) if keep] if valid_index is not None else inputs.polygons
+        )

         if filtered_polygons:
             inputs.polygons = project_polygons(filtered_polygons, warp_matrix, output_shape)
diff --git a/library/src/otx/data/utils/pre_filtering.py b/library/src/otx/data/utils/pre_filtering.py
index 7f4a265cb7..4bf59d9a3c 100644
--- a/library/src/otx/data/utils/pre_filtering.py
+++ b/library/src/otx/data/utils/pre_filtering.py
@@ -10,7 +10,7 @@
 from functools import partial
 from typing import TYPE_CHECKING

-from datumaro.components.annotation import Annotation, Bbox, Ellipse, Polygon
+from datumaro.components.annotation import Annotation, AnnotationType, Bbox, Ellipse, Points, Polygon
 from datumaro.components.dataset import Dataset as DmDataset

 from otx.types.task import OTXTaskType
@@ -19,6 +19,14 @@
     from datumaro.components.dataset_base import DatasetItem


+def get_labels(dataset: DmDataset, task: OTXTaskType) -> list[str]:
+    """Get the labels from the dataset."""
+    # Keypoint labels are stored under the points category rather than the generic
+    # label category (e.g., for arrow datasets).
+    if task == OTXTaskType.KEYPOINT_DETECTION:
+        return dataset.categories()[AnnotationType.points][0].labels
+    return dataset.categories()[AnnotationType.label]
+
+
 def pre_filtering(
     dataset: DmDataset,
     data_format: str,
@@ -42,7 +50,16 @@
         used_background_items = set()
         msg = f"There are empty annotation items in train set, Of these, only {unannotated_items_ratio*100}% are used."
         warnings.warn(msg, stacklevel=2)
-    dataset = DmDataset.filter(dataset, partial(is_valid_anno_for_task, task=task), filter_annotations=True)
+
+    labels = get_labels(dataset, task)
+
+    dataset = DmDataset.filter(
+        dataset,
+        partial(is_valid_anno_for_task, task=task, labels=labels),
+        filter_annotations=True,
+    )
+    if task == OTXTaskType.KEYPOINT_DETECTION:
+        return dataset
     dataset = remove_unused_labels(dataset, data_format, ignore_index)
     if unannotated_items_ratio > 0:
         empty_items = [
@@ -61,7 +78,7 @@
     )


-def is_valid_annot(item: DatasetItem, annotation: Annotation) -> bool:  # noqa: ARG001
+def is_valid_annot(item: DatasetItem, annotation: Annotation, labels: list[str]) -> bool:  # noqa: ARG001
     """Return whether DatasetItem's annotation is valid."""
     if isinstance(annotation, Bbox):
         x1, y1, x2, y2 = annotation.points
@@ -79,28 +96,45 @@ def is_valid_annot(item: DatasetItem, annotation: Annotation) -> bool:  # noqa:
             return True
         msg = "There are invalid polygon, they will be filtered out before training."
         return False
+    if isinstance(annotation, Points):
+        # For keypoint detection, the number of (x, y) pairs must equal the number of labels
+        if len(annotation.points) == 0:
+            msg = "There are invalid points, they will be filtered out before training."
+ warnings.warn(msg, stacklevel=2) + return False + return len(annotation.points) // 2 == len(labels) + return True -def is_valid_anno_for_task(item: DatasetItem, annotation: Annotation, task: OTXTaskType) -> bool: +def is_valid_anno_for_task( + item: DatasetItem, + annotation: Annotation, + task: OTXTaskType, + labels: list[str], +) -> bool: """Return whether DatasetItem's annotation is valid for a specific task. Args: item (DatasetItem): The item to be checked. annotation (Annotation): The annotation to be checked. task (OTXTaskType): The task type of the dataset. + labels (list[str]): The labels of the dataset. Returns: bool: True if the annotation is valid for the task, False otherwise. """ if task == OTXTaskType.DETECTION: - return isinstance(annotation, Bbox) and is_valid_annot(item, annotation) + return isinstance(annotation, Bbox) and is_valid_annot(item, annotation, labels) # Rotated detection is a subset of instance segmentation if task in [OTXTaskType.INSTANCE_SEGMENTATION, OTXTaskType.ROTATED_DETECTION]: - return isinstance(annotation, (Polygon, Bbox, Ellipse)) and is_valid_annot(item, annotation) + return isinstance(annotation, (Polygon, Bbox, Ellipse)) and is_valid_annot(item, annotation, labels) + + if task == OTXTaskType.KEYPOINT_DETECTION: + return isinstance(annotation, Points) and is_valid_annot(item, annotation, labels) - return is_valid_annot(item, annotation) + return is_valid_annot(item, annotation, labels) def remove_unused_labels( diff --git a/library/src/otx/recipe/detection/deim_dfine_l.yaml b/library/src/otx/recipe/detection/deim_dfine_l.yaml index 4753c0f8b0..df29f11c38 100644 --- a/library/src/otx/recipe/detection/deim_dfine_l.yaml +++ b/library/src/otx/recipe/detection/deim_dfine_l.yaml @@ -215,7 +215,22 @@ overrides: train_subset: batch_size: 8 num_workers: 4 - transforms: [] + transforms: + - class_path: otx.data.transform_libs.torchvision.Resize + init_args: + scale: $(input_size) + keep_ratio: false + - class_path: otx.data.transform_libs.torchvision.RandomFlip + init_args: + probability: 0.5 + - class_path: torchvision.transforms.v2.ToDtype + init_args: + dtype: ${as_torch_dtype:torch.float32} + scale: false + - class_path: torchvision.transforms.v2.Normalize + init_args: + mean: [0.0, 0.0, 0.0] + std: [255.0, 255.0, 255.0] sampler: class_path: otx.data.samplers.balanced_sampler.BalancedSampler diff --git a/library/src/otx/recipe/detection/deim_dfine_m.yaml b/library/src/otx/recipe/detection/deim_dfine_m.yaml index 4b52e73c60..0a8337cceb 100644 --- a/library/src/otx/recipe/detection/deim_dfine_m.yaml +++ b/library/src/otx/recipe/detection/deim_dfine_m.yaml @@ -214,7 +214,22 @@ overrides: train_subset: batch_size: 8 num_workers: 4 - transforms: [] + transforms: + - class_path: otx.data.transform_libs.torchvision.Resize + init_args: + scale: $(input_size) + keep_ratio: false + - class_path: otx.data.transform_libs.torchvision.RandomFlip + init_args: + probability: 0.5 + - class_path: torchvision.transforms.v2.ToDtype + init_args: + dtype: ${as_torch_dtype:torch.float32} + scale: false + - class_path: torchvision.transforms.v2.Normalize + init_args: + mean: [0.0, 0.0, 0.0] + std: [255.0, 255.0, 255.0] sampler: class_path: otx.data.samplers.balanced_sampler.BalancedSampler diff --git a/library/src/otx/recipe/detection/deim_dfine_x.yaml b/library/src/otx/recipe/detection/deim_dfine_x.yaml index d6d3d31b05..f3d30b8e93 100644 --- a/library/src/otx/recipe/detection/deim_dfine_x.yaml +++ b/library/src/otx/recipe/detection/deim_dfine_x.yaml @@ -215,7 +215,22 @@ 
overrides: train_subset: batch_size: 8 num_workers: 4 - transforms: [] + transforms: + - class_path: otx.data.transform_libs.torchvision.Resize + init_args: + scale: $(input_size) + keep_ratio: false + - class_path: otx.data.transform_libs.torchvision.RandomFlip + init_args: + probability: 0.5 + - class_path: torchvision.transforms.v2.ToDtype + init_args: + dtype: ${as_torch_dtype:torch.float32} + scale: false + - class_path: torchvision.transforms.v2.Normalize + init_args: + mean: [0.0, 0.0, 0.0] + std: [255.0, 255.0, 255.0] sampler: class_path: otx.data.samplers.balanced_sampler.BalancedSampler diff --git a/library/src/otx/recipe/detection/dfine_x.yaml b/library/src/otx/recipe/detection/dfine_x.yaml index ec392d11b3..7b22280f16 100644 --- a/library/src/otx/recipe/detection/dfine_x.yaml +++ b/library/src/otx/recipe/detection/dfine_x.yaml @@ -66,26 +66,25 @@ overrides: batch_size: 8 num_workers: 4 transforms: - - class_path: torchvision.transforms.v2.RandomPhotometricDistort - init_args: - p: 0.5 - class_path: torchvision.transforms.v2.RandomZoomOut + enable: true init_args: fill: 0 - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop + enable: true init_args: probability: 0.8 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes init_args: min_size: 1 - - class_path: otx.data.transform_libs.torchvision.RandomFlip - init_args: - probability: 0.5 - class_path: otx.data.transform_libs.torchvision.Resize init_args: scale: $(input_size) transform_bbox: true keep_ratio: false + - class_path: otx.data.transform_libs.torchvision.RandomFlip + init_args: + probability: 0.5 - class_path: torchvision.transforms.v2.RandomPhotometricDistort enable: false init_args: diff --git a/library/src/otx/recipe/detection/dfine_x_tile.yaml b/library/src/otx/recipe/detection/dfine_x_tile.yaml index 0f59712a53..c4f5e5c9fa 100644 --- a/library/src/otx/recipe/detection/dfine_x_tile.yaml +++ b/library/src/otx/recipe/detection/dfine_x_tile.yaml @@ -68,26 +68,25 @@ overrides: num_workers: 4 to_tv_image: true transforms: - - class_path: torchvision.transforms.v2.RandomPhotometricDistort - init_args: - p: 0.5 - class_path: torchvision.transforms.v2.RandomZoomOut + enable: true init_args: fill: 0 - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop + enable: true init_args: probability: 0.8 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes init_args: min_size: 1 - - class_path: otx.data.transform_libs.torchvision.RandomFlip - init_args: - probability: 0.5 - class_path: otx.data.transform_libs.torchvision.Resize init_args: scale: $(input_size) transform_bbox: true keep_ratio: false + - class_path: otx.data.transform_libs.torchvision.RandomFlip + init_args: + probability: 0.5 - class_path: torchvision.transforms.v2.RandomPhotometricDistort enable: false init_args: diff --git a/library/src/otx/recipe/detection/rtdetr_101.yaml b/library/src/otx/recipe/detection/rtdetr_101.yaml index 06ea3f0f33..3078b46a98 100644 --- a/library/src/otx/recipe/detection/rtdetr_101.yaml +++ b/library/src/otx/recipe/detection/rtdetr_101.yaml @@ -63,6 +63,13 @@ overrides: train_subset: batch_size: 4 transforms: + - class_path: otx.data.transform_libs.torchvision.MinIoURandomCrop + enable: false + - class_path: otx.data.transform_libs.torchvision.Resize + init_args: + scale: $(input_size) + keep_ratio: false + transform_bbox: true - class_path: torchvision.transforms.v2.RandomPhotometricDistort enable: false init_args: @@ -79,11 +86,6 @@ overrides: - -0.05 - 0.05 p: 0.5 - - class_path: 
otx.data.transform_libs.torchvision.Resize - init_args: - scale: $(input_size) - keep_ratio: false - transform_bbox: true - class_path: otx.data.transform_libs.torchvision.RandomAffine enable: false init_args: @@ -94,6 +96,7 @@ overrides: - 1.5 max_shear_degree: 2.0 - class_path: otx.data.transform_libs.torchvision.RandomFlip + enable: true init_args: probability: 0.5 - class_path: torchvision.transforms.v2.RandomVerticalFlip diff --git a/library/src/otx/recipe/detection/rtdetr_18.yaml b/library/src/otx/recipe/detection/rtdetr_18.yaml index 088b3b317e..44da32abb1 100644 --- a/library/src/otx/recipe/detection/rtdetr_18.yaml +++ b/library/src/otx/recipe/detection/rtdetr_18.yaml @@ -62,6 +62,13 @@ overrides: train_subset: batch_size: 4 transforms: + - class_path: otx.data.transform_libs.torchvision.MinIoURandomCrop + enable: false + - class_path: otx.data.transform_libs.torchvision.Resize + init_args: + scale: $(input_size) + keep_ratio: false + transform_bbox: true - class_path: torchvision.transforms.v2.RandomPhotometricDistort enable: false init_args: @@ -78,11 +85,6 @@ overrides: - -0.05 - 0.05 p: 0.5 - - class_path: otx.data.transform_libs.torchvision.Resize - init_args: - scale: $(input_size) - keep_ratio: false - transform_bbox: true - class_path: otx.data.transform_libs.torchvision.RandomAffine enable: false init_args: @@ -93,6 +95,7 @@ overrides: - 1.5 max_shear_degree: 2.0 - class_path: otx.data.transform_libs.torchvision.RandomFlip + enable: true init_args: probability: 0.5 - class_path: torchvision.transforms.v2.RandomVerticalFlip diff --git a/library/src/otx/recipe/detection/rtdetr_50.yaml b/library/src/otx/recipe/detection/rtdetr_50.yaml index f1da587a62..096c73868a 100644 --- a/library/src/otx/recipe/detection/rtdetr_50.yaml +++ b/library/src/otx/recipe/detection/rtdetr_50.yaml @@ -63,6 +63,13 @@ overrides: train_subset: batch_size: 4 transforms: + - class_path: otx.data.transform_libs.torchvision.MinIoURandomCrop + enable: false + - class_path: otx.data.transform_libs.torchvision.Resize + init_args: + scale: $(input_size) + keep_ratio: false + transform_bbox: true - class_path: torchvision.transforms.v2.RandomPhotometricDistort enable: false init_args: @@ -79,11 +86,6 @@ overrides: - -0.05 - 0.05 p: 0.5 - - class_path: otx.data.transform_libs.torchvision.Resize - init_args: - scale: $(input_size) - keep_ratio: false - transform_bbox: true - class_path: otx.data.transform_libs.torchvision.RandomAffine enable: false init_args: @@ -94,6 +96,7 @@ overrides: - 1.5 max_shear_degree: 2.0 - class_path: otx.data.transform_libs.torchvision.RandomFlip + enable: true init_args: probability: 0.5 - class_path: torchvision.transforms.v2.RandomVerticalFlip diff --git a/library/src/otx/recipe/detection/rtmdet_tiny.yaml b/library/src/otx/recipe/detection/rtmdet_tiny.yaml index 577d77410c..99ed5c5047 100644 --- a/library/src/otx/recipe/detection/rtmdet_tiny.yaml +++ b/library/src/otx/recipe/detection/rtmdet_tiny.yaml @@ -81,15 +81,6 @@ overrides: - class_path: otx.data.transform_libs.torchvision.RandomCrop init_args: crop_size: $(input_size) - - class_path: otx.data.transform_libs.torchvision.RandomAffine - enable: false - init_args: - max_rotate_degree: 10.0 - max_translate_ratio: 0.1 - scaling_ratio_range: - - 0.5 - - 1.5 - max_shear_degree: 2.0 - class_path: torchvision.transforms.v2.RandomPhotometricDistort enable: false init_args: @@ -106,6 +97,15 @@ overrides: - -0.05 - 0.05 p: 0.5 + - class_path: otx.data.transform_libs.torchvision.RandomAffine + enable: false + init_args: + 
max_rotate_degree: 10.0 + max_translate_ratio: 0.1 + scaling_ratio_range: + - 0.5 + - 1.5 + max_shear_degree: 2.0 - class_path: otx.data.transform_libs.torchvision.YOLOXHSVRandomAug - class_path: otx.data.transform_libs.torchvision.RandomFlip init_args: diff --git a/library/src/otx/tools/converter.py b/library/src/otx/tools/converter.py index 1e8dbea15f..aaf2621046 100644 --- a/library/src/otx/tools/converter.py +++ b/library/src/otx/tools/converter.py @@ -272,6 +272,15 @@ def update_num_iters(param_value: int | None, config: dict) -> None: config["max_epochs"] = param_value +def update_batch_size(param_value: int | None, config: dict) -> None: + """Update batch size in the config.""" + if param_value is None: + logging.info("Batch size is not provided, skipping update.") + return + config["data"]["train_subset"]["batch_size"] = param_value + config["data"]["val_subset"]["batch_size"] = param_value + + def update_early_stopping(early_stopping_cfg: dict | None, config: dict) -> None: """Update early stopping parameters in the config.""" if early_stopping_cfg is None: @@ -483,6 +492,7 @@ def _update_params(config: dict, param_dict: dict) -> None: update_tiling(tiling, config) update_augmentations(augmentation_params, config) update_learning_rate(training_parameters.get("learning_rate", None), config) + update_batch_size(training_parameters.get("batch_size", None), config) update_num_iters(training_parameters.get("max_epochs", None), config) update_early_stopping(training_parameters.get("early_stopping", None), config) update_input_size( diff --git a/library/tests/assets/geti/model_configs/detection.yaml b/library/tests/assets/geti/model_configs/detection.yaml index 282b65e9a1..fea9c36b57 100644 --- a/library/tests/assets/geti/model_configs/detection.yaml +++ b/library/tests/assets/geti/model_configs/detection.yaml @@ -70,6 +70,7 @@ hyperparameters: enable: true patience: 10 learning_rate: 0.001 + batch_size: 4 input_size_width: 800 input_size_height: 992 evaluation: diff --git a/library/tests/unit/backend/native/tools/adaptive_bs/test_bs_search_algo.py b/library/tests/unit/backend/native/tools/adaptive_bs/test_bs_search_algo.py index a6ed580a01..6725d8f481 100644 --- a/library/tests/unit/backend/native/tools/adaptive_bs/test_bs_search_algo.py +++ b/library/tests/unit/backend/native/tools/adaptive_bs/test_bs_search_algo.py @@ -68,9 +68,9 @@ def mock_train_func(batch_size) -> int: msg = "CUDA out of memory." 
raise RuntimeError(msg) if batch_size > max_runnable_bs: - mem_usage = 8500 + 1500 * batch_size / (cuda_oom_bound - max_runnable_bs) + mem_usage = 9000 + 1500 * batch_size / (cuda_oom_bound - max_runnable_bs) else: - mem_usage = 8500 * batch_size / max_runnable_bs + mem_usage = 9000 * batch_size / max_runnable_bs self.mock_torch.cuda.max_memory_reserved.return_value = mem_usage return mem_usage @@ -110,14 +110,14 @@ def test_find_max_usable_bs_gpu_memory_too_small(self): mock_train_func = self.get_mock_train_func(cuda_oom_bound=1, max_runnable_bs=1) bs_search_algo = BsSearchAlgo(mock_train_func, 128, 1000) - assert bs_search_algo.auto_decrease_batch_size() == 2 + assert bs_search_algo.auto_decrease_batch_size() == 1 def test_auto_decrease_batch_size_bs2_not_oom_but_most_mem(self): """Batch size 2 doesn't make oom but use most of memory.""" mock_train_func = self.get_mock_train_func(cuda_oom_bound=2, max_runnable_bs=1) bs_search_algo = BsSearchAlgo(mock_train_func, 128, 1000) - assert bs_search_algo.auto_decrease_batch_size() == 2 + assert bs_search_algo.auto_decrease_batch_size() == 1 @pytest.mark.parametrize( ("max_runnable_bs", "max_bs", "expected_bs"), @@ -135,7 +135,7 @@ def test_find_big_enough_batch_size(self, max_runnable_bs, max_bs, expected_bs): adapted_bs = bs_search_algo.find_big_enough_batch_size() if expected_bs is None: - assert 7500 <= mock_train_func(adapted_bs) <= 8500 + assert 7500 <= mock_train_func(adapted_bs) <= 9000 else: assert adapted_bs == expected_bs @@ -143,14 +143,14 @@ def test_find_big_enough_batch_size_gpu_memory_too_small(self): mock_train_func = self.get_mock_train_func(cuda_oom_bound=1, max_runnable_bs=1) bs_search_algo = BsSearchAlgo(mock_train_func, 128, 1000) - assert bs_search_algo.find_big_enough_batch_size() == 2 + assert bs_search_algo.find_big_enough_batch_size() == 1 def test_find_big_enough_batch_size_bs2_not_oom_but_most_mem(self): """Batch size 2 doesn't make oom but use most of memory.""" mock_train_func = self.get_mock_train_func(cuda_oom_bound=2, max_runnable_bs=1) bs_search_algo = BsSearchAlgo(mock_train_func, 2, 1000) - assert bs_search_algo.find_big_enough_batch_size() == 2 + assert bs_search_algo.find_big_enough_batch_size() == 1 def test_find_big_enough_batch_size_gradient_zero(self): def mock_train_func(batch_size) -> int: @@ -167,7 +167,7 @@ def mock_train_func(batch_size) -> int: bs_search_algo = BsSearchAlgo(mock_train_func, 64, 1000) adapted_bs = bs_search_algo.find_big_enough_batch_size() - assert adapted_bs == 100 + assert adapted_bs == 102 def test_find_big_enough_batch_size_not_exceed_upper_bound(self): def mock_train_func(batch_size) -> int: @@ -184,7 +184,7 @@ def mock_train_func(batch_size) -> int: bs_search_algo = BsSearchAlgo(mock_train_func, 64, 1000) adapted_bs = bs_search_algo.find_big_enough_batch_size() - assert mock_train_func(adapted_bs) <= 8500 + assert mock_train_func(adapted_bs) <= 9000 def test_find_big_enough_batch_size_drop_last(self): mock_train_func = self.get_mock_train_func(cuda_oom_bound=10000, max_runnable_bs=180) diff --git a/library/tests/unit/data/test_pre_filtering.py b/library/tests/unit/data/test_pre_filtering.py index 53a0831294..84371188e8 100644 --- a/library/tests/unit/data/test_pre_filtering.py +++ b/library/tests/unit/data/test_pre_filtering.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import pytest -from datumaro.components.annotation import AnnotationType, Bbox, Ellipse, Label, Polygon +from datumaro.components.annotation import AnnotationType, Bbox, Ellipse, Label, Points, 
Polygon from datumaro.components.dataset import Dataset as DmDataset from datumaro.components.dataset_base import DatasetItem @@ -137,6 +137,26 @@ class TestIsValidAnnoForTask: (OTXTaskType.ROTATED_DETECTION, Polygon(points=[0, 0, 10, 0, 10, 10, 0, 10], label=0), True), (OTXTaskType.ROTATED_DETECTION, Ellipse(x1=0, y1=0, x2=10, y2=10, label=0), True), (OTXTaskType.ROTATED_DETECTION, Label(label=0), False), + # KEYPOINT_DETECTION task tests + ( + OTXTaskType.KEYPOINT_DETECTION, + Points(points=[10, 20, 30, 40], label=0), + True, + ), # 2 keypoints, will use 2 labels + ( + OTXTaskType.KEYPOINT_DETECTION, + Points(points=[10, 20, 30, 40, 50, 60], label=0), + True, + ), # 3 keypoints, will use 3 labels + (OTXTaskType.KEYPOINT_DETECTION, Points(points=[10, 20], label=0), True), # 1 keypoint, will use 1 label + (OTXTaskType.KEYPOINT_DETECTION, Points(points=[], label=0), False), # 0 keypoints, will use 0 labels + (OTXTaskType.KEYPOINT_DETECTION, Bbox(x=0, y=0, w=10, h=10, label=0), False), # Wrong type + ( + OTXTaskType.KEYPOINT_DETECTION, + Polygon(points=[0, 0, 10, 0, 10, 10, 0, 10], label=0), + False, + ), # Wrong type + (OTXTaskType.KEYPOINT_DETECTION, Label(label=0), False), # Wrong type ], ) def test_is_valid_anno_for_task( @@ -154,25 +174,34 @@ def test_is_valid_anno_for_task( annotation: The annotation to test expected: Expected result (True if valid, False if invalid) """ - result = is_valid_anno_for_task(fxt_dataset_item, annotation, task) + # For keypoint detection, we need to provide the correct number of labels + # based on the number of keypoints in the annotation + if task == OTXTaskType.KEYPOINT_DETECTION and isinstance(annotation, Points): + # Calculate expected number of labels based on points (each keypoint is x,y pair) + expected_labels = len(annotation.points) // 2 + labels = [f"keypoint_{i}" for i in range(expected_labels)] + else: + labels = [0] + + result = is_valid_anno_for_task(fxt_dataset_item, annotation, task, labels) assert result == expected, f"Expected {expected} for task {task} with annotation {type(annotation).__name__}" def test_detection_task_with_valid_bbox(self, fxt_dataset_item: DatasetItem) -> None: """Test DETECTION task with valid bounding box.""" bbox = Bbox(x=5, y=5, w=20, h=15, label=0) - result = is_valid_anno_for_task(fxt_dataset_item, bbox, OTXTaskType.DETECTION) + result = is_valid_anno_for_task(fxt_dataset_item, bbox, OTXTaskType.DETECTION, [0]) assert result is True def test_detection_task_with_invalid_bbox(self, fxt_dataset_item: DatasetItem) -> None: """Test DETECTION task with invalid bounding box (negative dimensions).""" bbox = Bbox(x=10, y=10, w=-5, h=-5, label=0) - result = is_valid_anno_for_task(fxt_dataset_item, bbox, OTXTaskType.DETECTION) + result = is_valid_anno_for_task(fxt_dataset_item, bbox, OTXTaskType.DETECTION, [0]) assert result is False def test_detection_task_with_zero_dimension_bbox(self, fxt_dataset_item: DatasetItem) -> None: """Test DETECTION task with zero dimension bounding box.""" bbox = Bbox(x=10, y=10, w=0, h=0, label=0) - result = is_valid_anno_for_task(fxt_dataset_item, bbox, OTXTaskType.DETECTION) + result = is_valid_anno_for_task(fxt_dataset_item, bbox, OTXTaskType.DETECTION, [0]) assert result is False def test_detection_task_with_wrong_annotation_type(self, fxt_dataset_item: DatasetItem) -> None: @@ -181,9 +210,9 @@ def test_detection_task_with_wrong_annotation_type(self, fxt_dataset_item: Datas ellipse = Ellipse(x1=0, y1=0, x2=10, y2=10, label=0) label = Label(label=0) - assert 
is_valid_anno_for_task(fxt_dataset_item, polygon, OTXTaskType.DETECTION) is False - assert is_valid_anno_for_task(fxt_dataset_item, ellipse, OTXTaskType.DETECTION) is False - assert is_valid_anno_for_task(fxt_dataset_item, label, OTXTaskType.DETECTION) is False + assert is_valid_anno_for_task(fxt_dataset_item, polygon, OTXTaskType.DETECTION, [0]) is False + assert is_valid_anno_for_task(fxt_dataset_item, ellipse, OTXTaskType.DETECTION, [0]) is False + assert is_valid_anno_for_task(fxt_dataset_item, label, OTXTaskType.DETECTION, [0]) is False def test_instance_segmentation_task_with_valid_annotations(self, fxt_dataset_item: DatasetItem) -> None: """Test INSTANCE_SEGMENTATION task with valid annotation types.""" @@ -191,9 +220,9 @@ def test_instance_segmentation_task_with_valid_annotations(self, fxt_dataset_ite polygon = Polygon(points=[0, 0, 10, 0, 10, 10, 0, 10], label=0) ellipse = Ellipse(x1=0, y1=0, x2=10, y2=10, label=0) - assert is_valid_anno_for_task(fxt_dataset_item, bbox, OTXTaskType.INSTANCE_SEGMENTATION) is True - assert is_valid_anno_for_task(fxt_dataset_item, polygon, OTXTaskType.INSTANCE_SEGMENTATION) is True - assert is_valid_anno_for_task(fxt_dataset_item, ellipse, OTXTaskType.INSTANCE_SEGMENTATION) is True + assert is_valid_anno_for_task(fxt_dataset_item, bbox, OTXTaskType.INSTANCE_SEGMENTATION, [0]) is True + assert is_valid_anno_for_task(fxt_dataset_item, polygon, OTXTaskType.INSTANCE_SEGMENTATION, [0]) is True + assert is_valid_anno_for_task(fxt_dataset_item, ellipse, OTXTaskType.INSTANCE_SEGMENTATION, [0]) is True def test_instance_segmentation_task_with_invalid_annotations(self, fxt_dataset_item: DatasetItem) -> None: """Test INSTANCE_SEGMENTATION task with invalid annotation types.""" @@ -201,9 +230,11 @@ def test_instance_segmentation_task_with_invalid_annotations(self, fxt_dataset_i invalid_polygon = Polygon(points=[0, 0, 0, 0, 0, 0], label=0) # Degenerate polygon label = Label(label=0) # Wrong type - assert is_valid_anno_for_task(fxt_dataset_item, invalid_bbox, OTXTaskType.INSTANCE_SEGMENTATION) is False - assert is_valid_anno_for_task(fxt_dataset_item, invalid_polygon, OTXTaskType.INSTANCE_SEGMENTATION) is False - assert is_valid_anno_for_task(fxt_dataset_item, label, OTXTaskType.INSTANCE_SEGMENTATION) is False + assert is_valid_anno_for_task(fxt_dataset_item, invalid_bbox, OTXTaskType.INSTANCE_SEGMENTATION, [0]) is False + assert ( + is_valid_anno_for_task(fxt_dataset_item, invalid_polygon, OTXTaskType.INSTANCE_SEGMENTATION, [0]) is False + ) + assert is_valid_anno_for_task(fxt_dataset_item, label, OTXTaskType.INSTANCE_SEGMENTATION, [0]) is False def test_other_task_types_use_default_validation(self, fxt_dataset_item: DatasetItem) -> None: """Test that other task types use the default is_valid_annot behavior.""" @@ -214,33 +245,128 @@ def test_other_task_types_use_default_validation(self, fxt_dataset_item: Dataset label = Label(label=0) # Test with CLASSIFICATION task - assert is_valid_anno_for_task(fxt_dataset_item, valid_bbox, OTXTaskType.MULTI_CLASS_CLS) is True - assert is_valid_anno_for_task(fxt_dataset_item, invalid_bbox, OTXTaskType.MULTI_CLASS_CLS) is False - assert is_valid_anno_for_task(fxt_dataset_item, valid_polygon, OTXTaskType.MULTI_CLASS_CLS) is True - assert is_valid_anno_for_task(fxt_dataset_item, invalid_polygon, OTXTaskType.MULTI_CLASS_CLS) is False - assert is_valid_anno_for_task(fxt_dataset_item, label, OTXTaskType.MULTI_CLASS_CLS) is True + assert is_valid_anno_for_task(fxt_dataset_item, valid_bbox, OTXTaskType.MULTI_CLASS_CLS, [0]) is True + 
assert is_valid_anno_for_task(fxt_dataset_item, invalid_bbox, OTXTaskType.MULTI_CLASS_CLS, [0]) is False + assert is_valid_anno_for_task(fxt_dataset_item, valid_polygon, OTXTaskType.MULTI_CLASS_CLS, [0]) is True + assert is_valid_anno_for_task(fxt_dataset_item, invalid_polygon, OTXTaskType.MULTI_CLASS_CLS, [0]) is False + assert is_valid_anno_for_task(fxt_dataset_item, label, OTXTaskType.MULTI_CLASS_CLS, [0]) is True # Test with SEMANTIC_SEGMENTATION task - assert is_valid_anno_for_task(fxt_dataset_item, valid_bbox, OTXTaskType.SEMANTIC_SEGMENTATION) is True - assert is_valid_anno_for_task(fxt_dataset_item, invalid_bbox, OTXTaskType.SEMANTIC_SEGMENTATION) is False - assert is_valid_anno_for_task(fxt_dataset_item, valid_polygon, OTXTaskType.SEMANTIC_SEGMENTATION) is True - assert is_valid_anno_for_task(fxt_dataset_item, invalid_polygon, OTXTaskType.SEMANTIC_SEGMENTATION) is False - assert is_valid_anno_for_task(fxt_dataset_item, label, OTXTaskType.SEMANTIC_SEGMENTATION) is True + assert is_valid_anno_for_task(fxt_dataset_item, valid_bbox, OTXTaskType.SEMANTIC_SEGMENTATION, [0]) is True + assert is_valid_anno_for_task(fxt_dataset_item, invalid_bbox, OTXTaskType.SEMANTIC_SEGMENTATION, [0]) is False + assert is_valid_anno_for_task(fxt_dataset_item, valid_polygon, OTXTaskType.SEMANTIC_SEGMENTATION, [0]) is True + assert ( + is_valid_anno_for_task(fxt_dataset_item, invalid_polygon, OTXTaskType.SEMANTIC_SEGMENTATION, [0]) is False + ) + assert is_valid_anno_for_task(fxt_dataset_item, label, OTXTaskType.SEMANTIC_SEGMENTATION, [0]) is True def test_edge_cases(self, fxt_dataset_item: DatasetItem) -> None: """Test edge cases for annotation validation.""" # Very small but valid bbox small_bbox = Bbox(x=0, y=0, w=0.1, h=0.1, label=0) - assert is_valid_anno_for_task(fxt_dataset_item, small_bbox, OTXTaskType.DETECTION) is True + assert is_valid_anno_for_task(fxt_dataset_item, small_bbox, OTXTaskType.DETECTION, [0]) is True # Bbox with equal coordinates (should be invalid) equal_bbox = Bbox(x=5, y=5, w=0, h=0, label=0) - assert is_valid_anno_for_task(fxt_dataset_item, equal_bbox, OTXTaskType.DETECTION) is False + assert is_valid_anno_for_task(fxt_dataset_item, equal_bbox, OTXTaskType.DETECTION, [0]) is False # Polygon with minimal valid area minimal_polygon = Polygon(points=[0, 0, 1, 0, 1, 1, 0, 1], label=0) - assert is_valid_anno_for_task(fxt_dataset_item, minimal_polygon, OTXTaskType.INSTANCE_SEGMENTATION) is True + assert is_valid_anno_for_task(fxt_dataset_item, minimal_polygon, OTXTaskType.INSTANCE_SEGMENTATION, [0]) is True # Degenerate polygon (should be invalid) degenerate_polygon = Polygon(points=[0, 0, 0, 0, 0, 0], label=0) - assert is_valid_anno_for_task(fxt_dataset_item, degenerate_polygon, OTXTaskType.INSTANCE_SEGMENTATION) is False + assert ( + is_valid_anno_for_task(fxt_dataset_item, degenerate_polygon, OTXTaskType.INSTANCE_SEGMENTATION, [0]) + is False + ) + + def test_keypoint_detection_task_with_valid_points(self, fxt_dataset_item: DatasetItem) -> None: + """Test KEYPOINT_DETECTION task with valid Points annotations.""" + # Test with 2 keypoints (4 coordinates: x1, y1, x2, y2) + points_2_kp = Points(points=[10, 20, 30, 40], label=0) + labels_2 = ["left_eye", "right_eye"] + result = is_valid_anno_for_task(fxt_dataset_item, points_2_kp, OTXTaskType.KEYPOINT_DETECTION, labels_2) + assert result is True + + # Test with 4 keypoints (8 coordinates: x1, y1, x2, y2, x3, y3, x4, y4) + points_4_kp = Points(points=[10, 20, 30, 40, 50, 60, 70, 80], label=0) + labels_4 = ["left_eye", "right_eye", 
"nose", "mouth"] + result = is_valid_anno_for_task(fxt_dataset_item, points_4_kp, OTXTaskType.KEYPOINT_DETECTION, labels_4) + assert result is True + + # Test with single keypoint (2 coordinates: x1, y1) + points_1_kp = Points(points=[10, 20], label=0) + labels_1 = ["center"] + result = is_valid_anno_for_task(fxt_dataset_item, points_1_kp, OTXTaskType.KEYPOINT_DETECTION, labels_1) + assert result is True + + def test_keypoint_detection_task_with_invalid_points(self, fxt_dataset_item: DatasetItem) -> None: + """Test KEYPOINT_DETECTION task with invalid Points annotations.""" + # Test with empty points + empty_points = Points(points=[], label=0) + labels = ["keypoint1", "keypoint2"] + result = is_valid_anno_for_task(fxt_dataset_item, empty_points, OTXTaskType.KEYPOINT_DETECTION, labels) + assert result is False + + # Test with wrong number of keypoints (too many) + too_many_points = Points(points=[10, 20, 30, 40, 50, 60], label=0) # 3 keypoints + labels = ["keypoint1", "keypoint2"] # Only 2 labels + result = is_valid_anno_for_task(fxt_dataset_item, too_many_points, OTXTaskType.KEYPOINT_DETECTION, labels) + assert result is False + + # Test with wrong number of keypoints (too few) + too_few_points = Points(points=[10, 20], label=0) # 1 keypoint + labels = ["keypoint1", "keypoint2", "keypoint3"] # 3 labels + result = is_valid_anno_for_task(fxt_dataset_item, too_few_points, OTXTaskType.KEYPOINT_DETECTION, labels) + assert result is False + + def test_keypoint_detection_task_with_wrong_annotation_types(self, fxt_dataset_item: DatasetItem) -> None: + """Test KEYPOINT_DETECTION task with non-Points annotation types.""" + labels = ["keypoint1", "keypoint2"] + + # Test with bbox (should be invalid) + bbox = Bbox(x=0, y=0, w=10, h=10, label=0) + result = is_valid_anno_for_task(fxt_dataset_item, bbox, OTXTaskType.KEYPOINT_DETECTION, labels) + assert result is False + + # Test with polygon (should be invalid) + polygon = Polygon(points=[0, 0, 10, 0, 10, 10, 0, 10], label=0) + result = is_valid_anno_for_task(fxt_dataset_item, polygon, OTXTaskType.KEYPOINT_DETECTION, labels) + assert result is False + + # Test with ellipse (should be invalid) + ellipse = Ellipse(x1=0, y1=0, x2=10, y2=10, label=0) + result = is_valid_anno_for_task(fxt_dataset_item, ellipse, OTXTaskType.KEYPOINT_DETECTION, labels) + assert result is False + + # Test with label (should be invalid) + label = Label(label=0) + result = is_valid_anno_for_task(fxt_dataset_item, label, OTXTaskType.KEYPOINT_DETECTION, labels) + assert result is False + + def test_keypoint_detection_edge_cases(self, fxt_dataset_item: DatasetItem) -> None: + """Test edge cases for keypoint detection validation.""" + # Test with zero coordinates (empty points) + empty_points = Points(points=[], label=0) + empty_labels = [] + result = is_valid_anno_for_task(fxt_dataset_item, empty_points, OTXTaskType.KEYPOINT_DETECTION, empty_labels) + assert result is False # Empty points should be invalid + + # Test with many keypoints + many_points = Points(points=list(range(34)), label=0) # 17 keypoints (34 coordinates) + many_labels = [f"keypoint_{i}" for i in range(17)] + result = is_valid_anno_for_task(fxt_dataset_item, many_points, OTXTaskType.KEYPOINT_DETECTION, many_labels) + assert result is True + + # Test with negative coordinates (should still be valid as coordinates can be negative) + negative_points = Points(points=[-10, -20, -30, -40], label=0) + labels = ["keypoint1", "keypoint2"] + result = is_valid_anno_for_task(fxt_dataset_item, negative_points, 
OTXTaskType.KEYPOINT_DETECTION, labels) + assert result is True + + # Test with floating point coordinates + float_points = Points(points=[10.5, 20.7, 30.1, 40.9], label=0) + labels = ["keypoint1", "keypoint2"] + result = is_valid_anno_for_task(fxt_dataset_item, float_points, OTXTaskType.KEYPOINT_DETECTION, labels) + assert result is True diff --git a/library/tests/unit/data/transform_libs/test_torchvision.py b/library/tests/unit/data/transform_libs/test_torchvision.py index 966f4f8c1a..02d39446ac 100644 --- a/library/tests/unit/data/transform_libs/test_torchvision.py +++ b/library/tests/unit/data/transform_libs/test_torchvision.py @@ -51,6 +51,17 @@ def close(self): return +@pytest.fixture() +def seg_data_entity() -> OTXDataItem: + masks = torch.randint(low=0, high=2, size=(1, 112, 224), dtype=torch.uint8) + return OTXDataItem( + image=tv_tensors.Image(torch.randint(low=0, high=256, size=(3, 112, 224), dtype=torch.uint8)), + img_info=ImageInfo(img_idx=0, img_shape=(112, 224), ori_shape=(112, 224)), + masks=tv_tensors.Mask(masks), + label=LongTensor([1]), + ) + + @pytest.fixture() def det_data_entity() -> OTXDataItem: return OTXDataItem( @@ -359,6 +370,22 @@ def test_forward(self, random_affine: RandomAffine, det_data_entity: OTXDataItem assert results.bboxes.dtype == torch.float32 assert results.img_info.img_shape == results.image.shape[:2] + def test_segmentation_transform( + self, random_affine_with_mask_transform: RandomAffine, seg_data_entity: OTXDataItem + ) -> None: + """Test forward for segmentation task.""" + original_entity = deepcopy(seg_data_entity) + results = random_affine_with_mask_transform(original_entity) + + assert hasattr(results, "masks") + assert results.masks is not None + assert results.masks.shape[0] > 0 # Should have masks + assert results.masks.shape[1:] == results.image.shape[:2] # Same spatial dimensions as image + + # Check that the number of masks matches the number of remaining bboxes and labels + assert results.masks.shape[0] == results.label.shape[0] + assert isinstance(results.masks, tv_tensors.Mask) + def test_forward_with_masks_transform_enabled( self, random_affine_with_mask_transform: RandomAffine, diff --git a/library/tests/unit/tools/test_converter.py b/library/tests/unit/tools/test_converter.py index f1856bbcd4..ac39edfab1 100644 --- a/library/tests/unit/tools/test_converter.py +++ b/library/tests/unit/tools/test_converter.py @@ -15,8 +15,8 @@ def test_convert(self): config = GetiConfigConverter.convert(asdict(otx_config)) assert config["data"]["input_size"] == (992, 800) - assert config["data"]["train_subset"]["batch_size"] == 8 - assert config["data"]["val_subset"]["batch_size"] == 8 + assert config["data"]["train_subset"]["batch_size"] == 4 + assert config["data"]["val_subset"]["batch_size"] == 4 assert config["data"]["test_subset"]["batch_size"] == 8 assert config["model"]["init_args"]["optimizer"]["init_args"]["lr"] == 0.001 assert config["max_epochs"] == 100 @@ -266,8 +266,8 @@ def test_instantiate(self, tmp_path): assert engine.work_dir == tmp_path assert engine.datamodule.data_root == data_root - assert engine.datamodule.train_subset.batch_size == 8 - assert engine.datamodule.val_subset.batch_size == 8 + assert engine.datamodule.train_subset.batch_size == 4 + assert engine.datamodule.val_subset.batch_size == 4 assert engine.datamodule.test_subset.batch_size == 8 assert engine.datamodule.train_subset.num_workers == 2 assert engine.datamodule.val_subset.num_workers == 2
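
The checkpoint fallback added to d_fine.py above is easiest to see in isolation. Below is a minimal sketch, assuming illustrative names (TinyHead, load_with_fallback) rather than the real OTX classes: a strict load fails when a size-dependent, non-trainable buffer was saved for a different input size, so the buffer is dropped and the load is retried with strict=False, mirroring the pops of model.decoder.anchors and model.decoder.valid_mask.

import torch
from torch import nn


class TinyHead(nn.Module):
    """Toy module with a non-trainable buffer whose shape depends on the input size."""

    def __init__(self, num_anchors: int) -> None:
        super().__init__()
        self.register_buffer("anchors", torch.zeros(num_anchors, 4))


def load_with_fallback(model: nn.Module, ckpt: dict) -> None:
    try:
        model.load_state_dict(ckpt)
    except RuntimeError:
        # Size mismatch: drop the regenerable buffer and retry non-strictly.
        ckpt.pop("anchors", None)
        model.load_state_dict(ckpt, strict=False)


src, dst = TinyHead(100), TinyHead(400)
load_with_fallback(dst, src.state_dict())  # succeeds via the non-strict path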
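
The keypoint pre-filtering rule added to pre_filtering.py boils down to one check. Here is a self-contained sketch of that rule, with is_valid_points as a hypothetical stand-in for the Points branch of is_valid_annot: a flat [x1, y1, x2, y2, ...] list is valid only when it is non-empty and contains exactly one (x, y) pair per keypoint label.

def is_valid_points(points: list[float], labels: list[str]) -> bool:
    """Keep a Points annotation only if it has one (x, y) pair per label."""
    if len(points) == 0:
        return False
    return len(points) // 2 == len(labels)


assert is_valid_points([10, 20, 30, 40], ["left_eye", "right_eye"])  # 2 pairs, 2 labels
assert not is_valid_points([10, 20], ["left_eye", "right_eye"])      # 1 pair, 2 labels
assert not is_valid_points([], [])                                   # empty is always dropped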
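
The adaptive batch size search above now targets a wider memory window: a trial is accepted when peak memory lands between 75% and 90% of total device memory, instead of the previous 80% to 85%. A sketch of that acceptance test, with within_target_window as an illustrative name rather than a BsSearchAlgo method:

def within_target_window(peak_mem: float, total_mem: float) -> bool:
    """Accept a trial batch size when peak memory falls in [75%, 90%] of total memory."""
    return 0.75 * total_mem <= peak_mem <= 0.9 * total_mem


assert within_target_window(8_000, 10_000)      # inside the widened window
assert not within_target_window(9_500, 10_000)  # too close to OOM; the search decreases bs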
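
The RandomAffine change replaces the NotImplementedError for multi-class masks with a nearest-neighbor cv2.warpPerspective, matching the call in the diff. A small sketch of why INTER_NEAREST is the right flag there: it never interpolates between class ids, so the warped mask contains only labels that existed in the input.

import cv2
import numpy as np

# Index mask with class ids 0..2; a pure translation homography for illustration.
mask = np.random.randint(0, 3, size=(64, 64), dtype=np.uint8)
warp = np.array([[1.0, 0.0, 5.0], [0.0, 1.0, 3.0], [0.0, 0.0, 1.0]], dtype=np.float32)

warped = cv2.warpPerspective(mask, warp, dsize=(64, 64), flags=cv2.INTER_NEAREST, borderValue=0)
assert set(np.unique(warped)).issubset({0, 1, 2})  # no interpolated class ids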