
Commit 62e7923

Created DB seeding for Instance Segmentation projects (#5005)
1 parent 8b3b5b8 commit 62e7923

3 files changed: +279, -98 lines changed

application/backend/app/cli.py

Lines changed: 62 additions & 76 deletions
@@ -4,22 +4,19 @@
 """Command line interface for interacting with the Geti Tune application."""

 import sys
-from datetime import datetime, timedelta

 import click

 from app.db import MigrationManager, get_db_session
-from app.db.schema import DatasetItemDB, LabelDB, ModelRevisionDB, PipelineDB, ProjectDB, SinkDB, SourceDB
-from app.models import (
-    DisconnectedSinkConfig,
-    DisconnectedSourceConfig,
-    FixedRateDataCollectionPolicy,
-    OutputFormat,
-    SinkType,
-    SourceType,
-    TaskType,
-    TrainingStatus,
+from app.db.schema import DatasetItemDB, ModelRevisionDB, ProjectDB, SinkDB, SourceDB
+from app.db_seeder import (
+    _create_detection_labels,
+    _create_pipeline_with_video_source,
+    _create_project,
+    _create_segmentation_labels,
+    _create_shared_sinks_sources_folders,
 )
+from app.models import TaskType
 from app.settings import get_settings

 settings = get_settings()
@@ -84,79 +81,68 @@ def check_db() -> None:
 @cli.command()
 @click.option("--with-model", default=False)
 def seed(with_model: bool) -> None:
-    """Seed the database with test data."""
+    """
+    Seed the database with test data.
+
+    Args:
+        with_model (bool): Whether to include pre-trained models in the seed data.
+    """
     # If the app is running, it needs to be restarted since it doesn't track direct DB changes
     # Fixed IDs are used to ensure consistency in tests
     click.echo("Seeding database with test data...")
-    project_id = "9d6af8e8-6017-4ebe-9126-33aae739c5fa"
-    with get_db_session() as db:
-        project = ProjectDB(
-            id=project_id,
-            name="Test Project",
-            task_type=TaskType.DETECTION,
-            exclusive_labels=True,
+    sources, sinks, folders = _create_shared_sinks_sources_folders()
+
+    # Project 1: Object Detection
+    detection_project = _create_project(
+        project_id="9d6af8e8-6017-4ebe-9126-33aae739c5fa",
+        task_type=TaskType.DETECTION,
+        exclusive_labels=True,
+    )
+    detection_labels = _create_detection_labels(project_id=detection_project.id)
+
+    # Project 2: Instance Segmentation
+    segmentation_project = _create_project(
+        project_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890",
+        task_type=TaskType.INSTANCE_SEGMENTATION,
+        exclusive_labels=True,
+    )
+    segmentation_labels = _create_segmentation_labels(project_id=segmentation_project.id)
+
+    detection_pipeline = None
+    instance_segmentation_pipeline = None
+    if with_model:
+        detection_pipeline = _create_pipeline_with_video_source(
+            project_id=detection_project.id,
+            source_id="f6b1ac22-e36c-4b36-9a23-62b0881e4223",
+            source_name="Video Source - Detection",
+            video_path="data/media/card-video.mp4",
+            sink_id=folders.id,
+            model_id="977eeb18-eaac-449d-bc80-e340fbe052ad",
+            model_architecture="Object_Detection_SSD",
+            labels=detection_labels,
         )
-        db.add(project)
-        db.flush()
-        labels = [
-            LabelDB(project_id=project_id, name="Clubs", color="#2d6311", hotkey="c"),
-            LabelDB(project_id=project_id, name="Diamonds", color="#baa3b3", hotkey="d"),
-            LabelDB(project_id=project_id, name="Spades", color="#000702", hotkey="s"),
-            LabelDB(project_id=project_id, name="Hearts", color="#1f016b", hotkey="h"),
-            LabelDB(project_id=project_id, name="No_object", color="#565a84", hotkey="n"),
-        ]
-        db.add_all(labels)
-        db.flush()

-        # Create default disconnected source and sink
-        disconnected_source_cfg = DisconnectedSourceConfig()
-        disconnected_source = SourceDB(
-            id="00000000-0000-0000-0000-000000000000",
-            name=disconnected_source_cfg.name,
-            source_type=disconnected_source_cfg.source_type,
-            config_data={},
-        )
-        disconnected_sink_cfg = DisconnectedSinkConfig()
-        disconnected_sink = SinkDB(
-            id="00000000-0000-0000-0000-000000000000",
-            name=disconnected_sink_cfg.name,
-            sink_type=disconnected_sink_cfg.sink_type,
-            output_formats=[],
-            config_data={},
-        )
-        folder_sink = SinkDB(
-            id="6ee0c080-c7d9-4438-a7d2-067fd395eecf",
-            name="Folder Sink",
-            sink_type=SinkType.FOLDER,
-            rate_limit=0.2,
-            output_formats=[OutputFormat.IMAGE_ORIGINAL, OutputFormat.IMAGE_WITH_PREDICTIONS, OutputFormat.PREDICTIONS],
-            config_data={"folder_path": "data/output"},
+        instance_segmentation_pipeline = _create_pipeline_with_video_source(
+            project_id=segmentation_project.id,
+            source_id="b2c3d4e5-f6a7-8901-bcde-f12345678901",
+            source_name="Video Source - Segmentation",
+            video_path="data/media/fish-video.mp4",
+            sink_id=folders.id,
+            model_id="c3d4e5f6-a7b8-9012-cdef-123456789012",
+            model_architecture="Custom_Instance_Segmentation_RTMDet_tiny",
+            labels=segmentation_labels,
         )
-        db.add_all([disconnected_source, disconnected_sink, folder_sink])
+
+    with get_db_session() as db:
+        db.add_all([sources, sinks, folders, detection_project, segmentation_project])
+        db.flush()
+        db.add_all(detection_labels + segmentation_labels)
         db.flush()

-        pipeline = PipelineDB(project_id=project.id)
-        pipeline.source = SourceDB(
-            id="f6b1ac22-e36c-4b36-9a23-62b0881e4223",
-            name="Video Source",
-            source_type=SourceType.VIDEO_FILE,
-            config_data={"video_path": "data/media/video.mp4"},
-        )
-        pipeline.sink_id = folder_sink.id
-        pipeline.data_collection_policies = [FixedRateDataCollectionPolicy(rate=0.1).model_dump(mode="json")]
-        if with_model:
-            pipeline.model_revision = ModelRevisionDB(
-                id="977eeb18-eaac-449d-bc80-e340fbe052ad",
-                project_id=project.id,
-                architecture="Object_Detection_SSD",
-                training_status=TrainingStatus.SUCCESSFUL,
-                training_started_at=datetime.now() - timedelta(hours=24),
-                training_finished_at=datetime.now() - timedelta(hours=23),
-                training_configuration={},
-                label_schema_revision={"labels": [{"id": str(label.id), "name": label.name} for label in labels]},
-            )
-            pipeline.is_running = True
-        db.add(pipeline)
+        if with_model and detection_pipeline and instance_segmentation_pipeline:
+            db.add_all([detection_pipeline, instance_segmentation_pipeline])
+            db.flush()
+
         db.commit()
         click.echo("✓ Seeding successful!")
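For reference, the reworked seed command can be exercised end to end with click's built-in test runner. The snippet below is a minimal sketch, not part of this commit: it assumes the click group defined in application/backend/app/cli.py is importable as app.cli.cli (the @cli.command() decorator above implies a group named cli), and it passes --with-model as a value because the option is declared with default=False rather than is_flag=True.

    # Sketch only: exercising the new `seed` command via click's test runner.
    # Assumption: the click group in app/cli.py is exported as `cli`.
    from click.testing import CliRunner

    from app.cli import cli

    runner = CliRunner()
    # `--with-model` takes a boolean value (it is not a flag), so pass it explicitly.
    result = runner.invoke(cli, ["seed", "--with-model", "true"])
    assert result.exit_code == 0, result.output
    assert "Seeding successful" in result.output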

application/backend/app/db_seeder.py

Lines changed: 163 additions & 0 deletions
@@ -0,0 +1,163 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0


from datetime import datetime, timedelta
from uuid import UUID

from app.db.schema import LabelDB, ModelRevisionDB, PipelineDB, ProjectDB, SinkDB, SourceDB
from app.models import (
    DisconnectedSinkConfig,
    DisconnectedSourceConfig,
    FixedRateDataCollectionPolicy,
    OutputFormat,
    SinkType,
    SourceType,
    TaskType,
    TrainingStatus,
)


def _create_shared_sinks_sources_folders() -> tuple[SourceDB, SinkDB, SinkDB]:
    """
    Create shared source, sink, and folder sink entities.

    Returns:
        tuple[SourceDB, SinkDB, SinkDB]: Created source, sink, and folder sink objects.
    """
    disconnected_source_cfg = DisconnectedSourceConfig()
    disconnected_source = SourceDB(
        id="00000000-0000-0000-0000-000000000000",
        name=disconnected_source_cfg.name,
        source_type=disconnected_source_cfg.source_type,
        config_data={},
    )
    disconnected_sink_cfg = DisconnectedSinkConfig()
    disconnected_sink = SinkDB(
        id="00000000-0000-0000-0000-000000000000",
        name=disconnected_sink_cfg.name,
        sink_type=disconnected_sink_cfg.sink_type,
        output_formats=[],
        config_data={},
    )
    folder_sink = SinkDB(
        id="6ee0c080-c7d9-4438-a7d2-067fd395eecf",
        name="Folder Sink",
        sink_type=SinkType.FOLDER,
        rate_limit=0.2,
        output_formats=[OutputFormat.IMAGE_ORIGINAL, OutputFormat.IMAGE_WITH_PREDICTIONS, OutputFormat.PREDICTIONS],
        config_data={"folder_path": "data/output"},
    )
    return disconnected_source, disconnected_sink, folder_sink


def _create_project(
    project_id: str | UUID,
    task_type: TaskType,
    exclusive_labels: bool = True,
) -> ProjectDB:
    """
    Create a project in the database.

    Args:
        project_id (str | UUID): Unique identifier for the project.
        task_type (TaskType): Type of task (e.g., DETECTION, INSTANCE_SEGMENTATION, CLASSIFICATION).
        exclusive_labels (bool): Whether labels are mutually exclusive.

    Returns:
        ProjectDB: Created project object.
    """
    return ProjectDB(
        id=project_id,
        name=f"Demo {task_type} project",
        task_type=task_type,
        exclusive_labels=exclusive_labels,
    )


def _create_detection_labels(project_id: str | UUID) -> list[LabelDB]:
    """
    Create labels for a Detection card project.

    Args:
        project_id (str | UUID): ID of the project to add labels to.

    Returns:
        list[LabelDB]: List of created label objects.
    """
    return [
        LabelDB(project_id=project_id, name="Clubs", color="#2d6311", hotkey="c"),
        LabelDB(project_id=project_id, name="Diamonds", color="#baa3b3", hotkey="d"),
        LabelDB(project_id=project_id, name="Spades", color="#000702", hotkey="s"),
        LabelDB(project_id=project_id, name="Hearts", color="#1f016b", hotkey="h"),
        LabelDB(project_id=project_id, name="No_object", color="#565a84", hotkey="n"),
    ]


def _create_segmentation_labels(project_id: str | UUID) -> list[LabelDB]:
    """
    Create labels for an Instance Segmentation fish project.

    Args:
        project_id (str | UUID): ID of the project to add labels to.

    Returns:
        list[LabelDB]: List of created label objects.
    """
    return [
        LabelDB(project_id=project_id, name="Fish", color="#2d6311", hotkey="f"),
        LabelDB(project_id=project_id, name="Empty", color="#565a84", hotkey="e"),
    ]


def _create_pipeline_with_video_source(  # noqa: PLR0913
    project_id: str | UUID,
    source_id: str | UUID,
    source_name: str,
    video_path: str,
    sink_id: str | UUID,
    model_id: str,
    model_architecture: str,
    labels: list[LabelDB],
) -> PipelineDB:
    """
    Create a pipeline with a video file source for a project.

    Args:
        project_id (str | UUID): ID of the project.
        source_id (str | UUID): Unique identifier for the video source.
        source_name (str): Name for the video source.
        video_path (str): Path to the video file.
        sink_id (str | UUID): ID of the sink to use.
        model_id (str): Unique identifier for the model revision.
        model_architecture (str): Architecture name of the model.
        labels (list[LabelDB]): List of labels for the label schema revision.

    Returns:
        PipelineDB: Created pipeline object.
    """
    pipeline = PipelineDB(
        project_id=project_id,
        sink_id=sink_id,
        data_collection_policies=[FixedRateDataCollectionPolicy(rate=0.1).model_dump(mode="json")],
        is_running=True,
    )

    pipeline.source = SourceDB(
        id=source_id,
        name=source_name,
        source_type=SourceType.VIDEO_FILE,
        config_data={"video_path": video_path},
    )

    pipeline.model_revision = ModelRevisionDB(
        id=model_id,
        project_id=project_id,
        architecture=model_architecture,
        training_status=TrainingStatus.SUCCESSFUL,
        training_started_at=datetime.now() - timedelta(hours=24),
        training_finished_at=datetime.now() - timedelta(hours=23),
        training_configuration={},
        label_schema_revision={"labels": [{"id": str(label.id), "name": label.name} for label in labels]},
    )
    return pipeline
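As a rough illustration of how these helpers compose outside of cli.py, the sketch below seeds one extra Detection project by reusing _create_project and _create_detection_labels. It is hypothetical and not part of the commit; the project ID is an arbitrary placeholder.

    # Hypothetical usage of the db_seeder helpers; the fixed ID below is a placeholder.
    from app.db import get_db_session
    from app.db_seeder import _create_detection_labels, _create_project
    from app.models import TaskType

    extra_project = _create_project(
        project_id="11111111-2222-3333-4444-555555555555",  # placeholder, not from this commit
        task_type=TaskType.DETECTION,
    )
    extra_labels = _create_detection_labels(project_id=extra_project.id)

    with get_db_session() as db:
        db.add(extra_project)
        db.flush()  # flush so the project row exists before its labels are inserted
        db.add_all(extra_labels)
        db.commit()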
