Commit c2cf892: 🎨 introduce comp_run_snapshot_tasks table (🗃️) (#7758)
1 parent 125e103

24 files changed: +1191 −107 lines
Lines changed: 105 additions & 0 deletions
@@ -0,0 +1,105 @@
"""add comp_run_snapshot_tasks table

Revision ID: e89eae27fb3f
Revises: 278daef7e99d
Create Date: 2025-05-29 16:52:00.435268+00:00

"""

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "e89eae27fb3f"
down_revision = "278daef7e99d"
branch_labels = None
depends_on = None


def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        "comp_run_snapshot_tasks",
        sa.Column("snapshot_task_id", sa.Integer(), nullable=False),
        sa.Column("run_id", sa.Integer(), nullable=False),
        sa.Column("project_id", sa.String(), nullable=True),
        sa.Column("node_id", sa.String(), nullable=True),
        # sa.Column('node_class', sa.Enum('COMPUTATIONAL', 'INTERACTIVE', 'FRONTEND', name='nodeclass'), nullable=True),
        sa.Column("job_id", sa.String(), nullable=True),
        sa.Column("internal_id", sa.Integer(), nullable=True),
        sa.Column("schema", sa.JSON(), nullable=True),
        sa.Column("inputs", sa.JSON(), nullable=True),
        sa.Column("outputs", sa.JSON(), nullable=True),
        sa.Column("run_hash", sa.String(), nullable=True),
        sa.Column("image", sa.JSON(), nullable=True),
        # sa.Column('state', sa.Enum('NOT_STARTED', 'PUBLISHED', 'PENDING', 'RUNNING', 'SUCCESS', 'FAILED', 'ABORTED', 'WAITING_FOR_RESOURCES', 'WAITING_FOR_CLUSTER', name='statetype'), server_default='NOT_STARTED', nullable=False),
        sa.Column("errors", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column("progress", sa.Numeric(precision=3, scale=2), nullable=True),
        sa.Column("start", sa.DateTime(timezone=True), nullable=True),
        sa.Column("end", sa.DateTime(timezone=True), nullable=True),
        sa.Column("last_heartbeat", sa.DateTime(timezone=True), nullable=True),
        sa.Column(
            "created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "modified",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "pricing_info", postgresql.JSONB(astext_type=sa.Text()), nullable=True
        ),
        sa.Column(
            "hardware_info", postgresql.JSONB(astext_type=sa.Text()), nullable=True
        ),
        sa.ForeignKeyConstraint(
            ["run_id"],
            ["comp_runs.run_id"],
            name="fk_snapshot_tasks_to_comp_runs",
            onupdate="CASCADE",
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("snapshot_task_id"),
    )
    op.add_column(
        "comp_runs",
        sa.Column(
            "dag_adjacency_list",
            postgresql.JSONB(astext_type=sa.Text()),
            server_default=sa.text("'{}'::jsonb"),
            nullable=False,
        ),
    )
    # ### end Alembic commands ###

    # The enum columns are added with raw SQL, presumably because the
    # nodeclass/statetype types already exist in the database and the
    # autogenerated sa.Enum columns (commented out above) would try to
    # re-create them.
    op.execute("ALTER TABLE comp_run_snapshot_tasks ADD COLUMN node_class nodeclass;")
    op.execute("ALTER TABLE comp_run_snapshot_tasks ADD COLUMN state statetype;")

    op.alter_column(
        "comp_run_snapshot_tasks",
        "state",
        existing_type=postgresql.ENUM(
            "NOT_STARTED",
            "PUBLISHED",
            "PENDING",
            "RUNNING",
            "SUCCESS",
            "FAILED",
            "ABORTED",
            "WAITING_FOR_RESOURCES",
            "WAITING_FOR_CLUSTER",
            name="statetype",
        ),
        nullable=False,
    )


def downgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column("comp_runs", "dag_adjacency_list")
    op.drop_table("comp_run_snapshot_tasks")
    # ### end Alembic commands ###
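For reference, the same two columns could also be attached through Alembic's native API by marking the enum types as pre-existing with create_type=False; a minimal sketch of that alternative (not what the commit does), shown for node_class only:

# Hypothetical alternative to the raw ALTER TABLE statements above: inside
# upgrade(), reference the pre-existing PostgreSQL enum type with
# create_type=False so Alembic does not emit CREATE TYPE again.
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql


def _add_node_class_column() -> None:
    op.add_column(
        "comp_run_snapshot_tasks",
        sa.Column(
            "node_class",
            postgresql.ENUM(
                "COMPUTATIONAL",
                "INTERACTIVE",
                "FRONTEND",
                name="nodeclass",
                create_type=False,
            ),
            nullable=True,
        ),
    )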
Lines changed: 105 additions & 0 deletions
@@ -0,0 +1,105 @@
"""Computational Tasks Table"""

import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

from ._common import (
    RefActions,
    column_created_datetime,
    column_modified_datetime,
)
from .base import metadata
from .comp_pipeline import StateType
from .comp_runs import comp_runs
from .comp_tasks import NodeClass

comp_run_snapshot_tasks = sa.Table(
    "comp_run_snapshot_tasks",
    metadata,
    sa.Column(
        "snapshot_task_id",
        sa.Integer,
        primary_key=True,
    ),
    sa.Column(
        "run_id",
        sa.Integer,
        sa.ForeignKey(
            comp_runs.c.run_id,
            name="fk_snapshot_tasks_to_comp_runs",
            onupdate=RefActions.CASCADE,
            ondelete=RefActions.CASCADE,
        ),
        nullable=False,
    ),
    sa.Column(
        "project_id",
        sa.String,
        doc="Project that contains the node associated to this task",
    ),
    sa.Column("node_id", sa.String, doc="Node associated to this task"),
    sa.Column(
        "node_class",
        sa.Enum(NodeClass, name="nodeclass"),
        doc="Classification of the node associated to this task",
    ),
    sa.Column("job_id", sa.String, doc="Worker job ID for this task"),
    sa.Column("internal_id", sa.Integer, doc="DEV: only for development. From 1 to N"),
    sa.Column("schema", sa.JSON, doc="Schema for inputs and outputs"),
    sa.Column("inputs", sa.JSON, doc="Input values"),
    sa.Column("outputs", sa.JSON, doc="Output values"),
    sa.Column(
        "run_hash",
        sa.String,
        nullable=True,
        doc="Hashes inputs before run. Used to detect changes in inputs.",
    ),
    sa.Column(
        "image", sa.JSON, doc="Metadata about service image associated to this node"
    ),
    sa.Column(
        "state",
        sa.Enum(StateType, name="statetype"),
        nullable=False,
        server_default=StateType.NOT_STARTED.value,
        doc="Current state in the task lifecycle",
    ),
    sa.Column(
        "errors",
        postgresql.JSONB,
        nullable=True,
        doc="List[models_library.errors.ErrorDict] with error information"
        " for a failing state, otherwise set to None",
    ),
    sa.Column(
        "progress",
        sa.Numeric(precision=3, scale=2),  # numbers from 0.00 to 1.00
        nullable=True,
        doc="current progress of the task if available",
    ),
    sa.Column(
        "start", sa.DateTime(timezone=True), doc="UTC timestamp when task started"
    ),
    sa.Column(
        "end", sa.DateTime(timezone=True), doc="UTC timestamp for task completion"
    ),
    sa.Column(
        "last_heartbeat",
        sa.DateTime(timezone=True),
        doc="UTC timestamp for last task running check",
    ),
    column_created_datetime(timezone=True),
    column_modified_datetime(timezone=True),
    sa.Column(
        "pricing_info",
        postgresql.JSONB,
        nullable=True,
        doc="Billing information of this task",
    ),
    sa.Column(
        "hardware_info",
        postgresql.JSONB,
        nullable=True,
        doc="Hardware information of this task",
    ),
)
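A minimal usage sketch of the new table, assuming an initialized AsyncEngine and the import path implied by the file layout in this commit; the IDs and payload are hypothetical:

# Hypothetical sketch: snapshot one task into comp_run_snapshot_tasks and
# return the generated primary key. `engine` is assumed to be a configured
# sqlalchemy.ext.asyncio.AsyncEngine.
from sqlalchemy.ext.asyncio import AsyncEngine

from simcore_postgres_database.models.comp_run_snapshot_tasks import (
    comp_run_snapshot_tasks,
)


async def snapshot_task(engine: AsyncEngine, run_id: int, task: dict) -> int:
    async with engine.begin() as conn:
        result = await conn.execute(
            comp_run_snapshot_tasks.insert()
            .values(
                run_id=run_id,
                project_id=task["project_id"],
                node_id=task["node_id"],
                inputs=task.get("inputs"),
                outputs=task.get("outputs"),
            )
            .returning(comp_run_snapshot_tasks.c.snapshot_task_id)
        )
        return result.scalar_one()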

packages/postgres-database/src/simcore_postgres_database/models/comp_runs.py

Lines changed: 7 additions & 0 deletions
@@ -98,6 +98,13 @@
         nullable=False,
         doc="the run uses on demand clusters",
     ),
+    sa.Column(
+        "dag_adjacency_list",
+        JSONB,
+        doc="Adjacency list for the pipeline's graph",
+        server_default=sa.text("'{}'::jsonb"),
+        nullable=False,
+    ),
     sa.UniqueConstraint("project_uuid", "user_id", "iteration"),
     sa.Index("ix_comp_runs_user_id", "user_id"),
 )
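For context, the new column stores the pipeline graph in adjacency-list form; a hypothetical payload follows (the exact key/value convention is an assumption, since the commit only shows the column definition):

# Hypothetical dag_adjacency_list value: each key is a node ID and the value
# lists the IDs of its direct downstream nodes; terminal nodes map to [].
dag_adjacency_list = {
    "2fb8dbb5-...": ["b0b55b4f-..."],  # producer feeds one consumer
    "b0b55b4f-...": [],                # terminal node
}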
Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
from typing import Any

import sqlalchemy as sa
from pydantic import PositiveInt
from sqlalchemy.engine.row import Row
from sqlalchemy.ext.asyncio import AsyncConnection, AsyncEngine

from .models.comp_run_snapshot_tasks import comp_run_snapshot_tasks
from .utils_repos import pass_or_acquire_connection

COMP_RUN_SNAPSHOT_TASKS_DB_COLS = (
    comp_run_snapshot_tasks.c.snapshot_task_id,
    comp_run_snapshot_tasks.c.run_id,
    comp_run_snapshot_tasks.c.project_id,
    comp_run_snapshot_tasks.c.node_id,
    comp_run_snapshot_tasks.c.node_class,
    comp_run_snapshot_tasks.c.job_id,
    comp_run_snapshot_tasks.c.internal_id,
    comp_run_snapshot_tasks.c.schema,
    comp_run_snapshot_tasks.c.inputs,
    comp_run_snapshot_tasks.c.outputs,
    comp_run_snapshot_tasks.c.run_hash,
    comp_run_snapshot_tasks.c.image,
    comp_run_snapshot_tasks.c.state,
    comp_run_snapshot_tasks.c.errors,
    comp_run_snapshot_tasks.c.progress,
    comp_run_snapshot_tasks.c.start,
    comp_run_snapshot_tasks.c.end,
    comp_run_snapshot_tasks.c.last_heartbeat,
    comp_run_snapshot_tasks.c.created,
    comp_run_snapshot_tasks.c.modified,
    comp_run_snapshot_tasks.c.pricing_info,
    comp_run_snapshot_tasks.c.hardware_info,
)


async def update_for_run_id_and_node_id(
    engine: AsyncEngine,
    conn: AsyncConnection | None = None,
    *,
    run_id: PositiveInt,
    node_id: str,
    data: dict[str, Any],
) -> Row:
    async with pass_or_acquire_connection(engine, connection=conn) as _conn:
        result = await _conn.execute(
            comp_run_snapshot_tasks.update()
            .values(
                **data,
                modified=sa.func.now(),
            )
            .where(
                (comp_run_snapshot_tasks.c.run_id == run_id)
                & (comp_run_snapshot_tasks.c.node_id == node_id)
            )
            .returning(*COMP_RUN_SNAPSHOT_TASKS_DB_COLS)
        )
        row = result.one_or_none()
        if row is None:
            msg = f"update for run_id={run_id} and node_id={node_id} did not return any row"
            raise ValueError(msg)
        return row
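A minimal usage sketch for the function above, assuming an initialized AsyncEngine; the run/node IDs and the payload are hypothetical, and StateType is the enum imported in the model file:

# Hypothetical call site for update_for_run_id_and_node_id (defined above):
# mark one snapshot task as RUNNING and read the updated row back via the
# RETURNING clause.
from sqlalchemy.ext.asyncio import AsyncEngine

from simcore_postgres_database.models.comp_pipeline import StateType


async def mark_running(engine: AsyncEngine) -> None:
    row = await update_for_run_id_and_node_id(
        engine,
        run_id=42,  # hypothetical run
        node_id="b0b55b4f-...",  # hypothetical node UUID
        data={"state": StateType.RUNNING, "progress": 0.25},
    )
    print(row.state, row.progress)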
Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
import logging
from typing import cast

import sqlalchemy as sa
from pydantic import PositiveInt
from sqlalchemy.ext.asyncio import AsyncConnection, AsyncEngine

from .models.comp_runs import comp_runs
from .utils_repos import pass_or_acquire_connection

_logger = logging.getLogger(__name__)


async def get_latest_run_id_for_project(
    engine: AsyncEngine,
    conn: AsyncConnection | None = None,
    *,
    project_id: str,
) -> PositiveInt | None:
    # Get latest run per (project_uuid, user_id)
    project_and_user_latest_runs = (
        sa.select(
            comp_runs.c.project_uuid,
            comp_runs.c.user_id,
            sa.func.max(comp_runs.c.iteration).label("latest_iteration"),
            sa.func.max(comp_runs.c.created).label("created"),
        )
        .where(comp_runs.c.project_uuid == project_id)
        .group_by(comp_runs.c.project_uuid, comp_runs.c.user_id)
        .subquery("project_and_user_latest_runs")
    )

    # Rank users per project by latest run creation time
    ranked = sa.select(
        project_and_user_latest_runs.c.project_uuid,
        project_and_user_latest_runs.c.user_id,
        project_and_user_latest_runs.c.latest_iteration,
        project_and_user_latest_runs.c.created,
        sa.func.row_number()
        .over(
            partition_by=project_and_user_latest_runs.c.project_uuid,
            order_by=project_and_user_latest_runs.c.created.desc(),
        )
        .label("row_number"),
    ).subquery("ranked")

    # Filter to only the top-ranked (most recent) user per project
    filtered_ranked = (
        sa.select(
            ranked.c.project_uuid,
            ranked.c.user_id,
            ranked.c.latest_iteration,
        )
        .where(ranked.c.row_number == 1)
        .subquery("filtered_ranked")
    )

    # Base select query
    base_select_query = sa.select(comp_runs.c.run_id).select_from(
        filtered_ranked.join(
            comp_runs,
            sa.and_(
                comp_runs.c.project_uuid == filtered_ranked.c.project_uuid,
                comp_runs.c.user_id == filtered_ranked.c.user_id,
                comp_runs.c.iteration == filtered_ranked.c.latest_iteration,
            ),
        )
    )

    async with pass_or_acquire_connection(engine, connection=conn) as _conn:
        result = await _conn.execute(base_select_query)
        row = result.one_or_none()
        if not row:
            msg = f"get_latest_run_id_for_project did not return any row for project_id={project_id} (MD: I think this should not happen, but if it happens contact MD/SAN)"
            _logger.error(msg)
            return None
        return cast(PositiveInt, row.run_id)
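A short sketch combining the helpers above, assuming an initialized AsyncEngine and a hypothetical project UUID; the count query is illustrative only:

# Hypothetical call site: resolve the latest run of a project, then count its
# snapshot tasks, using get_latest_run_id_for_project and the
# comp_run_snapshot_tasks table defined earlier in this commit.
import sqlalchemy as sa
from sqlalchemy.ext.asyncio import AsyncEngine

from simcore_postgres_database.models.comp_run_snapshot_tasks import (
    comp_run_snapshot_tasks,
)


async def count_latest_snapshot_tasks(engine: AsyncEngine, project_id: str) -> int:
    run_id = await get_latest_run_id_for_project(engine, project_id=project_id)
    if run_id is None:
        return 0
    async with engine.connect() as conn:
        result = await conn.execute(
            sa.select(sa.func.count())
            .select_from(comp_run_snapshot_tasks)
            .where(comp_run_snapshot_tasks.c.run_id == run_id)
        )
        return result.scalar_one()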
