Skip to content

Commit 7b562ba

Browse files
Use average runtime as deadline reference (apache#55088)
Co-authored-by: Ramit Kataria <ramitkat@amazon.com>
1 parent b3dad09 commit 7b562ba

File tree

6 files changed

+315
-17
lines changed

6 files changed

+315
-17
lines changed

airflow-core/docs/howto/deadline-alerts.rst

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,58 @@ Airflow provides several built-in reference points that you can use with Deadlin
104104
``DeadlineReference.FIXED_DATETIME``
105105
Specifies a fixed point in time. Useful when Dags must complete by a specific time.
106106

107+
``DeadlineReference.AVERAGE_RUNTIME``
108+
Calculates deadlines based on the average runtime of previous DAG runs. This reference
109+
analyzes historical execution data to predict when the current run should complete.
110+
The deadline is set to the current time plus the calculated average runtime plus the interval.
111+
If fewer than ``min_runs`` completed runs exist, no deadline is created.
112+
113+
Parameters:
114+
* ``max_runs`` (int, optional): Maximum number of recent DAG runs to analyze. Defaults to 10.
115+
* ``min_runs`` (int, optional): Minimum number of completed runs required to calculate the average. Defaults to the same value as ``max_runs``.
116+
117+
Example usage:
118+
119+
.. code-block:: python
120+
121+
# Use default settings (analyze up to 10 runs, require 10 runs)
122+
DeadlineReference.AVERAGE_RUNTIME()
123+
124+
# Analyze up to 20 runs but calculate with minimum 5 runs
125+
DeadlineReference.AVERAGE_RUNTIME(max_runs=20, min_runs=5)
126+
127+
# Strict: require exactly 15 runs to calculate
128+
DeadlineReference.AVERAGE_RUNTIME(max_runs=15, min_runs=15)
129+
130+
Here's an example using average runtime:
131+
132+
.. code-block:: python
133+
134+
with DAG(
135+
dag_id="average_runtime_deadline",
136+
deadline=DeadlineAlert(
137+
reference=DeadlineReference.AVERAGE_RUNTIME(max_runs=15, min_runs=5),
138+
interval=timedelta(minutes=30), # Alert if 30 minutes past average runtime
139+
callback=AsyncCallback(
140+
SlackWebhookNotifier,
141+
kwargs={"text": "🚨 DAG {{ dag_run.dag_id }} is running longer than expected!"},
142+
),
143+
),
144+
):
145+
EmptyOperator(task_id="data_processing")
146+
147+
If the calculated historical average is 30 minutes, the timeline for this example looks like this:
148+
149+
::
150+
151+
|------|----------|--------------|--------------|--------|
152+
Queued Start | Deadline
153+
09:00 09:05 09:35 10:05
154+
| | |
155+
|--- Average --|-- Interval --|
156+
(30 min) (30 min)
157+
158+
107159
Here's an example using a fixed datetime:
108160

109161
.. code-block:: python
@@ -166,6 +218,7 @@ Here's an example using the Slack Notifier if the Dag run has not finished withi
166218
):
167219
EmptyOperator(task_id="example_task")
168220
221+
169222
Creating Custom Callbacks
170223
^^^^^^^^^^^^^^^^^^^^^^^^^
171224

airflow-core/src/airflow/models/deadline.py

Lines changed: 94 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
import sqlalchemy_jsonfield
2828
import uuid6
29-
from sqlalchemy import Column, ForeignKey, Index, Integer, String, and_, select
29+
from sqlalchemy import Column, ForeignKey, Index, Integer, String, and_, func, select, text
3030
from sqlalchemy.exc import SQLAlchemyError
3131
from sqlalchemy.orm import relationship
3232
from sqlalchemy_utils import UUIDType
@@ -283,7 +283,7 @@ class BaseDeadlineReference(LoggingMixin, ABC):
283283
def reference_name(cls: Any) -> str:
284284
return cls.__name__
285285

286-
def evaluate_with(self, *, session: Session, interval: timedelta, **kwargs: Any) -> datetime:
286+
def evaluate_with(self, *, session: Session, interval: timedelta, **kwargs: Any) -> datetime | None:
287287
"""Validate the provided kwargs and evaluate this deadline with the given conditions."""
288288
filtered_kwargs = {k: v for k, v in kwargs.items() if k in self.required_kwargs}
289289

@@ -295,10 +295,11 @@ def evaluate_with(self, *, session: Session, interval: timedelta, **kwargs: Any)
295295
if extra_kwargs := kwargs.keys() - filtered_kwargs.keys():
296296
self.log.debug("Ignoring unexpected parameters: %s", ", ".join(extra_kwargs))
297297

298-
return self._evaluate_with(session=session, **filtered_kwargs) + interval
298+
base_time = self._evaluate_with(session=session, **filtered_kwargs)
299+
return base_time + interval if base_time is not None else None
299300

300301
@abstractmethod
301-
def _evaluate_with(self, *, session: Session, **kwargs: Any) -> datetime:
302+
def _evaluate_with(self, *, session: Session, **kwargs: Any) -> datetime | None:
302303
"""Must be implemented by subclasses to perform the actual evaluation."""
303304
raise NotImplementedError
304305

@@ -366,6 +367,95 @@ def _evaluate_with(self, *, session: Session, **kwargs: Any) -> datetime:
366367

367368
return _fetch_from_db(DagRun.queued_at, session=session, **kwargs)
368369

370+
@dataclass
371+
class AverageRuntimeDeadline(BaseDeadlineReference):
372+
"""A deadline that calculates the average runtime from past DAG runs."""
373+
374+
DEFAULT_LIMIT = 10
375+
max_runs: int
376+
min_runs: int | None = None
377+
required_kwargs = {"dag_id"}
378+
379+
def __post_init__(self):
380+
if self.min_runs is None:
381+
self.min_runs = self.max_runs
382+
if self.min_runs < 1:
383+
raise ValueError("min_runs must be at least 1")
384+
385+
@provide_session
386+
def _evaluate_with(self, *, session: Session, **kwargs: Any) -> datetime | None:
387+
from airflow.models import DagRun
388+
389+
dag_id = kwargs["dag_id"]
390+
391+
# Get database dialect to use appropriate time difference calculation
392+
dialect = session.bind.dialect.name
393+
394+
# Create database-specific expression for calculating duration in seconds
395+
if dialect == "postgresql":
396+
duration_expr = func.extract("epoch", DagRun.end_date - DagRun.start_date)
397+
elif dialect == "mysql":
398+
# Use TIMESTAMPDIFF to get exact seconds like PostgreSQL EXTRACT(epoch FROM ...)
399+
duration_expr = func.timestampdiff(text("SECOND"), DagRun.start_date, DagRun.end_date)
400+
elif dialect == "sqlite":
401+
duration_expr = (func.julianday(DagRun.end_date) - func.julianday(DagRun.start_date)) * 86400
402+
else:
403+
raise ValueError(f"Unsupported database dialect: {dialect}")
404+
405+
# Query for completed DAG runs with both start and end dates
406+
# Order by logical_date descending to get most recent runs first
407+
query = (
408+
select(duration_expr)
409+
.filter(DagRun.dag_id == dag_id, DagRun.start_date.isnot(None), DagRun.end_date.isnot(None))
410+
.order_by(DagRun.logical_date.desc())
411+
)
412+
413+
# Apply max_runs
414+
query = query.limit(self.max_runs)
415+
416+
# Get all durations and calculate average
417+
durations = session.execute(query).scalars().all()
418+
419+
if len(durations) < cast("int", self.min_runs):
420+
logger.info(
421+
"Only %d completed DAG runs found for dag_id: %s (need %d), skipping deadline creation",
422+
len(durations),
423+
dag_id,
424+
self.min_runs,
425+
)
426+
return None
427+
# Convert to float to handle Decimal types from MySQL while preserving precision
428+
# Use Decimal arithmetic for higher precision, then convert to float
429+
from decimal import Decimal
430+
431+
decimal_durations = [Decimal(str(d)) for d in durations]
432+
avg_seconds = float(sum(decimal_durations) / len(decimal_durations))
433+
logger.info(
434+
"Average runtime for dag_id %s (from %d runs): %.2f seconds",
435+
dag_id,
436+
len(durations),
437+
avg_seconds,
438+
)
439+
return timezone.utcnow() + timedelta(seconds=avg_seconds)
440+
441+
def serialize_reference(self) -> dict:
442+
return {
443+
ReferenceModels.REFERENCE_TYPE_FIELD: self.reference_name,
444+
"max_runs": self.max_runs,
445+
"min_runs": self.min_runs,
446+
}
447+
448+
@classmethod
449+
def deserialize_reference(cls, reference_data: dict):
450+
max_runs = reference_data.get("max_runs", cls.DEFAULT_LIMIT)
451+
min_runs = reference_data.get("min_runs", max_runs)
452+
if min_runs < 1:
453+
raise ValueError("min_runs must be at least 1")
454+
return cls(
455+
max_runs=max_runs,
456+
min_runs=min_runs,
457+
)
458+
369459

370460
DeadlineReferenceType = ReferenceModels.BaseDeadlineReference
371461

airflow-core/src/airflow/serialization/serialized_objects.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3270,18 +3270,20 @@ def create_dagrun(
32703270
if self.deadline:
32713271
for deadline in cast("list", self.deadline):
32723272
if isinstance(deadline.reference, DeadlineReference.TYPES.DAGRUN):
3273-
session.add(
3274-
Deadline(
3275-
deadline_time=deadline.reference.evaluate_with(
3276-
session=session,
3277-
interval=deadline.interval,
3278-
dag_id=self.dag_id,
3279-
run_id=run_id,
3280-
),
3281-
callback=deadline.callback,
3282-
dagrun_id=orm_dagrun.id,
3283-
)
3273+
deadline_time = deadline.reference.evaluate_with(
3274+
session=session,
3275+
interval=deadline.interval,
3276+
dag_id=self.dag_id,
3277+
run_id=run_id,
32843278
)
3279+
if deadline_time is not None:
3280+
session.add(
3281+
Deadline(
3282+
deadline_time=deadline_time,
3283+
callback=deadline.callback,
3284+
dagrun_id=orm_dagrun.id,
3285+
)
3286+
)
32853287

32863288
return orm_dagrun
32873289

0 commit comments

Comments
 (0)