Skip to content

Commit b7fe726

Browse files
Add validation for pool names to prevent InvalidStatsNameException (#59938)
* Add Validation for pool names to prevent InvalidStatsNameException Pool names can now only contain ASCII alphabets, numbers , underscores,dots,and dashes to ensure compatibility with stats naming requirments. fixes #59935 * Add unit Tests and news fragment for pool name validation * Fix test file with proper database session handling * Fix test assertions to use pytest.raises match parameter * Add newline at the end of pool.py file * Fix trailing whitespace and quote style in pool.py * Fix syntax error: use consistent double quotes in regex * Add missing newline and __init__.py file * Add missing newline and __init__.py with license * Fix __init__.py with proper license and single newline * Implement pool name normalization for stats reporting Following @potiuk's feedback, changed approach from preventing invalid pool names to normalizing them for stats reporting. Changes: - Replaced validate_pool_name() with normalize_pool_name_for_stats() - Pool names with invalid characters are normalized (replaced with _) when emitting metrics - Logs warning when normalization occurs, suggesting pool rename - Removed validation from create_or_update_pool() - Updated scheduler_job_runner.py to use normalized names for stats - Removed validation tests - Updated news fragment This prevents InvalidStatsNameException without breaking existing pools with invalid names. Fixes #59935 * Implement pool name normalization for stats reporting Following @potiuk's feedback, changed approach from preventing invalid pool names to normalizing them for stats reporting. Changes: - Added normalize_pool_name_for_stats() function in pool.py - Pool names with invalid characters are normalized (replaced with _) when emitting metrics in scheduler_job_runner.py - Logs warning when normalization occurs - Removed validation tests - Removed accidental fix_quote.py file - Updated news fragment This prevents InvalidStatsNameException without breaking existing pools. Fixes #59935 * Add missing 're' module import * Remove duplicate import * Apply CI formatting fixes - import order and blank lines * Fix formatting - add proper blank lines per ruff format requirements
1 parent 82f01bf commit b7fe726

File tree

5 files changed

+80
-10
lines changed

5 files changed

+80
-10
lines changed

airflow-core/src/airflow/jobs/scheduler_job_runner.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
from airflow.models.dagbundle import DagBundleModel
7575
from airflow.models.dagrun import DagRun
7676
from airflow.models.dagwarning import DagWarning, DagWarningType
77+
from airflow.models.pool import normalize_pool_name_for_stats
7778
from airflow.models.serialized_dag import SerializedDagModel
7879
from airflow.models.taskinstance import TaskInstance
7980
from airflow.models.team import Team
@@ -2534,11 +2535,12 @@ def _emit_pool_metrics(self, session: Session = NEW_SESSION) -> None:
25342535
with DebugTrace.start_span(span_name="emit_pool_metrics", component="SchedulerJobRunner") as span:
25352536
pools = Pool.slots_stats(session=session)
25362537
for pool_name, slot_stats in pools.items():
2537-
Stats.gauge(f"pool.open_slots.{pool_name}", slot_stats["open"])
2538-
Stats.gauge(f"pool.queued_slots.{pool_name}", slot_stats["queued"])
2539-
Stats.gauge(f"pool.running_slots.{pool_name}", slot_stats["running"])
2540-
Stats.gauge(f"pool.deferred_slots.{pool_name}", slot_stats["deferred"])
2541-
Stats.gauge(f"pool.scheduled_slots.{pool_name}", slot_stats["scheduled"])
2538+
normalized_pool_name = normalize_pool_name_for_stats(pool_name)
2539+
Stats.gauge(f"pool.open_slots.{normalized_pool_name}", slot_stats["open"])
2540+
Stats.gauge(f"pool.queued_slots.{normalized_pool_name}", slot_stats["queued"])
2541+
Stats.gauge(f"pool.running_slots.{normalized_pool_name}", slot_stats["running"])
2542+
Stats.gauge(f"pool.deferred_slots.{normalized_pool_name}", slot_stats["deferred"])
2543+
Stats.gauge(f"pool.scheduled_slots.{normalized_pool_name}", slot_stats["scheduled"])
25422544

25432545
# Same metrics with tagging
25442546
Stats.gauge("pool.open_slots", slot_stats["open"], tags={"pool_name": pool_name})
@@ -2550,11 +2552,11 @@ def _emit_pool_metrics(self, session: Session = NEW_SESSION) -> None:
25502552
span.set_attributes(
25512553
{
25522554
"category": "scheduler",
2553-
f"pool.open_slots.{pool_name}": slot_stats["open"],
2554-
f"pool.queued_slots.{pool_name}": slot_stats["queued"],
2555-
f"pool.running_slots.{pool_name}": slot_stats["running"],
2556-
f"pool.deferred_slots.{pool_name}": slot_stats["deferred"],
2557-
f"pool.scheduled_slots.{pool_name}": slot_stats["scheduled"],
2555+
f"pool.open_slots.{normalized_pool_name}": slot_stats["open"],
2556+
f"pool.queued_slots.{normalized_pool_name}": slot_stats["queued"],
2557+
f"pool.running_slots.{normalized_pool_name}": slot_stats["running"],
2558+
f"pool.deferred_slots.{normalized_pool_name}": slot_stats["deferred"],
2559+
f"pool.scheduled_slots.{normalized_pool_name}": slot_stats["scheduled"],
25582560
}
25592561
)
25602562

airflow-core/src/airflow/models/pool.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
# under the License.
1818
from __future__ import annotations
1919

20+
import logging
21+
import re
2022
from collections.abc import Sequence
2123
from typing import TYPE_CHECKING, Any, TypedDict
2224

@@ -35,6 +37,37 @@
3537
from sqlalchemy.orm.session import Session
3638
from sqlalchemy.sql import Select
3739

40+
logger = logging.getLogger(__name__)
41+
42+
43+
def normalize_pool_name_for_stats(name: str) -> str:
44+
"""
45+
Normalize pool name for stats reporting by replacing invalid characters.
46+
47+
Stats names must only contain ASCII alphabets, numbers, underscores, dots, and dashes.
48+
Invalid characters are replaced with underscores.
49+
50+
:param name: The pool name to normalize
51+
:return: Normalized pool name safe for stats reporting
52+
"""
53+
# Check if normalization is needed
54+
if re.match(r"^[a-zA-Z0-9_.-]+$", name):
55+
return name
56+
57+
# Replace invalid characters with underscores
58+
normalized = re.sub(r"[^a-zA-Z0-9_.-]", "_", name)
59+
60+
# Log warning
61+
logger.warning(
62+
"Pool name '%s' contains invalid characters for stats reporting. "
63+
"Reporting stats with normalized name '%s'. "
64+
"Consider renaming the pool to avoid this warning.",
65+
name,
66+
normalized,
67+
)
68+
69+
return normalized
70+
3871

3972
class PoolStats(TypedDict):
4073
"""Dictionary containing Pool Stats."""

newsfragments/59938.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Pool names with invalid characters for stats reporting are now automatically normalized (invalid characters replaced with underscores) when emitting metrics, preventing ``InvalidStatsNameException``. A warning is logged when normalization occurs, suggesting the pool be renamed.

tests/models/__init__.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.

tests/models/test_pool.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
from __future__ import annotations

0 commit comments

Comments
 (0)