Skip to content

Commit c604459

Browse files
committed
refactor: move transforms that only check state out of task.py
1 parent bf1d768 commit c604459

File tree

5 files changed

+190
-233
lines changed

5 files changed

+190
-233
lines changed

packages/pytest-taskgraph/src/pytest_taskgraph/fixtures/gen.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,12 @@ def fake_loader(kind, path, config, parameters, loaded_tasks, write_artifacts):
3232
"i": i,
3333
"metadata": {"name": f"{kind}-t-{i}"},
3434
"deadline": "soon",
35+
"workerType": "linux",
36+
"provisionerId": "prov",
3537
},
3638
"dependencies": dependencies,
39+
"if-dependencies": [],
40+
"soft-dependencies": [],
3741
}
3842
if "task-defaults" in config:
3943
task = merge(config["task-defaults"], task)
@@ -250,6 +254,7 @@ def make_task(
250254
task_id=None,
251255
dependencies=None,
252256
if_dependencies=None,
257+
soft_dependencies=None,
253258
attributes=None,
254259
):
255260
task_def = task_def or {
@@ -260,6 +265,7 @@ def make_task(
260265
attributes=attributes or {},
261266
dependencies=dependencies or {},
262267
if_dependencies=if_dependencies or [],
268+
soft_dependencies=soft_dependencies or [],
263269
kind=kind,
264270
label=label,
265271
task=task_def,

src/taskgraph/transforms/task.py

Lines changed: 2 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
import functools
1212
import hashlib
1313
import os
14-
import re
1514
import time
1615
from copy import deepcopy
1716
from dataclasses import dataclass
@@ -20,7 +19,6 @@
2019

2120
from voluptuous import All, Any, Extra, NotIn, Optional, Required
2221

23-
from taskgraph import MAX_DEPENDENCIES
2422
from taskgraph.transforms.base import TransformSequence
2523
from taskgraph.util.hash import hash_path
2624
from taskgraph.util.keyed_by import evaluate_keyed_by
@@ -44,7 +42,7 @@
4442

4543

4644
@functools.cache
47-
def _run_task_suffix():
45+
def run_task_suffix():
4846
"""String to append to cache names under control of run-task."""
4947
return hash_path(RUN_TASK)[0:20]
5048

@@ -715,7 +713,7 @@ def build_docker_worker_payload(config, task, task_def):
715713
cache_version = "v3"
716714

717715
if run_task:
718-
suffix = f"{cache_version}-{_run_task_suffix()}"
716+
suffix = f"{cache_version}-{run_task_suffix()}"
719717

720718
if out_of_tree_image:
721719
name_hash = hashlib.sha256(
@@ -763,8 +761,6 @@ def build_docker_worker_payload(config, task, task_def):
763761
if capabilities:
764762
payload["capabilities"] = capabilities
765763

766-
check_caches_are_volumes(task)
767-
768764

769765
@payload_builder(
770766
"generic-worker",
@@ -1457,135 +1453,3 @@ def chain_of_trust(config, tasks):
14571453
"task-reference": "<docker-image>"
14581454
}
14591455
yield task
1460-
1461-
1462-
@transforms.add
1463-
def check_task_identifiers(config, tasks):
1464-
"""Ensures that all tasks have well defined identifiers:
1465-
``^[a-zA-Z0-9_-]{1,38}$``
1466-
"""
1467-
e = re.compile("^[a-zA-Z0-9_-]{1,38}$")
1468-
for task in tasks:
1469-
for attrib in ("workerType", "provisionerId"):
1470-
if not e.match(task["task"][attrib]):
1471-
raise Exception(
1472-
"task {}.{} is not a valid identifier: {}".format(
1473-
task["label"], attrib, task["task"][attrib]
1474-
)
1475-
)
1476-
yield task
1477-
1478-
1479-
@transforms.add
1480-
def check_task_dependencies(config, tasks):
1481-
"""Ensures that tasks don't have more than 100 dependencies."""
1482-
for task in tasks:
1483-
number_of_dependencies = (
1484-
len(task["dependencies"])
1485-
+ len(task["if-dependencies"])
1486-
+ len(task["soft-dependencies"])
1487-
)
1488-
if number_of_dependencies > MAX_DEPENDENCIES:
1489-
raise Exception(
1490-
"task {}/{} has too many dependencies ({} > {})".format(
1491-
config.kind,
1492-
task["label"],
1493-
number_of_dependencies,
1494-
MAX_DEPENDENCIES,
1495-
)
1496-
)
1497-
yield task
1498-
1499-
1500-
def check_caches_are_volumes(task):
1501-
"""Ensures that all cache paths are defined as volumes.
1502-
1503-
Caches and volumes are the only filesystem locations whose content
1504-
isn't defined by the Docker image itself. Some caches are optional
1505-
depending on the task environment. We want paths that are potentially
1506-
caches to have as similar behavior regardless of whether a cache is
1507-
used. To help enforce this, we require that all paths used as caches
1508-
to be declared as Docker volumes. This check won't catch all offenders.
1509-
But it is better than nothing.
1510-
"""
1511-
volumes = set(task["worker"]["volumes"])
1512-
paths = {c["mount-point"] for c in task["worker"].get("caches", [])}
1513-
missing = paths - volumes
1514-
1515-
if not missing:
1516-
return
1517-
1518-
raise Exception(
1519-
"task {} (image {}) has caches that are not declared as "
1520-
"Docker volumes: {} "
1521-
"(have you added them as VOLUMEs in the Dockerfile?)".format(
1522-
task["label"], task["worker"]["docker-image"], ", ".join(sorted(missing))
1523-
)
1524-
)
1525-
1526-
1527-
@transforms.add
1528-
def check_run_task_caches(config, tasks):
1529-
"""Audit for caches requiring run-task.
1530-
1531-
run-task manages caches in certain ways. If a cache managed by run-task
1532-
is used by a non run-task task, it could cause problems. So we audit for
1533-
that and make sure certain cache names are exclusive to run-task.
1534-
1535-
IF YOU ARE TEMPTED TO MAKE EXCLUSIONS TO THIS POLICY, YOU ARE LIKELY
1536-
CONTRIBUTING TECHNICAL DEBT AND WILL HAVE TO SOLVE MANY OF THE PROBLEMS
1537-
THAT RUN-TASK ALREADY SOLVES. THINK LONG AND HARD BEFORE DOING THAT.
1538-
"""
1539-
re_reserved_caches = re.compile(
1540-
"""^
1541-
(checkouts|tooltool-cache)
1542-
""",
1543-
re.VERBOSE,
1544-
)
1545-
1546-
cache_prefix = "{trust_domain}-level-{level}-".format(
1547-
trust_domain=config.graph_config["trust-domain"],
1548-
level=config.params["level"],
1549-
)
1550-
1551-
suffix = _run_task_suffix()
1552-
1553-
for task in tasks:
1554-
payload = task["task"].get("payload", {})
1555-
command = payload.get("command") or [""]
1556-
1557-
main_command = command[0] if isinstance(command[0], str) else ""
1558-
run_task = main_command.endswith("run-task")
1559-
1560-
for cache in payload.get("cache", {}).get(
1561-
"task-reference", payload.get("cache", {})
1562-
):
1563-
if not cache.startswith(cache_prefix):
1564-
raise Exception(
1565-
"{} is using a cache ({}) which is not appropriate "
1566-
"for its trust-domain and level. It should start with {}.".format(
1567-
task["label"], cache, cache_prefix
1568-
)
1569-
)
1570-
1571-
cache = cache[len(cache_prefix) :]
1572-
1573-
if not re_reserved_caches.match(cache):
1574-
continue
1575-
1576-
if not run_task:
1577-
raise Exception(
1578-
f"{task['label']} is using a cache ({cache}) reserved for run-task "
1579-
"change the task to use run-task or use a different "
1580-
"cache name"
1581-
)
1582-
1583-
if suffix not in cache:
1584-
raise Exception(
1585-
f"{task['label']} is using a cache ({cache}) reserved for run-task "
1586-
"but the cache name is not dependent on the contents "
1587-
"of run-task; change the cache name to conform to the "
1588-
"naming requirements"
1589-
)
1590-
1591-
yield task

src/taskgraph/util/verify.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,18 @@
44

55

66
import logging
7+
import re
78
import sys
89
import warnings
910
from abc import ABC, abstractmethod
1011
from dataclasses import dataclass, field
1112
from typing import Callable, Union
1213

14+
from taskgraph import MAX_DEPENDENCIES
1315
from taskgraph.config import GraphConfig
1416
from taskgraph.parameters import Parameters
1517
from taskgraph.taskgraph import TaskGraph
18+
from taskgraph.transforms.task import run_task_suffix
1619
from taskgraph.util.attributes import match_run_on_projects
1720
from taskgraph.util.treeherder import join_symbol
1821

@@ -301,6 +304,133 @@ def verify_toolchain_alias(task, taskgraph, scratch_pad, graph_config, parameter
301304
scratch_pad[key] = task.label
302305

303306

307+
RE_RESERVED_CACHES = re.compile(r"^(checkouts|tooltool-cache)", re.VERBOSE)
308+
309+
310+
@verifications.add("full_task_graph")
311+
def verify_run_task_caches(task, taskgraph, scratch_pad, graph_config, parameters):
312+
"""Audit for caches requiring run-task.
313+
314+
run-task manages caches in certain ways. If a cache managed by run-task
315+
is used by a non run-task task, it could cause problems. So we audit for
316+
that and make sure certain cache names are exclusive to run-task.
317+
318+
IF YOU ARE TEMPTED TO MAKE EXCLUSIONS TO THIS POLICY, YOU ARE LIKELY
319+
CONTRIBUTING TECHNICAL DEBT AND WILL HAVE TO SOLVE MANY OF THE PROBLEMS
320+
THAT RUN-TASK ALREADY SOLVES. THINK LONG AND HARD BEFORE DOING THAT.
321+
"""
322+
if task is None:
323+
return
324+
325+
cache_prefix = "{trust_domain}-level-{level}-".format(
326+
trust_domain=graph_config["trust-domain"],
327+
level=parameters["level"],
328+
)
329+
330+
suffix = run_task_suffix()
331+
332+
payload = task.task.get("payload", {})
333+
command = payload.get("command") or [""]
334+
335+
main_command = command[0] if isinstance(command[0], str) else ""
336+
run_task = main_command.endswith("run-task")
337+
338+
for cache in payload.get("cache", {}).get(
339+
"task-reference", payload.get("cache", {})
340+
):
341+
if not cache.startswith(cache_prefix):
342+
raise Exception(
343+
f"{task.label} is using a cache ({cache}) which is not appropriate "
344+
f"for its trust-domain and level. It should start with {cache_prefix}."
345+
)
346+
347+
cache = cache[len(cache_prefix) :]
348+
349+
if not RE_RESERVED_CACHES.match(cache):
350+
continue
351+
352+
if not run_task:
353+
raise Exception(
354+
f"{task['label']} is using a cache ({cache}) reserved for run-task "
355+
"change the task to use run-task or use a different "
356+
"cache name"
357+
)
358+
359+
if suffix not in cache:
360+
raise Exception(
361+
f"{task['label']} is using a cache ({cache}) reserved for run-task "
362+
"but the cache name is not dependent on the contents "
363+
"of run-task; change the cache name to conform to the "
364+
"naming requirements"
365+
)
366+
367+
368+
@verifications.add("full_task_graph")
369+
def verify_task_identifiers(task, taskgraph, scratch_pad, graph_config, parameters):
370+
"""Ensures that all tasks have well defined identifiers:
371+
``^[a-zA-Z0-9_-]{1,38}$``
372+
"""
373+
if task is None:
374+
return
375+
376+
e = re.compile("^[a-zA-Z0-9_-]{1,38}$")
377+
for attrib in ("workerType", "provisionerId"):
378+
if not e.match(task.task[attrib]):
379+
raise Exception(
380+
f"task {task.label}.{attrib} is not a valid identifier: {task.task[attrib]}"
381+
)
382+
383+
384+
@verifications.add("full_task_graph")
385+
def verify_task_dependencies(task, taskgraph, scratch_pad, graph_config, parameters):
386+
"""Ensures that tasks don't have more than 100 dependencies."""
387+
if task is None:
388+
return
389+
390+
number_of_dependencies = (
391+
len(task.dependencies) + len(task.if_dependencies) + len(task.soft_dependencies)
392+
)
393+
if number_of_dependencies > MAX_DEPENDENCIES:
394+
raise Exception(
395+
f"task {task.label} has too many dependencies ({number_of_dependencies} > {MAX_DEPENDENCIES})"
396+
)
397+
398+
399+
@verifications.add("full_task_graph")
400+
def verify_caches_are_volumes(task, taskgraph, scratch_pad, graph_config, parameters):
401+
"""Ensures that all cache paths are defined as volumes.
402+
403+
Caches and volumes are the only filesystem locations whose content
404+
isn't defined by the Docker image itself. Some caches are optional
405+
depending on the task environment. We want paths that are potentially
406+
caches to have as similar behavior regardless of whether a cache is
407+
used. To help enforce this, we require that all paths used as caches
408+
to be declared as Docker volumes. This check won't catch all offenders.
409+
But it is better than nothing.
410+
"""
411+
if task is None:
412+
return
413+
414+
taskdef = task.task
415+
if taskdef.get("worker", {}).get("implementation") != "docker-worker":
416+
return
417+
418+
volumes = set(taskdef["worker"]["volumes"])
419+
paths = {c["mount-point"] for c in taskdef["worker"].get("caches", [])}
420+
missing = paths - volumes
421+
422+
if missing:
423+
raise Exception(
424+
"task {} (image {}) has caches that are not declared as "
425+
"Docker volumes: {} "
426+
"(have you added them as VOLUMEs in the Dockerfile?)".format(
427+
task.label,
428+
taskdef["worker"]["docker-image"],
429+
", ".join(sorted(missing)),
430+
)
431+
)
432+
433+
304434
@verifications.add("optimized_task_graph")
305435
def verify_always_optimized(task, taskgraph, scratch_pad, graph_config, parameters):
306436
"""

0 commit comments

Comments
 (0)