Skip to content

Commit 829193f

Browse files
authored
Support CGroup v2 on Supervised with manual restarts (#5419)
1 parent 1f89311 commit 829193f

File tree

8 files changed

+341
-11
lines changed

8 files changed

+341
-11
lines changed

supervisor/addons/addon.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,9 @@ def __init__(self, coresys: CoreSys, slug: str):
148148
self._boot_failed_issue = Issue(
149149
IssueType.BOOT_FAIL, ContextType.ADDON, reference=self.slug
150150
)
151+
self._device_access_missing_issue = Issue(
152+
IssueType.DEVICE_ACCESS_MISSING, ContextType.ADDON, reference=self.slug
153+
)
151154

152155
def __repr__(self) -> str:
153156
"""Return internal representation."""
@@ -158,6 +161,11 @@ def boot_failed_issue(self) -> Issue:
158161
"""Get issue used if start on boot failed."""
159162
return self._boot_failed_issue
160163

164+
@property
165+
def device_access_missing_issue(self) -> Issue:
166+
"""Get issue used if device access is missing and can't be automatically added."""
167+
return self._device_access_missing_issue
168+
161169
@property
162170
def state(self) -> AddonState:
163171
"""Return state of the add-on."""
@@ -182,6 +190,13 @@ def state(self, new_state: AddonState) -> None:
182190
):
183191
self.sys_resolution.dismiss_issue(self.boot_failed_issue)
184192

193+
# Dismiss device access missing issue if present and we stopped
194+
if (
195+
new_state == AddonState.STOPPED
196+
and self.device_access_missing_issue in self.sys_resolution.issues
197+
):
198+
self.sys_resolution.dismiss_issue(self.device_access_missing_issue)
199+
185200
self.sys_homeassistant.websocket.send_message(
186201
{
187202
ATTR_TYPE: WSType.SUPERVISOR_EVENT,

supervisor/docker/addon.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from pathlib import Path
1111
from typing import TYPE_CHECKING
1212

13+
from attr import evolve
1314
from awesomeversion import AwesomeVersion
1415
import docker
1516
from docker.types import Mount
@@ -40,7 +41,7 @@
4041
from ..hardware.data import Device
4142
from ..jobs.const import JobCondition, JobExecutionLimit
4243
from ..jobs.decorator import Job
43-
from ..resolution.const import ContextType, IssueType, SuggestionType
44+
from ..resolution.const import CGROUP_V2_VERSION, ContextType, IssueType, SuggestionType
4445
from ..utils.sentry import capture_exception
4546
from .const import (
4647
ENV_TIME,
@@ -802,6 +803,13 @@ async def stop(self, remove_container: bool = True) -> None:
802803

803804
await super().stop(remove_container)
804805

806+
# If there is a device access issue and the container is removed, clear it
807+
if (
808+
remove_container
809+
and self.addon.device_access_missing_issue in self.sys_resolution.issues
810+
):
811+
self.sys_resolution.dismiss_issue(self.addon.device_access_missing_issue)
812+
805813
async def _validate_trust(
806814
self, image_id: str, image: str, version: AwesomeVersion
807815
) -> None:
@@ -839,6 +847,16 @@ async def _hardware_events(self, device: Device) -> None:
839847
f"Can't process Hardware Event on {self.name}: {err!s}", _LOGGER.error
840848
) from err
841849

850+
if (
851+
self.sys_docker.info.cgroup == CGROUP_V2_VERSION
852+
and not self.sys_os.available
853+
):
854+
self.sys_resolution.add_issue(
855+
evolve(self.addon.device_access_missing_issue),
856+
suggestions=[SuggestionType.EXECUTE_RESTART],
857+
)
858+
return
859+
842860
permission = self.sys_hardware.policy.get_cgroups_rule(device)
843861
try:
844862
await self.sys_dbus.agent.cgroup.add_devices_allowed(

supervisor/resolution/const.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
DNS_CHECK_HOST = "_checkdns.home-assistant.io"
1616
DNS_ERROR_NO_DATA = 1
1717

18+
CGROUP_V1_VERSION = "1"
19+
CGROUP_V2_VERSION = "2"
20+
1821

1922
class ContextType(StrEnum):
2023
"""Place where something was happening."""
@@ -77,6 +80,7 @@ class IssueType(StrEnum):
7780
CORRUPT_FILESYSTEM = "corrupt_filesystem"
7881
DETACHED_ADDON_MISSING = "detached_addon_missing"
7982
DETACHED_ADDON_REMOVED = "detached_addon_removed"
83+
DEVICE_ACCESS_MISSING = "device_access_missing"
8084
DISABLED_DATA_DISK = "disabled_data_disk"
8185
DNS_LOOP = "dns_loop"
8286
DNS_SERVER_FAILED = "dns_server_failed"
@@ -112,6 +116,7 @@ class SuggestionType(StrEnum):
112116
EXECUTE_REMOVE = "execute_remove"
113117
EXECUTE_REPAIR = "execute_repair"
114118
EXECUTE_RESET = "execute_reset"
119+
EXECUTE_RESTART = "execute_restart"
115120
EXECUTE_START = "execute_start"
116121
EXECUTE_STOP = "execute_stop"
117122
EXECUTE_UPDATE = "execute_update"

supervisor/resolution/evaluations/cgroup.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,9 @@
22

33
from ...const import CoreState
44
from ...coresys import CoreSys
5-
from ..const import UnsupportedReason
5+
from ..const import CGROUP_V1_VERSION, CGROUP_V2_VERSION, UnsupportedReason
66
from .base import EvaluateBase
77

8-
CGROUP_V1_VERSION = "1"
9-
CGROUP_V2_VERSION = "2"
10-
118

129
def setup(coresys: CoreSys) -> EvaluateBase:
1310
"""Initialize evaluation-setup function."""
@@ -20,9 +17,7 @@ class EvaluateCGroupVersion(EvaluateBase):
2017
@property
2118
def expected_versions(self) -> set[str]:
2219
"""Return expected cgroup versions."""
23-
if self.coresys.os.available:
24-
return {CGROUP_V1_VERSION, CGROUP_V2_VERSION}
25-
return {CGROUP_V1_VERSION}
20+
return {CGROUP_V1_VERSION, CGROUP_V2_VERSION}
2621

2722
@property
2823
def reason(self) -> UnsupportedReason:
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
"""Helpers to fix addon by restarting it."""
2+
3+
import logging
4+
5+
from ...coresys import CoreSys
6+
from ...exceptions import AddonsError, ResolutionFixupError
7+
from ..const import ContextType, IssueType, SuggestionType
8+
from .base import FixupBase
9+
10+
_LOGGER: logging.Logger = logging.getLogger(__name__)
11+
12+
13+
def setup(coresys: CoreSys) -> FixupBase:
14+
"""Fixup setup function."""
15+
return FixupAddonExecuteRestart(coresys)
16+
17+
18+
class FixupAddonExecuteRestart(FixupBase):
19+
"""Fixup that restarts an add-on by stopping and then starting it."""
20+
21+
async def process_fixup(self, reference: str | None = None) -> None:
22+
"""Restart the add-on: stop it, then start it again."""
23+
if not (addon := self.sys_addons.get(reference, local_only=True)):
24+
_LOGGER.info("Cannot restart addon %s as it does not exist", reference)
25+
return
26+
27+
# Stop addon
28+
try:
29+
await addon.stop()
30+
except AddonsError as err:
31+
_LOGGER.error("Could not stop %s due to %s", reference, err)
32+
raise ResolutionFixupError() from None
33+
34+
# Start addon
35+
# Removing the container has already fixed the issue and dismissed it
36+
# So any errors on startup are just logged. We won't wait on the startup task either
37+
try:
38+
await addon.start()
39+
except AddonsError as err:
40+
_LOGGER.error("Could not restart %s due to %s", reference, err)
41+
42+
@property
43+
def suggestion(self) -> SuggestionType:
44+
"""Return a SuggestionType enum."""
45+
return SuggestionType.EXECUTE_RESTART
46+
47+
@property
48+
def context(self) -> ContextType:
49+
"""Return a ContextType enum."""
50+
return ContextType.ADDON
51+
52+
@property
53+
def issues(self) -> list[IssueType]:
54+
"""Return an IssueType enum list."""
55+
return [IssueType.DEVICE_ACCESS_MISSING]
56+
57+
@property
58+
def auto(self) -> bool:
59+
"""Return if a fixup can be applied as auto fix."""
60+
return False

tests/docker/test_addon.py

Lines changed: 119 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
"""Test docker addon setup."""
22

3+
import asyncio
34
from ipaddress import IPv4Address
5+
from pathlib import Path
46
from typing import Any
57
from unittest.mock import MagicMock, Mock, PropertyMock, patch
68

@@ -12,12 +14,17 @@
1214
from supervisor.addons.addon import Addon
1315
from supervisor.addons.model import Data
1416
from supervisor.addons.options import AddonOptions
17+
from supervisor.const import BusEvent
1518
from supervisor.coresys import CoreSys
19+
from supervisor.dbus.agent.cgroup import CGroup
1620
from supervisor.docker.addon import DockerAddon
21+
from supervisor.docker.manager import DockerAPI
1722
from supervisor.exceptions import CoreDNSError, DockerNotFound
23+
from supervisor.hardware.data import Device
24+
from supervisor.os.manager import OSManager
1825
from supervisor.plugins.dns import PluginDns
19-
from supervisor.resolution.const import ContextType, IssueType
20-
from supervisor.resolution.data import Issue
26+
from supervisor.resolution.const import ContextType, IssueType, SuggestionType
27+
from supervisor.resolution.data import Issue, Suggestion
2128

2229
from ..common import load_json_fixture
2330
from . import DEV_MOUNT
@@ -380,3 +387,113 @@ async def test_addon_stop_delete_host_error(
380387
await docker_addon.stop()
381388

382389
capture_exception.assert_called_once_with(err)
390+
391+
392+
TEST_DEV_PATH = "/dev/ttyAMA0"
393+
TEST_SYSFS_PATH = "/sys/devices/platform/soc/ffe09000.usb/ff500000.usb/xhci-hcd.0.auto/usb1/1-1/1-1.1/1-1.1:1.0/tty/ttyACM0"
394+
TEST_HW_DEVICE = Device(
395+
name="ttyACM0",
396+
path=Path("/dev/ttyAMA0"),
397+
sysfs=Path(
398+
"/sys/devices/platform/soc/ffe09000.usb/ff500000.usb/xhci-hcd.0.auto/usb1/1-1/1-1.1/1-1.1:1.0/tty/ttyACM0"
399+
),
400+
subsystem="tty",
401+
parent=Path(
402+
"/sys/devices/platform/soc/ffe09000.usb/ff500000.usb/xhci-hcd.0.auto/usb1/1-1/1-1.1/1-1.1:1.0"
403+
),
404+
links=[
405+
Path(
406+
"/dev/serial/by-id/usb-Texas_Instruments_TI_CC2531_USB_CDC___0X0123456789ABCDEF-if00"
407+
),
408+
Path("/dev/serial/by-path/platform-xhci-hcd.0.auto-usb-0:1.1:1.0"),
409+
Path("/dev/serial/by-path/platform-xhci-hcd.0.auto-usbv2-0:1.1:1.0"),
410+
],
411+
attributes={},
412+
children=[],
413+
)
414+
415+
416+
@pytest.mark.usefixtures("path_extern")
417+
@pytest.mark.parametrize(
418+
("dev_path", "cgroup", "is_os"),
419+
[
420+
(TEST_DEV_PATH, "1", True),
421+
(TEST_SYSFS_PATH, "1", True),
422+
(TEST_DEV_PATH, "1", False),
423+
(TEST_SYSFS_PATH, "1", False),
424+
(TEST_DEV_PATH, "2", True),
425+
(TEST_SYSFS_PATH, "2", True),
426+
],
427+
)
428+
async def test_addon_new_device(
429+
coresys: CoreSys,
430+
install_addon_ssh: Addon,
431+
container: MagicMock,
432+
docker: DockerAPI,
433+
dev_path: str,
434+
cgroup: str,
435+
is_os: bool,
436+
):
437+
"""Test new device that is listed in static devices."""
438+
coresys.hardware.disk.get_disk_free_space = lambda x: 5000
439+
install_addon_ssh.data["devices"] = [dev_path]
440+
container.id = 123
441+
docker.info.cgroup = cgroup
442+
443+
with (
444+
patch.object(Addon, "write_options"),
445+
patch.object(OSManager, "available", new=PropertyMock(return_value=is_os)),
446+
patch.object(CGroup, "add_devices_allowed") as add_devices,
447+
):
448+
await install_addon_ssh.start()
449+
450+
coresys.bus.fire_event(
451+
BusEvent.HARDWARE_NEW_DEVICE,
452+
TEST_HW_DEVICE,
453+
)
454+
await asyncio.sleep(0.01)
455+
456+
add_devices.assert_called_once_with(123, "c 0:0 rwm")
457+
458+
459+
@pytest.mark.usefixtures("path_extern")
460+
@pytest.mark.parametrize("dev_path", [TEST_DEV_PATH, TEST_SYSFS_PATH])
461+
async def test_addon_new_device_no_haos(
462+
coresys: CoreSys,
463+
install_addon_ssh: Addon,
464+
docker: DockerAPI,
465+
dev_path: str,
466+
):
467+
"""Test new device that is listed in static devices on non HAOS system with CGroup V2."""
468+
coresys.hardware.disk.get_disk_free_space = lambda x: 5000
469+
install_addon_ssh.data["devices"] = [dev_path]
470+
docker.info.cgroup = "2"
471+
472+
with (
473+
patch.object(Addon, "write_options"),
474+
patch.object(OSManager, "available", new=PropertyMock(return_value=False)),
475+
patch.object(CGroup, "add_devices_allowed") as add_devices,
476+
):
477+
await install_addon_ssh.start()
478+
479+
coresys.bus.fire_event(
480+
BusEvent.HARDWARE_NEW_DEVICE,
481+
TEST_HW_DEVICE,
482+
)
483+
await asyncio.sleep(0.01)
484+
485+
add_devices.assert_not_called()
486+
487+
# Issue added with hardware event since access cannot be added dynamically
488+
assert install_addon_ssh.device_access_missing_issue in coresys.resolution.issues
489+
assert (
490+
Suggestion(
491+
SuggestionType.EXECUTE_RESTART, ContextType.ADDON, reference="local_ssh"
492+
)
493+
in coresys.resolution.suggestions
494+
)
495+
496+
# Stopping and removing the container clears the issue, as access is granted on next start
497+
await install_addon_ssh.stop()
498+
assert coresys.resolution.issues == []
499+
assert coresys.resolution.suggestions == []

tests/resolution/evaluation/test_evaluate_cgroup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ async def test_evaluation(coresys: CoreSys):
2626

2727
coresys.docker.info.cgroup = CGROUP_V2_VERSION
2828
await cgroup_version()
29-
assert cgroup_version.reason in coresys.resolution.unsupported
29+
assert cgroup_version.reason not in coresys.resolution.unsupported
3030
coresys.resolution.unsupported.clear()
3131

3232
coresys.docker.info.cgroup = CGROUP_V1_VERSION

0 commit comments

Comments
 (0)