From 729136a1a3ffbb746bd9152ba85a531c8b706046 Mon Sep 17 00:00:00 2001 From: Max Chernoff Date: Wed, 4 Jun 2025 23:56:32 -0600 Subject: [PATCH 1/7] Label prometheus-podman-exporter as container_runtime_exec_t Without this label, running "prometheus-podman-exporter" as an SELinux-confined user results in a race condition: - If "prometheus-podman-exporter" starts before any containers, running any Podman commands (even just "podman system info") gives you a perplexing error: audit[26268]: AVC avc: denied { create } for pid=26268 comm="exe" name="whiteout" scontext=user_u:user_r:user_t:s0-s0:c0.c1023 tcontext=user_u:object_r:container_ro_file_t:s0 tclass=chr_file permissive=0 prometheus-podman-exporter[26268]: 2025/06/04 22:41:47 configure storage: kernel does not support overlay fs: unable to create kernel-style whiteout: permission denied podman[26287]: Error: configure storage: kernel does not support overlay fs: unable to create kernel-style whiteout: permission denied - If any containers start before "prometheus-podman-exporter", then everything is fine, except the "podman_container_mem_usage_bytes" metric is oddly missing. But as long as 1 container starts before "prometheus-podman-exporter", you're still able to run new containers, even after "prometheus-podman-exporter" has started. 
Signed-off-by: Max Chernoff --- container.fc | 1 + 1 file changed, 1 insertion(+) diff --git a/container.fc b/container.fc index f16a652..833d7a6 100644 --- a/container.fc +++ b/container.fc @@ -48,6 +48,7 @@ /usr/s?bin/crio.* -- gen_context(system_u:object_r:container_runtime_exec_t,s0) /usr/local/s?bin/crio.* -- gen_context(system_u:object_r:container_runtime_exec_t,s0) /usr/s?bin/ocid.* -- gen_context(system_u:object_r:container_runtime_exec_t,s0) +/usr/bin/prometheus-podman-exporter -- gen_context(system_u:object_r:container_runtime_exec_t,s0) /usr/lib/docker/docker-novolume-plugin -- gen_context(system_u:object_r:container_auth_exec_t,s0) /usr/lib/docker/[^/]*plugin -- gen_context(system_u:object_r:container_runtime_exec_t,s0) /usr/local/lib/docker/[^/]*plugin -- gen_context(system_u:object_r:container_runtime_exec_t,s0) From e27600f23604f74517291628220168db4f68b52d Mon Sep 17 00:00:00 2001 From: Max Chernoff Date: Thu, 5 Jun 2025 00:19:11 -0600 Subject: [PATCH 2/7] Allow "bootc status" to work for SELinux confined users Signed-off-by: Max Chernoff --- container.te | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/container.te b/container.te index a88fe27..2bb1ace 100644 --- a/container.te +++ b/container.te @@ -1631,3 +1631,13 @@ tunable_policy(`deny_ptrace',`',` # netavark needs to write to /run/sysctl.d and needs the right label for systemd to read it. # https://issues.redhat.com/browse/RHEL-91380 files_pid_filetrans(container_runtime_t, system_conf_t, dir, "sysctl.d") + +# Needed for "bootc status" to work (via sudo) as a confined user. 
+gen_require(` + attribute_role install_roles; + type sysadm_t; +') + +roleattribute sysadm_r install_roles; +allow sysadm_t install_t:process transition; +type_transition sysadm_t install_exec_t:process install_t; From 498d415b0b331c4114a6c580074986f48b4b5f82 Mon Sep 17 00:00:00 2001 From: Max Chernoff Date: Thu, 5 Jun 2025 00:32:47 -0600 Subject: [PATCH 3/7] Allow building rpm-ostree images in containers run by confined users Signed-off-by: Max Chernoff --- container.te | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/container.te b/container.te index 2bb1ace..30853e7 100644 --- a/container.te +++ b/container.te @@ -1641,3 +1641,10 @@ gen_require(` roleattribute sysadm_r install_roles; allow sysadm_t install_t:process transition; type_transition sysadm_t install_exec_t:process install_t; + +# Needed to be able to build an rpm-ostree/bootc image, inside of a container +# ran by a confined user. +allow container_t container_ro_file_t:dir watch; +allow container_t devpts_t:filesystem mount; +allow container_t proc_t:filesystem mount; +allow container_t tmpfs_t:filesystem remount; From bc5c2ccd515e6943ada87e9e2e70baf0303f3232 Mon Sep 17 00:00:00 2001 From: Max Chernoff Date: Thu, 5 Jun 2025 00:35:57 -0600 Subject: [PATCH 4/7] Allow systemd socket activation of containers run by confined users Signed-off-by: Max Chernoff --- container.te | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/container.te b/container.te index 30853e7..19c41b4 100644 --- a/container.te +++ b/container.te @@ -1648,3 +1648,7 @@ allow container_t container_ro_file_t:dir watch; allow container_t devpts_t:filesystem mount; allow container_t proc_t:filesystem mount; allow container_t tmpfs_t:filesystem remount; + +# Needed to allow systemd socket activation of containers ran by confined users +allow userdomain container_runtime_t:tcp_socket { bind create getopt listen setopt }; +allow userdomain container_runtime_t:udp_socket { bind create getopt listen setopt }; From 
7bf4b24ea22de4f1ea014ec225e4462e900e0047 Mon Sep 17 00:00:00 2001 From: Max Chernoff Date: Thu, 19 Jun 2025 20:24:55 -0600 Subject: [PATCH 5/7] Allow systemd to kill containers Signed-off-by: Max Chernoff --- container.te | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/container.te b/container.te index 19c41b4..7925a8d 100644 --- a/container.te +++ b/container.te @@ -1652,3 +1652,8 @@ allow container_t tmpfs_t:filesystem remount; # Needed to allow systemd socket activation of containers ran by confined users allow userdomain container_runtime_t:tcp_socket { bind create getopt listen setopt }; allow userdomain container_runtime_t:udp_socket { bind create getopt listen setopt }; + +# Allow systemd to kill containers (needed for when stopping a Quadlet service +# times out) +allow userdomain container_runtime_t:process { sigkill signal signull }; +allow userdomain container_t:process { sigkill signal signull }; From 0d8b09e1bf5a9ad853067ec98d05e16aa3bbec47 Mon Sep 17 00:00:00 2001 From: Max Chernoff Date: Thu, 19 Jun 2025 22:55:12 -0600 Subject: [PATCH 6/7] Allow confined users to run "podman build" Signed-off-by: Max Chernoff --- container.te | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/container.te b/container.te index 7925a8d..d2cc708 100644 --- a/container.te +++ b/container.te @@ -1657,3 +1657,7 @@ allow userdomain container_runtime_t:udp_socket { bind create getopt listen seto # times out) allow userdomain container_runtime_t:process { sigkill signal signull }; allow userdomain container_t:process { sigkill signal signull }; + +# Needed for "podman build" to work as a confined user +allow userdomain container_ro_file_t:dir mounton; +allow userdomain self:capability setuid; From a1914b9166cf8a510e779d9c3b09ff1b318f7e56 Mon Sep 17 00:00:00 2001 From: Max Chernoff Date: Thu, 19 Jun 2025 22:58:41 -0600 Subject: [PATCH 7/7] Ignore some harmless AVC denials Signed-off-by: Max Chernoff --- container.te | 10 ++++++++++ 1 file changed, 10 insertions(+) 
diff --git a/container.te b/container.te index d2cc708..8bb89a9 100644 --- a/container.te +++ b/container.te @@ -1653,6 +1653,10 @@ allow container_t tmpfs_t:filesystem remount; allow userdomain container_runtime_t:tcp_socket { bind create getopt listen setopt }; allow userdomain container_runtime_t:udp_socket { bind create getopt listen setopt }; +# When shutting down, systemd will stop the container before the socket unit, so +# ignore any AVC denials from systemd trying to accept the socket +dontaudit userdomain container_runtime_t:tcp_socket accept; + # Allow systemd to kill containers (needed for when stopping a Quadlet service # times out) allow userdomain container_runtime_t:process { sigkill signal signull }; @@ -1661,3 +1665,9 @@ allow userdomain container_t:process { sigkill signal signull }; # Needed for "podman build" to work as a confined user allow userdomain container_ro_file_t:dir mounton; allow userdomain self:capability setuid; + +# Harmless AVC denial +dontaudit container_runtime_t self:process2 nnp_transition; + +# Ignore containers trying to chown stdin/stdout/stderr +dontaudit container_t container_runtime_t:fifo_file setattr;