Skip to content

Commit 1396050

Browse files
committed
Intercept execve/execveat under COW for upper layer binaries
Signed-off-by: Cong Wang <cwang@multikernel.io>
1 parent 80caea6 commit 1396050

File tree

3 files changed

+136
-0
lines changed

3 files changed

+136
-0
lines changed

src/sandlock/_context.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ def _notif_syscall_names(notif: "NotifPolicy") -> list[str]:
117117
"newfstatat", "statx", "faccessat",
118118
"symlinkat", "linkat", "fchmodat", "fchownat",
119119
"readlinkat", "truncate", "utimensat", "getdents64",
120+
"execve", "execveat",
120121
# Non-at variants (x86_64 has both, aarch64 only has *at)
121122
"unlink", "rmdir", "mkdir", "rename",
122123
"stat", "lstat", "access",

src/sandlock/_notif.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -938,6 +938,76 @@ def _dispatch(self, notif: SeccompNotif) -> None:
938938
self._respond_continue(notif.id)
939939
return
940940

941+
# --- COW: execve / execveat path redirection ---
942+
if self._cow_handler is not None:
943+
nr_execve = _SYSCALL_NR.get("execve")
944+
nr_execveat = _SYSCALL_NR.get("execveat")
945+
946+
if nr in (nr_execve, nr_execveat):
947+
try:
948+
if nr == nr_execve:
949+
# execve(pathname, argv, envp)
950+
pathname_addr = notif.data.args[0]
951+
path = resolve_openat_path(pid, -100, pathname_addr)
952+
else:
953+
# execveat(dirfd, pathname, argv, envp, flags)
954+
dirfd = ctypes.c_int32(notif.data.args[0] & 0xFFFFFFFF).value
955+
exec_flags = notif.data.args[4]
956+
if exec_flags & 0x1000: # AT_EMPTY_PATH — fd-based, no path
957+
self._respond_continue(notif.id)
958+
return
959+
pathname_addr = notif.data.args[1]
960+
path = resolve_openat_path(pid, dirfd, pathname_addr)
961+
except OSError:
962+
self._respond_continue(notif.id)
963+
return
964+
965+
if not self._id_valid(notif.id):
966+
return
967+
968+
if not self._cow_handler.matches(path):
969+
self._respond_continue(notif.id)
970+
return
971+
972+
# Resolve through COW layer
973+
real_path = self._cow_handler.handle_stat(path)
974+
if real_path is None:
975+
# File deleted in COW
976+
self._respond_errno(notif.id, errno.ENOENT)
977+
return
978+
979+
# If unchanged (real_path == path), let kernel handle it
980+
if real_path == path:
981+
self._respond_continue(notif.id)
982+
return
983+
984+
# File is in upper layer — inject fd then rewrite path
985+
try:
986+
src_fd = os.open(real_path, os.O_RDONLY | os.O_CLOEXEC)
987+
except OSError:
988+
self._respond_continue(notif.id)
989+
return
990+
991+
try:
992+
child_fd = self._inject_fd(notif.id, src_fd, cloexec=False)
993+
finally:
994+
os.close(src_fd)
995+
996+
if child_fd < 0:
997+
self._respond_continue(notif.id)
998+
return
999+
1000+
# Overwrite the pathname in child memory with /proc/self/fd/N
1001+
proc_path = f"/proc/self/fd/{child_fd}\0".encode()
1002+
try:
1003+
write_bytes(pid, pathname_addr, proc_path)
1004+
except OSError:
1005+
self._respond_continue(notif.id)
1006+
return
1007+
1008+
self._respond_continue(notif.id)
1009+
return
1010+
9411011
# --- Filesystem: open / openat virtualization + COW ---
9421012
nr_openat = _SYSCALL_NR.get("openat")
9431013
nr_open = _SYSCALL_NR.get("open")
@@ -1050,6 +1120,26 @@ def _respond_addfd(self, notif_id: int, src_fd: int) -> None:
10501120
ctypes.byref(resp),
10511121
)
10521122

1123+
def _inject_fd(self, notif_id: int, src_fd: int,
               cloexec: bool = False) -> int:
    """Add a copy of *src_fd* to the child while the notification stays open.

    No response is sent for ``notif_id``; the caller is still responsible
    for completing the notification afterwards.

    Args:
        notif_id: Id of the pending seccomp notification.
        src_fd: File descriptor in this process to install in the child.
        cloexec: When true, the installed fd is created with ``O_CLOEXEC``.

    Returns:
        The fd number in the child's table, or -1 on failure.
    """
    newfd_flags = 0
    if cloexec:
        newfd_flags = os.O_CLOEXEC

    request = SeccompNotifAddfd()
    request.id = notif_id
    request.srcfd = src_fd
    # flags == 0: the ioctl must not auto-send a response.
    request.flags = 0
    request.newfd = 0
    request.newfd_flags = newfd_flags

    # The ioctl's return value is handed back unchanged.
    return _libc.ioctl(
        ctypes.c_int(self._notify_fd),
        ctypes.c_ulong(SECCOMP_IOCTL_NOTIF_ADDFD),
        ctypes.byref(request),
    )
1142+
10531143
# Network, memory, and fork handlers moved to _network.py and _resource.py
10541144

10551145
def _handle_port_remap(self, notif: SeccompNotif, nr: int) -> None:

tests/test_integration.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,6 +1065,51 @@ def test_disk_quota_only_counts_delta(self, isolation):
10651065
assert result.success
10661066
assert b"orig=51200" in result.stdout
10671067

1068+
def test_cow_execve_runs_upper_binary(self, isolation):
    """A script created inside the sandbox can be executed there.

    The sandboxed callable writes ``hello.sh``, marks it executable and
    runs it.  The test checks the script's output and that the file does
    not exist in the real project directory afterwards.
    """
    import subprocess as sp

    def write_script_then_run():
        # Runs inside the sandbox; paths are relative to its working directory.
        with open('hello.sh', 'w') as script:
            script.write('#!/bin/sh\necho HELLO_FROM_COW\n')
        os.chmod('hello.sh', 0o755)
        raw_output = sp.check_output(['./hello.sh'])
        return raw_output.decode().strip()

    with tempfile.TemporaryDirectory() as td:
        project_dir = f"{td}/project"
        os.makedirs(project_dir)

        policy = _make_cow_policy(
            project_dir, td, isolation,
            on_exit=BranchAction.ABORT,
        )
        sandbox = Sandbox(policy)
        result = sandbox.call(write_script_then_run)

        assert result.success, f"Failed: {result.error}"
        assert result.value == "HELLO_FROM_COW"
        # The script must not have reached the real project directory.
        assert not os.path.exists(f"{project_dir}/hello.sh")
1088+
1089+
def test_cow_execve_modified_binary(self, isolation):
1090+
"""execve on a binary modified in COW upper layer runs the new version."""
1091+
import subprocess as sp
1092+
1093+
with tempfile.TemporaryDirectory() as td:
1094+
os.makedirs(f"{td}/project")
1095+
with open(f"{td}/project/run.sh", "w") as f:
1096+
f.write("#!/bin/sh\necho ORIGINAL\n")
1097+
os.chmod(f"{td}/project/run.sh", 0o755)
1098+
1099+
def modify_and_exec():
1100+
with open('run.sh', 'w') as f:
1101+
f.write('#!/bin/sh\necho MODIFIED\n')
1102+
return sp.check_output(['./run.sh']).decode().strip()
1103+
1104+
policy = _make_cow_policy(
1105+
f"{td}/project", td, isolation,
1106+
on_exit=BranchAction.ABORT,
1107+
)
1108+
result = Sandbox(policy).call(modify_and_exec)
1109+
assert result.success, f"Failed: {result.error}"
1110+
assert result.value == "MODIFIED"
1111+
assert open(f"{td}/project/run.sh").read() == "#!/bin/sh\necho ORIGINAL\n"
1112+
10681113
def test_cow_chown_goes_to_upper(self, isolation):
10691114
"""chown on a COW file operates on the upper copy, not the original."""
10701115
with tempfile.TemporaryDirectory() as td:

0 commit comments

Comments
 (0)