2424
2525from .exceptions import NotifError
2626from ._notif_policy import NotifAction , NotifPolicy
27- from ._procfs import read_bytes , resolve_openat_path
27+ from ._procfs import read_bytes , write_bytes , resolve_openat_path
2828from ._seccomp import (
2929 AUDIT_ARCH ,
3030 BPF_ABS ,
@@ -355,6 +355,72 @@ def _parse_msghdr_dest_ip(pid: int, msghdr_addr: int) -> str | None:
355355 return _parse_dest_ip (pid , name_addr , name_len )
356356
357357
358+ # --- getdents64 helpers ---
359+
def _build_dirent64(d_ino: int, d_off: int, d_type: int, name: str) -> bytes:
    """Build a single linux_dirent64 record.

    Layout produced (offsets in bytes)::

        struct linux_dirent64 {
            u64  d_ino;     //  0
            s64  d_off;     //  8
            u16  d_reclen;  // 16
            u8   d_type;    // 18
            char d_name[];  // 19+, NUL-terminated
        };

    Args:
        d_ino: Inode number stored in the record.
        d_off: Offset cookie stored in the record.
        d_type: One of the DT_* file type codes.
        name: Entry name as returned by ``os.scandir`` (may contain
            surrogate escapes for bytes that are not valid UTF-8).

    Returns:
        The encoded record, zero-padded so that its length (``d_reclen``)
        is a multiple of 8.
    """
    # os.scandir decodes undecodable filename bytes to lone surrogates;
    # "surrogateescape" turns them back into the original bytes instead of
    # raising UnicodeEncodeError.
    name_bytes = name.encode("utf-8", "surrogateescape") + b"\0"

    # 19-byte fixed header + NUL-terminated name, rounded up to 8 bytes.
    reclen = (19 + len(name_bytes) + 7) & ~7

    buf = bytearray(reclen)
    # "=" keeps native byte order but disables native alignment padding, so
    # the header is exactly 19 bytes.
    struct.pack_into("=QqHB", buf, 0, d_ino, d_off, reclen, d_type)
    buf[19:19 + len(name_bytes)] = name_bytes
    return bytes(buf)
380+
381+
def _build_filtered_dirents(sandbox_pids: set[int]) -> list[bytes]:
    """Build dirent64 records for /proc, hiding PIDs outside the sandbox.

    The real /proc directory is scanned from the calling (supervisor)
    process. Every all-digit entry whose numeric value is not in
    ``sandbox_pids`` is dropped; everything else is re-encoded as a
    synthetic linux_dirent64 record with a monotonically increasing
    ``d_off``.

    Args:
        sandbox_pids: PIDs whose /proc/<pid> directories stay visible.

    Returns:
        The encoded records in listing order, starting with "." and "..".
        If /proc cannot be opened the list is empty; if the scan fails
        part-way the records collected so far are returned (best effort).
    """
    DT_DIR = 4
    DT_REG = 8
    DT_LNK = 10
    entries: list[bytes] = []
    d_off = 0
    try:
        with os.scandir("/proc") as it:
            # os.scandir never yields "." and "..", but a real directory
            # read does, so emit them explicitly. This happens only after
            # /proc opened successfully, which keeps the
            # "empty list on failure" behaviour.
            for dot_name, dot_path in ((".", "/proc"), ("..", "/proc/..")):
                try:
                    dot_ino = os.stat(dot_path).st_ino
                except OSError:
                    dot_ino = 0
                d_off += 1
                entries.append(_build_dirent64(dot_ino, d_off, DT_DIR, dot_name))

            for entry in it:
                name = entry.name
                # Filter out foreign PID directories.
                if name.isdigit() and int(name) not in sandbox_pids:
                    continue

                d_off += 1
                try:
                    if entry.is_dir(follow_symlinks=False):
                        d_type = DT_DIR
                    elif entry.is_symlink():
                        d_type = DT_LNK
                    else:
                        d_type = DT_REG
                except OSError:
                    # Type could not be determined; fall back to a regular file.
                    d_type = DT_REG

                try:
                    d_ino = entry.inode()
                except OSError:
                    d_ino = 0

                entries.append(_build_dirent64(d_ino, d_off, d_type, name))
    except OSError:
        # Deliberate best effort: return whatever was collected.
        pass
    return entries
422+
423+
358424# --- Notification supervisor ---
359425
360426class NotifSupervisor :
@@ -384,6 +450,8 @@ def __init__(
384450 self ._brk_base : dict [int , int ] = {} # pid → last known brk
385451 self ._proc_count : int = 1 # Start at 1 (the initial child)
386452 self ._proc_pids : set [int ] = {child_pid } # All known sandbox PIDs
453+ # getdents /proc filtering: fd → list of remaining dirent entries
454+ self ._proc_dir_cache : dict [int , list [bytes ]] = {}
387455
388456 def start (self ) -> None :
389457 """Start the supervisor thread."""
@@ -499,6 +567,14 @@ def _dispatch(self, notif: SeccompNotif) -> None:
499567 self ._handle_net (notif , nr )
500568 return
501569
570+ # --- /proc readdir PID filtering ---
571+ nr_getdents64 = _SYSCALL_NR .get ("getdents64" )
572+ nr_getdents = _SYSCALL_NR .get ("getdents" )
573+
574+ if nr in (nr_getdents64 , nr_getdents ) and self ._policy .isolate_pids :
575+ self ._handle_getdents (notif )
576+ return
577+
502578 # --- Filesystem: open / openat virtualization ---
503579 nr_openat = _SYSCALL_NR .get ("openat" )
504580 nr_open = _SYSCALL_NR .get ("open" )
@@ -658,8 +734,77 @@ def _handle_fork(self, notif: SeccompNotif, nr: int) -> None:
658734 # The new child's PID is unknown until it makes its first
659735 # intercepted syscall — tracked lazily via _record_pid.
660736 self ._proc_pids .add (notif .pid )
737+ # Invalidate /proc readdir cache so new PIDs appear
738+ self ._proc_dir_cache .clear ()
661739 self ._respond_continue (notif .id )
662740
741+ def _handle_getdents (self , notif : SeccompNotif ) -> None :
742+ """Handle getdents64/getdents — filter /proc readdir to hide foreign PIDs.
743+
744+ On first call for a given fd, reads all /proc entries from the
745+ supervisor, filters out foreign PIDs, builds dirent64 entries,
746+ and caches them. Each call returns as many cached entries as fit
747+ in the child's buffer, then returns 0 when exhausted.
748+ """
749+ pid = notif .pid
750+ child_fd_num = notif .data .args [0 ] & 0xFFFFFFFF
751+ buf_addr = notif .data .args [1 ]
752+ buf_size = notif .data .args [2 ] & 0xFFFFFFFF
753+
754+ # Check if the fd points to /proc
755+ try :
756+ target = os .readlink (f"/proc/{ pid } /fd/{ child_fd_num } " )
757+ except OSError :
758+ self ._respond_continue (notif .id )
759+ return
760+
761+ if target != "/proc" :
762+ self ._respond_continue (notif .id )
763+ return
764+
765+ # Build cache on first call for this fd
766+ cache_key = (pid , child_fd_num )
767+ if cache_key not in self ._proc_dir_cache :
768+ sandbox_pids = None
769+ if self ._pids_fn is not None :
770+ sandbox_pids = set (self ._pids_fn ())
771+ if sandbox_pids is None :
772+ self ._respond_continue (notif .id )
773+ return
774+
775+ entries = _build_filtered_dirents (sandbox_pids )
776+ self ._proc_dir_cache [cache_key ] = entries
777+
778+ entries = self ._proc_dir_cache [cache_key ]
779+
780+ if not self ._id_valid (notif .id ):
781+ return
782+
783+ # Pack as many entries as fit into buf_size
784+ result = bytearray ()
785+ consumed = 0
786+ for entry in entries :
787+ if len (result ) + len (entry ) > buf_size :
788+ break
789+ result .extend (entry )
790+ consumed += 1
791+
792+ # Remove consumed entries from cache
793+ if consumed > 0 :
794+ self ._proc_dir_cache [cache_key ] = entries [consumed :]
795+ elif not entries :
796+ # All entries consumed — clean up cache
797+ del self ._proc_dir_cache [cache_key ]
798+
799+ # Write to child memory and return byte count
800+ try :
801+ if result :
802+ write_bytes (pid , buf_addr , bytes (result ))
803+ self ._respond_val (notif .id , len (result ))
804+ except OSError :
805+ self ._proc_dir_cache .pop (cache_key , None )
806+ self ._respond_continue (notif .id )
807+
663808 def _id_valid (self , notif_id : int ) -> bool :
664809 """Check if a notification ID is still valid (TOCTTOU check)."""
665810 id_val = ctypes .c_uint64 (notif_id )
@@ -683,6 +828,19 @@ def _respond_continue(self, notif_id: int) -> None:
683828 ctypes .byref (resp ),
684829 )
685830
831+ def _respond_val (self , notif_id : int , val : int ) -> None :
832+ """Return a specific value as the syscall result."""
833+ resp = SeccompNotifResp ()
834+ resp .id = notif_id
835+ resp .val = val
836+ resp .error = 0
837+ resp .flags = 0
838+ _libc .ioctl (
839+ ctypes .c_int (self ._notify_fd ),
840+ ctypes .c_ulong (SECCOMP_IOCTL_NOTIF_SEND ),
841+ ctypes .byref (resp ),
842+ )
843+
686844 def _respond_errno (self , notif_id : int , errno_code : int ) -> None :
687845 """Deny the syscall with the given errno."""
688846 resp = SeccompNotifResp ()
0 commit comments