@@ -80,6 +80,14 @@ import "C"
8080
8181var retErrnoEnosys = uint32 (C .C_ACT_ERRNO_ENOSYS )
8282
83+ // Assume sizeof(int) == 4 in the BPF program.
84+ const bpfSizeofInt = 4
85+
86+ // This syscall is used for multiplexing "large" syscalls on s390(x). Unknown
87+ // syscalls will end up with this syscall number, so we need to explicitly
88+ // return -ENOSYS for this syscall on those architectures.
89+ const s390xMultiplexSyscall libseccomp.ScmpSyscall = 0
90+
8391func isAllowAction (action configs.Action ) bool {
8492 switch action {
8593 // Trace is considered an "allow" action because a good tracer should
@@ -94,15 +102,14 @@ func isAllowAction(action configs.Action) bool {
94102
95103func parseProgram (rdr io.Reader ) ([]bpf.RawInstruction , error ) {
96104 var program []bpf.RawInstruction
97- loop:
98105 for {
99106 // Read the next instruction. We have to use NativeEndian because
100107 // seccomp_export_bpf outputs the program in *host* endian-ness.
101108 var insn unix.SockFilter
102109 if err := binary .Read (rdr , utils .NativeEndian , & insn ); err != nil {
103110 if errors .Is (err , io .EOF ) {
104111 // Parsing complete.
105- break loop
112+ break
106113 }
107114 if errors .Is (err , io .ErrUnexpectedEOF ) {
108115 // Parsing stopped mid-instruction.
@@ -315,19 +322,46 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
315322 // directly from the arch code so we need to do it here. Sadly we can't
316323 // share this code between architecture branches.
317324 section := []bpf.Instruction {
318- // load [0]
319- bpf.LoadAbsolute {Off : 0 , Size : 4 }, // NOTE: We assume sizeof(int) == 4.
325+ // load [0] (syscall number)
326+ bpf.LoadAbsolute {Off : 0 , Size : bpfSizeofInt },
320327 }
321328
322329 switch len (maxSyscalls ) {
323330 case 0 :
324331 // No syscalls found for this arch -- skip it and move on.
325332 continue
326333 case 1 :
327- // Get the only syscall in the map.
328- var sysno libseccomp.ScmpSyscall
329- for _ , no := range maxSyscalls {
334+ // Get the only syscall and scmpArch in the map.
335+ var (
336+ scmpArch libseccomp.ScmpArch
337+ sysno libseccomp.ScmpSyscall
338+ )
339+ for arch , no := range maxSyscalls {
330340 sysno = no
341+ scmpArch = arch
342+ }
343+
344+ switch scmpArch {
345+ // Return -ENOSYS for setup(2) on s390(x). This syscall is used for
346+ // multiplexing "large syscall number" syscalls, but if the syscall
347+ // number is not known to the kernel then the syscall number is
348+ // left unchanged (and because it is sysno=0, you'll end up with
349+ // EPERM for syscalls the kernel doesn't know about).
350+ //
351+ // The actual setup(2) syscall is never used by userspace anymore
352+ // (and hasn't existed for decades) outside of this multiplexing
353+ // scheme so returning -ENOSYS is fine.
354+ case libseccomp .ArchS390 , libseccomp .ArchS390X :
355+ section = append (section , []bpf.Instruction {
356+ // jne [setup=0],1
357+ bpf.JumpIf {
358+ Cond : bpf .JumpNotEqual ,
359+ Val : uint32 (s390xMultiplexSyscall ),
360+ SkipTrue : 1 ,
361+ },
362+ // ret [ENOSYS]
363+ bpf.RetConstant {Val : retErrnoEnosys },
364+ }... )
331365 }
332366
333367 // The simplest case just boils down to a single jgt instruction,
@@ -349,8 +383,8 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
349383 sectionTail = []bpf.Instruction {
350384 // jle [syscall],1
351385 bpf.JumpIf {Cond : bpf .JumpLessOrEqual , Val : uint32 (sysno ), SkipTrue : 1 },
352- // ja [baseJumpEnosys+1 ]
353- bpf.Jump { Skip : baseJumpEnosys + 1 },
386+ // ret [ENOSYS ]
387+ bpf.RetConstant { Val : retErrnoEnosys },
354388 // ja [baseJumpFilter]
355389 bpf.Jump {Skip : baseJumpFilter },
356390 }
@@ -359,12 +393,6 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
359393 // If we're on x86 we need to add a check for x32 and if we're in
360394 // the wrong mode we jump over the section.
361395 if uint32 (nativeArch ) == uint32 (C .C_AUDIT_ARCH_X86_64 ) {
362- // Grab the only architecture in the map.
363- var scmpArch libseccomp.ScmpArch
364- for arch := range maxSyscalls {
365- scmpArch = arch
366- }
367-
368396 // Generate a prefix to check the mode.
369397 switch scmpArch {
370398 case libseccomp .ArchAMD64 :
@@ -440,7 +468,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
440468 // jset (1<<30),1
441469 // jgt [x86 syscall],1,2
442470 // jle [x32 syscall],1
443- // ja [baseJumpEnosys+1 ]
471+ // ret [ENOSYS ]
444472 // ja [baseJumpFilter]
445473 section = append (section , []bpf.Instruction {
446474 // jset (1<<30),1
@@ -451,14 +479,14 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
451479 Val : uint32 (x86sysno ),
452480 SkipTrue : 1 , SkipFalse : 2 ,
453481 },
454- // jle [x32 syscall],[baseJumpEnosys]
482+ // jle [x32 syscall],1
455483 bpf.JumpIf {
456484 Cond : bpf .JumpLessOrEqual ,
457485 Val : uint32 (x32sysno ),
458486 SkipTrue : 1 ,
459487 },
460- // ja [baseJumpEnosys+1 ]
461- bpf.Jump { Skip : baseJumpEnosys + 1 },
488+ // ret [ENOSYS ]
489+ bpf.RetConstant { Val : retErrnoEnosys },
462490 // ja [baseJumpFilter]
463491 bpf.Jump {Skip : baseJumpFilter },
464492 }... )
@@ -522,8 +550,8 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
522550
523551 // Prepend the load instruction for the architecture.
524552 programTail = append ([]bpf.Instruction {
525- // load [4]
526- bpf.LoadAbsolute {Off : 4 , Size : 4 }, // NOTE: We assume sizeof(int) == 4.
553+ // load [4] (architecture)
554+ bpf.LoadAbsolute {Off : bpfSizeofInt , Size : bpfSizeofInt },
527555 }, programTail ... )
528556
529557 // And that's all folks!
0 commit comments