@@ -164,97 +164,101 @@ func disassembleFilter(filter *libseccomp.ScmpFilter) ([]bpf.Instruction, error)
164164 return program , nil
165165}
166166
// linuxAuditArch holds a Linux AUDIT_ARCH_* architecture identifier as a
// plain 32-bit value.
type linuxAuditArch uint32

// invalidArch is the zero linuxAuditArch. It is used as the placeholder
// value returned together with an error when no real architecture value is
// available.
const invalidArch linuxAuditArch = 0
170170
171- func archToNative (arch libseccomp.ScmpArch ) (nativeArch , error ) {
171+ func scmpArchToAuditArch (arch libseccomp.ScmpArch ) (linuxAuditArch , error ) {
172172 switch arch {
173173 case libseccomp .ArchNative :
174174 // Convert to actual native architecture.
175175 arch , err := libseccomp .GetNativeArch ()
176176 if err != nil {
177177 return invalidArch , fmt .Errorf ("unable to get native arch: %w" , err )
178178 }
179- return archToNative (arch )
179+ return scmpArchToAuditArch (arch )
180180 case libseccomp .ArchX86 :
181- return nativeArch (C .C_AUDIT_ARCH_I386 ), nil
181+ return linuxAuditArch (C .C_AUDIT_ARCH_I386 ), nil
182182 case libseccomp .ArchAMD64 , libseccomp .ArchX32 :
183183 // NOTE: x32 is treated like x86_64 except all x32 syscalls have the
184184 // 30th bit of the syscall number set to indicate that it's not a
185185 // normal x86_64 syscall.
186- return nativeArch (C .C_AUDIT_ARCH_X86_64 ), nil
186+ return linuxAuditArch (C .C_AUDIT_ARCH_X86_64 ), nil
187187 case libseccomp .ArchARM :
188- return nativeArch (C .C_AUDIT_ARCH_ARM ), nil
188+ return linuxAuditArch (C .C_AUDIT_ARCH_ARM ), nil
189189 case libseccomp .ArchARM64 :
190- return nativeArch (C .C_AUDIT_ARCH_AARCH64 ), nil
190+ return linuxAuditArch (C .C_AUDIT_ARCH_AARCH64 ), nil
191191 case libseccomp .ArchMIPS :
192- return nativeArch (C .C_AUDIT_ARCH_MIPS ), nil
192+ return linuxAuditArch (C .C_AUDIT_ARCH_MIPS ), nil
193193 case libseccomp .ArchMIPS64 :
194- return nativeArch (C .C_AUDIT_ARCH_MIPS64 ), nil
194+ return linuxAuditArch (C .C_AUDIT_ARCH_MIPS64 ), nil
195195 case libseccomp .ArchMIPS64N32 :
196- return nativeArch (C .C_AUDIT_ARCH_MIPS64N32 ), nil
196+ return linuxAuditArch (C .C_AUDIT_ARCH_MIPS64N32 ), nil
197197 case libseccomp .ArchMIPSEL :
198- return nativeArch (C .C_AUDIT_ARCH_MIPSEL ), nil
198+ return linuxAuditArch (C .C_AUDIT_ARCH_MIPSEL ), nil
199199 case libseccomp .ArchMIPSEL64 :
200- return nativeArch (C .C_AUDIT_ARCH_MIPSEL64 ), nil
200+ return linuxAuditArch (C .C_AUDIT_ARCH_MIPSEL64 ), nil
201201 case libseccomp .ArchMIPSEL64N32 :
202- return nativeArch (C .C_AUDIT_ARCH_MIPSEL64N32 ), nil
202+ return linuxAuditArch (C .C_AUDIT_ARCH_MIPSEL64N32 ), nil
203203 case libseccomp .ArchPPC :
204- return nativeArch (C .C_AUDIT_ARCH_PPC ), nil
204+ return linuxAuditArch (C .C_AUDIT_ARCH_PPC ), nil
205205 case libseccomp .ArchPPC64 :
206- return nativeArch (C .C_AUDIT_ARCH_PPC64 ), nil
206+ return linuxAuditArch (C .C_AUDIT_ARCH_PPC64 ), nil
207207 case libseccomp .ArchPPC64LE :
208- return nativeArch (C .C_AUDIT_ARCH_PPC64LE ), nil
208+ return linuxAuditArch (C .C_AUDIT_ARCH_PPC64LE ), nil
209209 case libseccomp .ArchS390 :
210- return nativeArch (C .C_AUDIT_ARCH_S390 ), nil
210+ return linuxAuditArch (C .C_AUDIT_ARCH_S390 ), nil
211211 case libseccomp .ArchS390X :
212- return nativeArch (C .C_AUDIT_ARCH_S390X ), nil
212+ return linuxAuditArch (C .C_AUDIT_ARCH_S390X ), nil
213213 case libseccomp .ArchRISCV64 :
214- return nativeArch (C .C_AUDIT_ARCH_RISCV64 ), nil
214+ return linuxAuditArch (C .C_AUDIT_ARCH_RISCV64 ), nil
215215 default :
216216 return invalidArch , fmt .Errorf ("unknown architecture: %v" , arch )
217217 }
218218}
219219
220- type lastSyscallMap map [nativeArch ]map [libseccomp.ScmpArch ]libseccomp.ScmpSyscall
220+ type lastSyscallMap map [linuxAuditArch ]map [libseccomp.ScmpArch ]libseccomp.ScmpSyscall
221221
222222// Figure out largest syscall number referenced in the filter for each
223223// architecture. We will be generating code based on the native architecture
224224// representation, but SCMP_ARCH_X32 means we have to track cases where the
225225// same architecture has different largest syscalls based on the mode.
226226func findLastSyscalls (config * configs.Seccomp ) (lastSyscallMap , error ) {
227- lastSyscalls := make (lastSyscallMap )
228- // Only loop over architectures which are present in the filter. Any other
229- // architectures will get the libseccomp bad architecture action anyway.
227+ scmpArchs := make (map [libseccomp.ScmpArch ]struct {})
230228 for _ , ociArch := range config .Architectures {
231229 arch , err := libseccomp .GetArchFromString (ociArch )
232230 if err != nil {
233231 return nil , fmt .Errorf ("unable to validate seccomp architecture: %w" , err )
234232 }
233+ scmpArchs [arch ] = struct {}{}
234+ }
235+ // On architectures like ppc64le, Docker inexplicably doesn't include the
236+ // native architecture in the architecture list which results in no
237+ // architectures being present in the list at all (rendering the ENOSYS
238+ // stub a no-op). So, always include the native architecture.
239+ if nativeScmpArch , err := libseccomp .GetNativeArch (); err != nil {
240+ return nil , fmt .Errorf ("unable to get native arch: %w" , err )
241+ } else if _ , ok := scmpArchs [nativeScmpArch ]; ! ok {
242+ logrus .Debugf ("seccomp: adding implied native architecture %v to config set" , nativeScmpArch )
243+ scmpArchs [nativeScmpArch ] = struct {}{}
244+ }
245+ logrus .Debugf ("seccomp: configured architecture set: %s" , scmpArchs )
235246
236- // Map native architecture to a real architecture value to avoid
237- // doubling-up the lastSyscall mapping.
238- if arch == libseccomp .ArchNative {
239- nativeArch , err := libseccomp .GetNativeArch ()
240- if err != nil {
241- return nil , fmt .Errorf ("unable to get native architecture: %w" , err )
242- }
243- arch = nativeArch
244- }
245-
246- // Figure out native architecture representation of the architecture.
247- nativeArch , err := archToNative (arch )
247+ // Only loop over architectures which are present in the filter. Any other
248+ // architectures will get the libseccomp bad architecture action anyway.
249+ lastSyscalls := make (lastSyscallMap )
250+ for arch := range scmpArchs {
251+ auditArch , err := scmpArchToAuditArch (arch )
248252 if err != nil {
249253 return nil , fmt .Errorf ("cannot map architecture %v to AUDIT_ARCH_ constant: %w" , arch , err )
250254 }
251255
252- if _ , ok := lastSyscalls [nativeArch ]; ! ok {
253- lastSyscalls [nativeArch ] = map [libseccomp.ScmpArch ]libseccomp.ScmpSyscall {}
256+ if _ , ok := lastSyscalls [auditArch ]; ! ok {
257+ lastSyscalls [auditArch ] = map [libseccomp.ScmpArch ]libseccomp.ScmpSyscall {}
254258 }
255- if _ , ok := lastSyscalls [nativeArch ][arch ]; ok {
259+ if _ , ok := lastSyscalls [auditArch ][arch ]; ok {
256260 // Because of ArchNative we may hit the same entry multiple times.
257- // Just skip it if we've seen this (nativeArch , ScmpArch)
261+ // Just skip it if we've seen this (linuxAuditArch , ScmpArch)
258262 // combination before.
259263 continue
260264 }
@@ -272,10 +276,11 @@ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
272276 }
273277 }
274278 if largestSyscall != 0 {
275- lastSyscalls [nativeArch ][arch ] = largestSyscall
279+ logrus .Debugf ("seccomp: largest syscall number for arch %v is %v" , arch , largestSyscall )
280+ lastSyscalls [auditArch ][arch ] = largestSyscall
276281 } else {
277- logrus .Warnf ("could not find any syscalls for arch %s " , ociArch )
278- delete (lastSyscalls [nativeArch ], arch )
282+ logrus .Warnf ("could not find any syscalls for arch %v " , arch )
283+ delete (lastSyscalls [auditArch ], arch )
279284 }
280285 }
281286 return lastSyscalls , nil
@@ -293,10 +298,10 @@ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
293298// close_range(2) which were added out-of-order in the syscall table between
294299// kernel releases.
295300func generateEnosysStub (lastSyscalls lastSyscallMap ) ([]bpf.Instruction , error ) {
296- // A jump-table for each nativeArch used to generate the initial
301+ // A jump-table for each linuxAuditArch used to generate the initial
297302 // conditional jumps -- measured from the *END* of the program so they
298303 // remain valid after prepending to the tail.
299- archJumpTable := map [nativeArch ]uint32 {}
304+ archJumpTable := map [linuxAuditArch ]uint32 {}
300305
301306 // Generate our own -ENOSYS rules for each architecture. They have to be
302307 // generated in reverse (prepended to the tail of the program) because the
@@ -309,7 +314,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
309314 }
310315
311316 // Generate the syscall -ENOSYS rules.
312- for nativeArch , maxSyscalls := range lastSyscalls {
317+ for auditArch , maxSyscalls := range lastSyscalls {
313318 // The number of instructions from the tail of this section which need
314319 // to be jumped in order to reach the -ENOSYS return. If the section
315320 // does not jump, it will fall through to the actual filter.
@@ -390,7 +395,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
390395
391396 // If we're on x86 we need to add a check for x32 and if we're in
392397 // the wrong mode we jump over the section.
393- if uint32 (nativeArch ) == uint32 (C .C_AUDIT_ARCH_X86_64 ) {
398+ if uint32 (auditArch ) == uint32 (C .C_AUDIT_ARCH_X86_64 ) {
394399 // Generate a prefix to check the mode.
395400 switch scmpArch {
396401 case libseccomp .ArchAMD64 :
@@ -419,8 +424,8 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
419424 section = append (section , sectionTail ... )
420425 case 2 :
421426 // x32 and x86_64 are a unique case, we can't handle any others.
422- if uint32 (nativeArch ) != uint32 (C .C_AUDIT_ARCH_X86_64 ) {
423- return nil , fmt .Errorf ("unknown architecture overlap on native arch %#x" , nativeArch )
427+ if uint32 (auditArch ) != uint32 (C .C_AUDIT_ARCH_X86_64 ) {
428+ return nil , fmt .Errorf ("unknown architecture overlap on native arch %#x" , auditArch )
424429 }
425430
426431 x32sysno , ok := maxSyscalls [libseccomp .ArchX32 ]
@@ -497,7 +502,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
497502 programTail = append (section , programTail ... )
498503
499504 // Update jump table.
500- archJumpTable [nativeArch ] = uint32 (len (programTail ))
505+ archJumpTable [auditArch ] = uint32 (len (programTail ))
501506 }
502507
503508 // Add a dummy "jump to filter" for any architecture we might miss below.
@@ -517,9 +522,9 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
517522 // architectures based on how large the jumps are going to be, or
518523 // re-sort the candidate architectures each time to make sure that we
519524 // pick the largest jump which is going to be smaller than 255.
520- for nativeArch := range lastSyscalls {
525+ for auditArch := range lastSyscalls {
521526 // We jump forwards but the jump table is calculated from the *END*.
522- jump := uint32 (len (programTail )) - archJumpTable [nativeArch ]
527+ jump := uint32 (len (programTail )) - archJumpTable [auditArch ]
523528
524529 // Same routine as above -- this is a basic jeq check, complicated
525530 // slightly if it turns out that we need to do a long jump.
@@ -528,7 +533,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
528533 // jeq [arch],[jump]
529534 bpf.JumpIf {
530535 Cond : bpf .JumpEqual ,
531- Val : uint32 (nativeArch ),
536+ Val : uint32 (auditArch ),
532537 SkipTrue : uint8 (jump ),
533538 },
534539 }, programTail ... )
@@ -537,7 +542,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
537542 // jne [arch],1
538543 bpf.JumpIf {
539544 Cond : bpf .JumpNotEqual ,
540- Val : uint32 (nativeArch ),
545+ Val : uint32 (auditArch ),
541546 SkipTrue : 1 ,
542547 },
543548 // ja [jump]
0 commit comments