@@ -171,87 +171,101 @@ func disassembleFilter(filter *libseccomp.ScmpFilter) ([]bpf.Instruction, error)
171171 return program , nil
172172}
173173
// linuxAuditArch holds a Linux AUDIT_ARCH_* constant, i.e. the architecture
// identifier that scmpArchToAuditArch derives from a libseccomp architecture.
type linuxAuditArch uint32

// invalidArch is the zero linuxAuditArch. It is what gets returned alongside
// an error when no AUDIT_ARCH_* value could be determined.
const invalidArch = linuxAuditArch(0)
177177
178- func archToNative (arch libseccomp.ScmpArch ) (nativeArch , error ) {
178+ func scmpArchToAuditArch (arch libseccomp.ScmpArch ) (linuxAuditArch , error ) {
179179 switch arch {
180180 case libseccomp .ArchNative :
181181 // Convert to actual native architecture.
182182 arch , err := libseccomp .GetNativeArch ()
183183 if err != nil {
184184 return invalidArch , fmt .Errorf ("unable to get native arch: %w" , err )
185185 }
186- return archToNative (arch )
186+ return scmpArchToAuditArch (arch )
187187 case libseccomp .ArchX86 :
188- return nativeArch (C .C_AUDIT_ARCH_I386 ), nil
188+ return linuxAuditArch (C .C_AUDIT_ARCH_I386 ), nil
189189 case libseccomp .ArchAMD64 , libseccomp .ArchX32 :
190190 // NOTE: x32 is treated like x86_64 except all x32 syscalls have the
191191 // 30th bit of the syscall number set to indicate that it's not a
192192 // normal x86_64 syscall.
193- return nativeArch (C .C_AUDIT_ARCH_X86_64 ), nil
193+ return linuxAuditArch (C .C_AUDIT_ARCH_X86_64 ), nil
194194 case libseccomp .ArchARM :
195- return nativeArch (C .C_AUDIT_ARCH_ARM ), nil
195+ return linuxAuditArch (C .C_AUDIT_ARCH_ARM ), nil
196196 case libseccomp .ArchARM64 :
197- return nativeArch (C .C_AUDIT_ARCH_AARCH64 ), nil
197+ return linuxAuditArch (C .C_AUDIT_ARCH_AARCH64 ), nil
198198 case libseccomp .ArchMIPS :
199- return nativeArch (C .C_AUDIT_ARCH_MIPS ), nil
199+ return linuxAuditArch (C .C_AUDIT_ARCH_MIPS ), nil
200200 case libseccomp .ArchMIPS64 :
201- return nativeArch (C .C_AUDIT_ARCH_MIPS64 ), nil
201+ return linuxAuditArch (C .C_AUDIT_ARCH_MIPS64 ), nil
202202 case libseccomp .ArchMIPS64N32 :
203- return nativeArch (C .C_AUDIT_ARCH_MIPS64N32 ), nil
203+ return linuxAuditArch (C .C_AUDIT_ARCH_MIPS64N32 ), nil
204204 case libseccomp .ArchMIPSEL :
205- return nativeArch (C .C_AUDIT_ARCH_MIPSEL ), nil
205+ return linuxAuditArch (C .C_AUDIT_ARCH_MIPSEL ), nil
206206 case libseccomp .ArchMIPSEL64 :
207- return nativeArch (C .C_AUDIT_ARCH_MIPSEL64 ), nil
207+ return linuxAuditArch (C .C_AUDIT_ARCH_MIPSEL64 ), nil
208208 case libseccomp .ArchMIPSEL64N32 :
209- return nativeArch (C .C_AUDIT_ARCH_MIPSEL64N32 ), nil
209+ return linuxAuditArch (C .C_AUDIT_ARCH_MIPSEL64N32 ), nil
210210 case libseccomp .ArchPPC :
211- return nativeArch (C .C_AUDIT_ARCH_PPC ), nil
211+ return linuxAuditArch (C .C_AUDIT_ARCH_PPC ), nil
212212 case libseccomp .ArchPPC64 :
213- return nativeArch (C .C_AUDIT_ARCH_PPC64 ), nil
213+ return linuxAuditArch (C .C_AUDIT_ARCH_PPC64 ), nil
214214 case libseccomp .ArchPPC64LE :
215- return nativeArch (C .C_AUDIT_ARCH_PPC64LE ), nil
215+ return linuxAuditArch (C .C_AUDIT_ARCH_PPC64LE ), nil
216216 case libseccomp .ArchS390 :
217- return nativeArch (C .C_AUDIT_ARCH_S390 ), nil
217+ return linuxAuditArch (C .C_AUDIT_ARCH_S390 ), nil
218218 case libseccomp .ArchS390X :
219- return nativeArch (C .C_AUDIT_ARCH_S390X ), nil
219+ return linuxAuditArch (C .C_AUDIT_ARCH_S390X ), nil
220220 case libseccomp .ArchRISCV64 :
221- return nativeArch (C .C_AUDIT_ARCH_RISCV64 ), nil
221+ return linuxAuditArch (C .C_AUDIT_ARCH_RISCV64 ), nil
222222 default :
223223 return invalidArch , fmt .Errorf ("unknown architecture: %v" , arch )
224224 }
225225}
226226
227- type lastSyscallMap map [nativeArch ]map [libseccomp.ScmpArch ]libseccomp.ScmpSyscall
227+ type lastSyscallMap map [linuxAuditArch ]map [libseccomp.ScmpArch ]libseccomp.ScmpSyscall
228228
229229// Figure out largest syscall number referenced in the filter for each
230230// architecture. We will be generating code based on the native architecture
231231// representation, but SCMP_ARCH_X32 means we have to track cases where the
232232// same architecture has different largest syscalls based on the mode.
233233func findLastSyscalls (config * configs.Seccomp ) (lastSyscallMap , error ) {
234- lastSyscalls := make (lastSyscallMap )
235- // Only loop over architectures which are present in the filter. Any other
236- // architectures will get the libseccomp bad architecture action anyway.
234+ scmpArchs := make (map [libseccomp.ScmpArch ]struct {})
237235 for _ , ociArch := range config .Architectures {
238236 arch , err := libseccomp .GetArchFromString (ociArch )
239237 if err != nil {
240238 return nil , fmt .Errorf ("unable to validate seccomp architecture: %w" , err )
241239 }
240+ scmpArchs [arch ] = struct {}{}
241+ }
242+ // On architectures like ppc64le, Docker inexplicably doesn't include the
243+ // native architecture in the architecture list which results in no
244+ // architectures being present in the list at all (rendering the ENOSYS
245+ // stub a no-op). So, always include the native architecture.
246+ if nativeScmpArch , err := libseccomp .GetNativeArch (); err != nil {
247+ return nil , fmt .Errorf ("unable to get native arch: %w" , err )
248+ } else if _ , ok := scmpArchs [nativeScmpArch ]; ! ok {
249+ logrus .Debugf ("seccomp: adding implied native architecture %v to config set" , nativeScmpArch )
250+ scmpArchs [nativeScmpArch ] = struct {}{}
251+ }
252+ logrus .Debugf ("seccomp: configured architecture set: %s" , scmpArchs )
242253
243- // Figure out native architecture representation of the architecture.
244- nativeArch , err := archToNative (arch )
254+ // Only loop over architectures which are present in the filter. Any other
255+ // architectures will get the libseccomp bad architecture action anyway.
256+ lastSyscalls := make (lastSyscallMap )
257+ for arch := range scmpArchs {
258+ auditArch , err := scmpArchToAuditArch (arch )
245259 if err != nil {
246260 return nil , fmt .Errorf ("cannot map architecture %v to AUDIT_ARCH_ constant: %w" , arch , err )
247261 }
248262
249- if _ , ok := lastSyscalls [nativeArch ]; ! ok {
250- lastSyscalls [nativeArch ] = map [libseccomp.ScmpArch ]libseccomp.ScmpSyscall {}
263+ if _ , ok := lastSyscalls [auditArch ]; ! ok {
264+ lastSyscalls [auditArch ] = map [libseccomp.ScmpArch ]libseccomp.ScmpSyscall {}
251265 }
252- if _ , ok := lastSyscalls [nativeArch ][arch ]; ok {
266+ if _ , ok := lastSyscalls [auditArch ][arch ]; ok {
253267 // Because of ArchNative we may hit the same entry multiple times.
254- // Just skip it if we've seen this (nativeArch , ScmpArch)
268+ // Just skip it if we've seen this (linuxAuditArch , ScmpArch)
255269 // combination before.
256270 continue
257271 }
@@ -269,10 +283,11 @@ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
269283 }
270284 }
271285 if largestSyscall != 0 {
272- lastSyscalls [nativeArch ][arch ] = largestSyscall
286+ logrus .Debugf ("seccomp: largest syscall number for arch %v is %v" , arch , largestSyscall )
287+ lastSyscalls [auditArch ][arch ] = largestSyscall
273288 } else {
274- logrus .Warnf ("could not find any syscalls for arch %s " , ociArch )
275- delete (lastSyscalls [nativeArch ], arch )
289+ logrus .Warnf ("could not find any syscalls for arch %v " , arch )
290+ delete (lastSyscalls [auditArch ], arch )
276291 }
277292 }
278293 return lastSyscalls , nil
@@ -290,10 +305,10 @@ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
290305// close_range(2) which were added out-of-order in the syscall table between
291306// kernel releases.
292307func generateEnosysStub (lastSyscalls lastSyscallMap ) ([]bpf.Instruction , error ) {
293- // A jump-table for each nativeArch used to generate the initial
308+ // A jump-table for each linuxAuditArch used to generate the initial
294309 // conditional jumps -- measured from the *END* of the program so they
295310 // remain valid after prepending to the tail.
296- archJumpTable := map [nativeArch ]uint32 {}
311+ archJumpTable := map [linuxAuditArch ]uint32 {}
297312
298313 // Generate our own -ENOSYS rules for each architecture. They have to be
299314 // generated in reverse (prepended to the tail of the program) because the
@@ -306,7 +321,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
306321 }
307322
308323 // Generate the syscall -ENOSYS rules.
309- for nativeArch , maxSyscalls := range lastSyscalls {
324+ for auditArch , maxSyscalls := range lastSyscalls {
310325 // The number of instructions from the tail of this section which need
311326 // to be jumped in order to reach the -ENOSYS return. If the section
312327 // does not jump, it will fall through to the actual filter.
@@ -387,7 +402,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
387402
388403 // If we're on x86 we need to add a check for x32 and if we're in
389404 // the wrong mode we jump over the section.
390- if uint32 (nativeArch ) == uint32 (C .C_AUDIT_ARCH_X86_64 ) {
405+ if uint32 (auditArch ) == uint32 (C .C_AUDIT_ARCH_X86_64 ) {
391406 // Generate a prefix to check the mode.
392407 switch scmpArch {
393408 case libseccomp .ArchAMD64 :
@@ -416,8 +431,8 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
416431 section = append (section , sectionTail ... )
417432 case 2 :
418433 // x32 and x86_64 are a unique case, we can't handle any others.
419- if uint32 (nativeArch ) != uint32 (C .C_AUDIT_ARCH_X86_64 ) {
420- return nil , fmt .Errorf ("unknown architecture overlap on native arch %#x" , nativeArch )
434+ if uint32 (auditArch ) != uint32 (C .C_AUDIT_ARCH_X86_64 ) {
435+ return nil , fmt .Errorf ("unknown architecture overlap on native arch %#x" , auditArch )
421436 }
422437
423438 x32sysno , ok := maxSyscalls [libseccomp .ArchX32 ]
@@ -494,7 +509,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
494509 programTail = append (section , programTail ... )
495510
496511 // Update jump table.
497- archJumpTable [nativeArch ] = uint32 (len (programTail ))
512+ archJumpTable [auditArch ] = uint32 (len (programTail ))
498513 }
499514
500515 // Add a dummy "jump to filter" for any architecture we might miss below.
@@ -514,9 +529,9 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
514529 // architectures based on how large the jumps are going to be, or
515530 // re-sort the candidate architectures each time to make sure that we
516531 // pick the largest jump which is going to be smaller than 255.
517- for nativeArch := range lastSyscalls {
532+ for auditArch := range lastSyscalls {
518533 // We jump forwards but the jump table is calculated from the *END*.
519- jump := uint32 (len (programTail )) - archJumpTable [nativeArch ]
534+ jump := uint32 (len (programTail )) - archJumpTable [auditArch ]
520535
521536 // Same routine as above -- this is a basic jeq check, complicated
522537 // slightly if it turns out that we need to do a long jump.
@@ -525,7 +540,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
525540 // jeq [arch],[jump]
526541 bpf.JumpIf {
527542 Cond : bpf .JumpEqual ,
528- Val : uint32 (nativeArch ),
543+ Val : uint32 (auditArch ),
529544 SkipTrue : uint8 (jump ),
530545 },
531546 }, programTail ... )
@@ -534,7 +549,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
534549 // jne [arch],1
535550 bpf.JumpIf {
536551 Cond : bpf .JumpNotEqual ,
537- Val : uint32 (nativeArch ),
552+ Val : uint32 (auditArch ),
538553 SkipTrue : 1 ,
539554 },
540555 // ja [jump]
0 commit comments