Skip to content

Commit 1a8b558

Browse files
committed
merge #4024 into opencontainers/runc:main
Rodrigo Campos (1): libct/dmz: Reduce the binary size using nolibc LGTMs: AkihiroSuda cyphar
2 parents a32ad76 + 90f5da6 commit 1a8b558

32 files changed

+5110
-2
lines changed

libcontainer/dmz/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,6 @@
22
include ../../cc_platform.mk
33

44
runc-dmz: _dmz.c
5-
$(CC) $(CFLAGS) -static -o $@ $^
5+
# We use the flags suggested in nolibc/nolibc.h; they make the binary very small.
6+
$(CC) $(CFLAGS) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib -lgcc -static -o $@ $^
67
$(STRIP) -gs $@

libcontainer/dmz/README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Runc-dmz
2+
3+
runc-dmz is a small and very simple binary used to execute the container's entrypoint.
4+
5+
## Making it small
6+
7+
To make it small we use the Linux kernel's [nolibc include files][nolibc-upstream], so we don't use the libc.
8+
9+
A full copy of it is kept here in `nolibc/`, except for the Makefile, which is GPL-licensed and was removed. DO NOT FORGET to
10+
remove the GPL code if updating the nolibc/ directory.
11+
12+
The current version in that folder is from Linux 6.6-rc3 tag (556fb7131e03b0283672fb40f6dc2d151752aaa7).
13+
14+
nolibc also supports all the architectures we support in runc.
15+
16+
[nolibc-upstream]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/include/nolibc?h=v6.6-rc3

libcontainer/dmz/_dmz.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
#include <unistd.h>
1+
#include "xstat.h"
2+
#include "nolibc/nolibc.h"
23

34
extern char **environ;
45

libcontainer/dmz/linux/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
This directory contains some files copied from Linux's repo, from the uapi:
2+
3+
tools/include/uapi/linux/
4+
5+
The files were taken from the Linux repo at the 6.6-rc3 tag (556fb7131e03b0283672fb40f6dc2d151752aaa7).

libcontainer/dmz/linux/stat.h

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2+
#ifndef _UAPI_LINUX_STAT_H
3+
#define _UAPI_LINUX_STAT_H
4+
5+
#include <linux/types.h>
6+
7+
#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
8+
9+
#define S_IFMT 00170000
10+
#define S_IFSOCK 0140000
11+
#define S_IFLNK 0120000
12+
#define S_IFREG 0100000
13+
#define S_IFBLK 0060000
14+
#define S_IFDIR 0040000
15+
#define S_IFCHR 0020000
16+
#define S_IFIFO 0010000
17+
#define S_ISUID 0004000
18+
#define S_ISGID 0002000
19+
#define S_ISVTX 0001000
20+
21+
/*
 * File-type predicates. Each one masks the mode value m with S_IFMT and
 * compares the result against the matching S_IF* constant, so it evaluates
 * to nonzero only when m carries exactly that file type.
 */
#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK)
22+
#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
23+
#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
24+
#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR)
25+
#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK)
26+
#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO)
27+
#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK)
28+
29+
#define S_IRWXU 00700
30+
#define S_IRUSR 00400
31+
#define S_IWUSR 00200
32+
#define S_IXUSR 00100
33+
34+
#define S_IRWXG 00070
35+
#define S_IRGRP 00040
36+
#define S_IWGRP 00020
37+
#define S_IXGRP 00010
38+
39+
#define S_IRWXO 00007
40+
#define S_IROTH 00004
41+
#define S_IWOTH 00002
42+
#define S_IXOTH 00001
43+
44+
#endif
45+
46+
/*
47+
* Timestamp structure for the timestamps in struct statx.
48+
*
49+
* tv_sec holds the number of seconds before (negative) or after (positive)
50+
* 00:00:00 1st January 1970 UTC.
51+
*
52+
* tv_nsec holds a number of nanoseconds (0..999,999,999) after the tv_sec time.
53+
*
54+
* __reserved is held in case we need a yet finer resolution.
55+
*/
56+
struct statx_timestamp {
57+
__s64 tv_sec; /* Seconds before (negative) or after (positive) 00:00:00 1st January 1970 UTC */
58+
__u32 tv_nsec; /* Nanoseconds (0..999,999,999) after the tv_sec time */
59+
__s32 __reserved; /* Held in case a yet finer resolution is needed */
60+
};
61+
62+
/*
63+
* Structures for the extended file attribute retrieval system call
64+
* (statx()).
65+
*
66+
* The caller passes a mask of what they're specifically interested in as a
67+
* parameter to statx(). What statx() actually got will be indicated in
68+
* st_mask upon return.
69+
*
70+
* For each bit in the mask argument:
71+
*
72+
* - if the datum is not supported:
73+
*
74+
* - the bit will be cleared, and
75+
*
76+
* - the datum will be set to an appropriate fabricated value if one is
77+
* available (eg. CIFS can take a default uid and gid), otherwise
78+
*
79+
* - the field will be cleared;
80+
*
81+
* - otherwise, if explicitly requested:
82+
*
83+
* - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is
84+
* set or if the datum is considered out of date, and
85+
*
86+
* - the field will be filled in and the bit will be set;
87+
*
88+
* - otherwise, if not requested, but available in approximate form without any
89+
* effort, it will be filled in anyway, and the bit will be set upon return
90+
* (it might not be up to date, however, and no attempt will be made to
91+
* synchronise the internal state first);
92+
*
93+
* - otherwise the field and the bit will be cleared before returning.
94+
*
95+
* Items in STATX_BASIC_STATS may be marked unavailable on return, but they
96+
* will have values installed for compatibility purposes so that stat() and
97+
* co. can be emulated in userspace.
98+
*/
99+
struct statx {
100+
/* 0x00 */
101+
__u32 stx_mask; /* What results were written [uncond] */
102+
__u32 stx_blksize; /* Preferred general I/O size [uncond] */
103+
__u64 stx_attributes; /* Flags conveying information about the file [uncond] */
104+
/* 0x10 */
105+
__u32 stx_nlink; /* Number of hard links */
106+
__u32 stx_uid; /* User ID of owner */
107+
__u32 stx_gid; /* Group ID of owner */
108+
__u16 stx_mode; /* File mode */
109+
__u16 __spare0[1]; /* Spare; fills the 2 bytes left before the 0x20 offset marker */
110+
/* 0x20 */
111+
__u64 stx_ino; /* Inode number */
112+
__u64 stx_size; /* File size */
113+
__u64 stx_blocks; /* Number of 512-byte blocks allocated */
114+
__u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
115+
/* 0x40 */
116+
struct statx_timestamp stx_atime; /* Last access time */
117+
struct statx_timestamp stx_btime; /* File creation time */
118+
struct statx_timestamp stx_ctime; /* Last attribute change time */
119+
struct statx_timestamp stx_mtime; /* Last data modification time */
120+
/* 0x80 */
121+
__u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */
122+
__u32 stx_rdev_minor; /* presumably the minor half paired with stx_rdev_major */
123+
__u32 stx_dev_major; /* ID of device containing file [uncond] */
124+
__u32 stx_dev_minor; /* presumably the minor half paired with stx_dev_major */
125+
/* 0x90 */
126+
__u64 stx_mnt_id; /* presumably the mount ID; STATX_MNT_ID in stx_mask is documented as "Got stx_mnt_id" */
127+
__u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
128+
__u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
129+
/* 0xa0 */
130+
__u64 __spare3[12]; /* Spare space for future expansion */
131+
/* 0x100 */
132+
};
133+
134+
/*
135+
* Flags to be set in stx_mask
136+
*
137+
* Query request/result mask for statx() and struct statx::stx_mask.
138+
*
139+
* These bits should be set in the mask argument of statx() to request
140+
* particular items when calling statx().
141+
*/
142+
#define STATX_TYPE 0x00000001U /* Want/got stx_mode & S_IFMT */
143+
#define STATX_MODE 0x00000002U /* Want/got stx_mode & ~S_IFMT */
144+
#define STATX_NLINK 0x00000004U /* Want/got stx_nlink */
145+
#define STATX_UID 0x00000008U /* Want/got stx_uid */
146+
#define STATX_GID 0x00000010U /* Want/got stx_gid */
147+
#define STATX_ATIME 0x00000020U /* Want/got stx_atime */
148+
#define STATX_MTIME 0x00000040U /* Want/got stx_mtime */
149+
#define STATX_CTIME 0x00000080U /* Want/got stx_ctime */
150+
#define STATX_INO 0x00000100U /* Want/got stx_ino */
151+
#define STATX_SIZE 0x00000200U /* Want/got stx_size */
152+
#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */
153+
#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */
154+
#define STATX_BTIME 0x00000800U /* Want/got stx_btime */
155+
#define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */
156+
#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */
157+
158+
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
159+
160+
#ifndef __KERNEL__
161+
/*
162+
* This is deprecated, and shall remain the same value in the future. To avoid
163+
* confusion please use the equivalent (STATX_BASIC_STATS | STATX_BTIME)
164+
* instead.
165+
*/
166+
#define STATX_ALL 0x00000fffU
167+
#endif
168+
169+
/*
170+
* Attributes to be found in stx_attributes and masked in stx_attributes_mask.
171+
*
172+
* These give information about the features or the state of a file that might
173+
* be of use to ordinary userspace programs such as GUIs or ls rather than
174+
* specialised tools.
175+
*
176+
* Note that the flags marked [I] correspond to the FS_IOC_SETFLAGS flags
177+
* semantically. Where possible, the numerical value is picked to correspond
178+
* also. Note that the DAX attribute indicates that the file is in the CPU
179+
* direct access state. It does not correspond to the per-inode flag that
180+
* some filesystems support.
181+
*
182+
*/
183+
#define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */
184+
#define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */
185+
#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */
186+
#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */
187+
#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */
188+
#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */
189+
#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */
190+
#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */
191+
#define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */
192+
193+
194+
#endif /* _UAPI_LINUX_STAT_H */
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
2+
/*
3+
* AARCH64 specific definitions for NOLIBC
4+
* Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
5+
*/
6+
7+
#ifndef _NOLIBC_ARCH_AARCH64_H
8+
#define _NOLIBC_ARCH_AARCH64_H
9+
10+
#include "compiler.h"
11+
#include "crt.h"
12+
13+
/* Syscalls for AARCH64 :
14+
* - registers are 64-bit
15+
* - stack is 16-byte aligned
16+
* - syscall number is passed in x8
17+
* - arguments are in x0, x1, x2, x3, x4, x5
18+
* - the system call is performed by calling svc 0
19+
* - syscall return comes in x0.
20+
* - the arguments are cast to long and assigned into the target registers
21+
* which are then simply passed as registers to the asm code, so that we
22+
* don't have to experience issues with register constraints.
23+
*
24+
* On aarch64, select() is not implemented so we have to use pselect6().
25+
*/
26+
#define __ARCH_WANT_SYS_PSELECT6
27+
28+
/*
 * my_syscall0(num): issue system call `num` with no arguments.
 * The number is bound to x8, "svc #0" is executed, and the statement
 * expression evaluates to the long read back from x0. The asm is declared
 * to clobber "memory" and "cc".
 */
#define my_syscall0(num) \
29+
({ \
30+
register long _num __asm__ ("x8") = (num); \
31+
register long _arg1 __asm__ ("x0"); \
32+
\
33+
__asm__ volatile ( \
34+
"svc #0\n" \
35+
: "=r"(_arg1) \
36+
: "r"(_num) \
37+
: "memory", "cc" \
38+
); \
39+
_arg1; \
40+
})
41+
42+
/*
 * my_syscall1(num, arg1): issue system call `num` with one argument.
 * arg1 is cast to long and bound to x0, the number is bound to x8, and
 * "svc #0" is executed. _arg1 is both an input and the output because the
 * result is read back from the same register (x0); that value is what the
 * statement expression evaluates to.
 */
#define my_syscall1(num, arg1) \
43+
({ \
44+
register long _num __asm__ ("x8") = (num); \
45+
register long _arg1 __asm__ ("x0") = (long)(arg1); \
46+
\
47+
__asm__ volatile ( \
48+
"svc #0\n" \
49+
: "=r"(_arg1) \
50+
: "r"(_arg1), \
51+
"r"(_num) \
52+
: "memory", "cc" \
53+
); \
54+
_arg1; \
55+
})
56+
57+
/*
 * my_syscall2(num, arg1, arg2): issue system call `num` with two arguments.
 * The arguments are cast to long and bound to x0 and x1, the number is bound
 * to x8, and "svc #0" is executed. The statement expression evaluates to the
 * long read back from x0.
 */
#define my_syscall2(num, arg1, arg2) \
58+
({ \
59+
register long _num __asm__ ("x8") = (num); \
60+
register long _arg1 __asm__ ("x0") = (long)(arg1); \
61+
register long _arg2 __asm__ ("x1") = (long)(arg2); \
62+
\
63+
__asm__ volatile ( \
64+
"svc #0\n" \
65+
: "=r"(_arg1) \
66+
: "r"(_arg1), "r"(_arg2), \
67+
"r"(_num) \
68+
: "memory", "cc" \
69+
); \
70+
_arg1; \
71+
})
72+
73+
/*
 * my_syscall3(num, arg1, arg2, arg3): issue system call `num` with three
 * arguments. The arguments are cast to long and bound to x0..x2, the number
 * is bound to x8, and "svc #0" is executed. The statement expression
 * evaluates to the long read back from x0.
 */
#define my_syscall3(num, arg1, arg2, arg3) \
74+
({ \
75+
register long _num __asm__ ("x8") = (num); \
76+
register long _arg1 __asm__ ("x0") = (long)(arg1); \
77+
register long _arg2 __asm__ ("x1") = (long)(arg2); \
78+
register long _arg3 __asm__ ("x2") = (long)(arg3); \
79+
\
80+
__asm__ volatile ( \
81+
"svc #0\n" \
82+
: "=r"(_arg1) \
83+
: "r"(_arg1), "r"(_arg2), "r"(_arg3), \
84+
"r"(_num) \
85+
: "memory", "cc" \
86+
); \
87+
_arg1; \
88+
})
89+
90+
/*
 * my_syscall4(num, arg1, ..., arg4): issue system call `num` with four
 * arguments. The arguments are cast to long and bound to x0..x3, the number
 * is bound to x8, and "svc #0" is executed. The statement expression
 * evaluates to the long read back from x0.
 */
#define my_syscall4(num, arg1, arg2, arg3, arg4) \
91+
({ \
92+
register long _num __asm__ ("x8") = (num); \
93+
register long _arg1 __asm__ ("x0") = (long)(arg1); \
94+
register long _arg2 __asm__ ("x1") = (long)(arg2); \
95+
register long _arg3 __asm__ ("x2") = (long)(arg3); \
96+
register long _arg4 __asm__ ("x3") = (long)(arg4); \
97+
\
98+
__asm__ volatile ( \
99+
"svc #0\n" \
100+
: "=r"(_arg1) \
101+
: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
102+
"r"(_num) \
103+
: "memory", "cc" \
104+
); \
105+
_arg1; \
106+
})
107+
108+
/*
 * my_syscall5(num, arg1, ..., arg5): issue system call `num` with five
 * arguments. The arguments are cast to long and bound to x0..x4, the number
 * is bound to x8, and "svc #0" is executed. The statement expression
 * evaluates to the long read back from x0.
 */
#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
109+
({ \
110+
register long _num __asm__ ("x8") = (num); \
111+
register long _arg1 __asm__ ("x0") = (long)(arg1); \
112+
register long _arg2 __asm__ ("x1") = (long)(arg2); \
113+
register long _arg3 __asm__ ("x2") = (long)(arg3); \
114+
register long _arg4 __asm__ ("x3") = (long)(arg4); \
115+
register long _arg5 __asm__ ("x4") = (long)(arg5); \
116+
\
117+
__asm__ volatile ( \
118+
"svc #0\n" \
119+
: "=r" (_arg1) \
120+
: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
121+
"r"(_num) \
122+
: "memory", "cc" \
123+
); \
124+
_arg1; \
125+
})
126+
127+
/*
 * my_syscall6(num, arg1, ..., arg6): issue system call `num` with six
 * arguments. The arguments are cast to long and bound to x0..x5, the number
 * is bound to x8, and "svc #0" is executed. The statement expression
 * evaluates to the long read back from x0.
 */
#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
128+
({ \
129+
register long _num __asm__ ("x8") = (num); \
130+
register long _arg1 __asm__ ("x0") = (long)(arg1); \
131+
register long _arg2 __asm__ ("x1") = (long)(arg2); \
132+
register long _arg3 __asm__ ("x2") = (long)(arg3); \
133+
register long _arg4 __asm__ ("x3") = (long)(arg4); \
134+
register long _arg5 __asm__ ("x4") = (long)(arg5); \
135+
register long _arg6 __asm__ ("x5") = (long)(arg6); \
136+
\
137+
__asm__ volatile ( \
138+
"svc #0\n" \
139+
: "=r" (_arg1) \
140+
: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
141+
"r"(_arg6), "r"(_num) \
142+
: "memory", "cc" \
143+
); \
144+
_arg1; \
145+
})
146+
147+
/* startup code */
148+
/*
 * _start: startup routine, defined as a weak symbol and declared noreturn.
 * It copies the incoming stack pointer into x0 so that it becomes the first
 * argument of _start_c, aligns sp down to a 16-byte boundary, and branches
 * to _start_c. __builtin_unreachable() tells the compiler that execution
 * does not continue past the asm block.
 */
void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
149+
{
150+
__asm__ volatile (
151+
"mov x0, sp\n" /* save stack pointer to x0, as arg1 of _start_c */
152+
"and sp, x0, -16\n" /* sp must be 16-byte aligned in the callee */
153+
"bl _start_c\n" /* transfer to c runtime */
154+
);
155+
__builtin_unreachable();
156+
}
157+
#endif /* _NOLIBC_ARCH_AARCH64_H */

0 commit comments

Comments
 (0)