diff --git a/proto/Makefile b/proto/Makefile
new file mode 100644
index 000000000..9d27a55e2
--- /dev/null
+++ b/proto/Makefile
@@ -0,0 +1,43 @@
+
+# Generates Go, C and Python bindings from every .proto file in this directory.
+#
+# DESTDIR is the directory passed to the protoc/protoc-c output flags.
+# NOTE(review): targets are named relative to the current directory, so with
+# DESTDIR other than "." make will not find the generated files, will rerun the
+# generators on every invocation, and "clean" will not remove them -- confirm.
+DESTDIR ?= .
+PROTO_FILES := $(wildcard *.proto)
+GO_FILES := $(patsubst %.proto,%.pb.go,$(PROTO_FILES))
+C_FILES := $(patsubst %.proto,%.pb-c.c,$(PROTO_FILES))
+C_HDR_FILES := $(patsubst %.proto,%.pb-c.h,$(PROTO_FILES))
+PY_FILES := $(patsubst %.proto,%_pb2.py,$(PROTO_FILES))
+
+# None of these names is a file; declare them phony so that a stray file
+# called e.g. "go" or "clean" cannot shadow the target.
+.PHONY: default all go c py clean
+
+default: go
+
+all: go py c
+
+go: $(GO_FILES)
+
+%.pb.go: %.proto
+	protoc --go_out=$(DESTDIR) $<
+
+# protoc-c emits the source and its header in one run, so both are targets of
+# a single pattern rule (make runs the recipe once for the pair).
+c: $(C_FILES) $(C_HDR_FILES)
+
+%.pb-c.c %.pb-c.h: %.proto
+	protoc-c --c_out=$(DESTDIR) $<
+
+py: $(PY_FILES)
+
+%_pb2.py: %.proto
+	protoc --python_out=$(DESTDIR) $<
+
+
+clean:
+	rm -f *~ $(GO_FILES) $(C_FILES) $(C_HDR_FILES) $(PY_FILES)
+
diff --git a/proto/config.proto b/proto/config.proto
new file mode 100644
index 000000000..8731b892a
--- /dev/null
+++ b/proto/config.proto
@@ -0,0 +1,97 @@
+//package oci.config.bundle;
+package oci;
+
+// Spec is the base configuration for the container. It specifies platform
+// independent configuration.
+message Spec {
+	// Version is the version of the specification that is supported.
+	optional string version = 1;
+	// Platform is the host information for OS and Arch.
+	optional Platform platform = 2;
+	// Process is the container's main process.
+	optional Process process = 3;
+	// Root is the root information for the container's filesystem.
+	optional Root root = 4;
+	// Hostname is the container's host name.
+	optional string hostname = 5;
+	// Mounts profile configuration for adding mounts to the container's
+	// filesystem.
+	repeated MountPoint mounts = 6;
+}
+
+
+// LinuxSpec is the full specification for linux containers.
+message LinuxSpec {
+	optional Spec spec = 1; // Spec is the platform independent base configuration.
+	// LinuxConfig is platform specific configuration for linux based
+	// containers.
+	optional LinuxConfig linux_config = 2;
+}
+
+// LinuxConfig contains platform specific configuration for linux based
+// containers.
+message LinuxConfig {
+	// Capabilities are linux capabilities that are kept for the container.
+	repeated string capabilities = 1;
+}
+
+// Platform specifies OS and arch information for the host system that the
+// container is created for.
+message Platform {
+	// OS is the operating system.
+	optional string os = 1;
+	// Arch is the architecture.
+	optional string arch = 2;
+}
+
+// Process contains information to start a specific application inside the
+// container.
+message Process {
+	// Terminal creates an interactive terminal for the container.
+	optional bool terminal = 1;
+	// User specifies user information for the process.
+	optional User user = 2;
+	// Args specifies the binary and arguments for the application to
+	// execute.
+	repeated string args = 3;
+	// Env populates the process environment for the process.
+	repeated string env = 4;
+	// Cwd is the current working directory for the process and must be
+	// relative to the container's root.
+	optional string cwd = 5;
+}
+
+enum PlatformType {
+	UNKNOWN = 0;
+	LINUX = 1;
+}
+
+// User specifies user information for the process.
+message User {
+	// Type identifies the platform so that receivers of this message can
+	// `switch` on it for the fields expected.
+	optional PlatformType type = 1;
+
+	//optional LinuxUser linux_type = 2;
+	extensions 100 to 499; // field numbers left open for platform specific extensions (config_linux.proto uses 101-103)
+}
+
+// Root contains information about the container's root filesystem on the host.
+message Root {
+	// Path is the absolute path to the container's root filesystem.
+	optional string path = 1;
+	// Readonly makes the root filesystem for the container readonly before
+	// the process is executed.
+	optional bool readonly = 2;
+}
+
+// MountPoint describes a directory that may be fulfilled by a mount in the
+// runtime.json.
+message MountPoint {
+	// Name is a unique descriptive identifier for this mount point.
+	optional string name = 1;
+	// Path specifies the path of the mount. The path and child directories
+	// MUST exist, a runtime MUST NOT create directories automatically to a
+	// mount point.
+	optional string path = 2;
+}
diff --git a/proto/config_linux.proto b/proto/config_linux.proto
new file mode 100644
index 000000000..3a243742c
--- /dev/null
+++ b/proto/config_linux.proto
@@ -0,0 +1,14 @@
+package oci;
+
+import "config.proto";
+
+// LinuxUser specifies linux specific user and group information for the
+// container's main process.
+extend oci.User {
+	// Uid is the user id.
+	optional int32 uid = 101;
+	// Gid is the group id.
+	optional int32 gid = 102;
+	repeated int32 additional_gids = 103; // AdditionalGids are further group ids (presumably supplementary groups -- confirm).
+}
+
diff --git a/proto/runtime_config.proto b/proto/runtime_config.proto
new file mode 100644
index 000000000..55e7bea94
--- /dev/null
+++ b/proto/runtime_config.proto
@@ -0,0 +1,54 @@
+//package oci.config.runtime;
+package oci;
+
+import "runtime_config_linux.proto";
+
+// RuntimeSpec is the generic runtime state information on a running container
+message RuntimeSpec {
+	// Mounts is a mapping of names to mount configurations.
+	// Which mounts will be mounted and where should be chosen with MountPoints
+	// in Spec.
+	repeated MountFieldEntry mounts = 1;
+	// Hooks are the commands run at various lifecycle events of the container.
+	optional Hooks hooks = 2;
+}
+
+// LinuxRuntimeSpec is the full runtime specification for linux containers.
+message LinuxRuntimeSpec {
+	optional RuntimeSpec runtime_spec = 1; // RuntimeSpec is the generic (platform independent) runtime information.
+	// LinuxRuntime is platform specific configuration for linux based containers.
+	optional oci.LinuxRuntime linux = 2;
+}
+
+// MountFieldEntry is one name -> Mount pair of an associative map; a repeated entry message is more backwards compatible than the protobuf map type
+message MountFieldEntry {
+	required string key = 1; // Key is the mount name.
+	required Mount value = 2; // Value is the mount configuration for that name.
+}
+
+// Mount specifies a mount for a container
+message Mount {
+	// Type specifies the mount kind.
+	optional string type = 1;
+	// Source specifies the source path of the mount. In the case of bind mounts on
+	// linux based systems this would be the file on the host.
+	optional string source = 2;
+	// Options are fstab style mount options.
+	repeated string options = 3;
+}
+
+// Hook specifies a command that is run at a particular event in the lifecycle of a container
+message Hook {
+	optional string path = 1; // Path of the command to run.
+	repeated string args = 2; // Args are the arguments for the command.
+	repeated string env = 3; // Env is the environment for the command.
+}
+
+// Hooks for container setup and teardown
+message Hooks {
+	// Prestart is a list of hooks to be run before the container process is executed.
+	// On Linux, they are run after the container namespaces are created.
+	repeated Hook prestart = 1;
+	// Poststop is a list of hooks to be run after the container process exits.
+	repeated Hook poststop = 2;
+}
diff --git a/proto/runtime_config_linux.proto b/proto/runtime_config_linux.proto
new file mode 100644
index 000000000..0a695e77e
--- /dev/null
+++ b/proto/runtime_config_linux.proto
@@ -0,0 +1,213 @@
+package oci;
+
+// DefaultState holds the default location of the container's state information
+message DefaultState {
+	// TODO(vbatts) not as elegant in some ways, but there is not a concept of const here
+	optional string directory = 1 [default = "/run/opencontainer/containers"];
+}
+
+// LinuxRuntime hosts the Linux-only runtime information
+message LinuxRuntime {
+	// UidMapping specifies user mappings for supporting user namespaces on linux.
+	repeated IDMapping uid_mapping = 1;
+	// GidMapping specifies group mappings for supporting user namespaces on linux.
+	repeated IDMapping gid_mapping = 2;
+	// Rlimits specifies rlimit options to apply to the container's process.
+	repeated Rlimit rlimits = 3;
+	// Sysctl are a set of key value pairs that are set for the container on start
+	repeated StringStringEntry sysctl = 4;
+	// Resources contain cgroup information for handling resource constraints
+	// for the container
+	optional Resources resources = 5;
+	// CgroupsPath specifies the path to cgroups that are created and/or joined by the container.
+	// The path is expected to be relative to the cgroups mountpoint.
+	// If resources are specified, the cgroups at CgroupsPath will be updated based on resources.
+	optional string cgroups_path = 6;
+	// Namespaces contains the namespaces that are created and/or joined by the container
+	repeated Namespace namespaces = 7;
+	// Devices are a list of device nodes that are created and enabled for the container
+	repeated Device devices = 8;
+	// ApparmorProfile specifies the apparmor profile for the container.
+	optional string apparmor_profile = 9;
+	// SelinuxProcessLabel specifies the selinux context that the container process is run as.
+	optional string selinux_process_label = 10;
+	// Seccomp specifies the seccomp security settings for the container.
+	optional Seccomp seccomp = 11;
+	// RootfsPropagation is the rootfs mount propagation mode for the container
+	optional string rootfs_propagation = 12;
+}
+
+// IDMapping specifies UID/GID mappings
+message IDMapping {
+	// HostID is the UID/GID of the host user or group
+	optional int32 host_id = 1; // NOTE(review): ids are int32 here while Device.uid/gid are uint32 -- confirm the intended type
+	// ContainerID is the UID/GID of the container's user or group
+	optional int32 container_id = 2;
+	// Size is the length of the range of IDs mapped between the two namespaces
+	optional int32 size = 3;
+}
+
+// Rlimit type and restrictions
+message Rlimit {
+	// Type of the rlimit to set
+	optional string type = 1;
+	// Hard is the hard limit for the specified type
+	optional uint64 hard = 2;
+	// Soft is the soft limit for the specified type
+	optional uint64 soft = 3;
+}
+
+// StringStringEntry is one key/value pair of a string to string map; a repeated entry message is more backwards compatible than the protobuf map type
+message StringStringEntry {
+	required string key = 1; // Key is the map key.
+	required string value = 2; // Value is the value stored for that key.
+}
+
+// Resources has container runtime resource constraints
+message Resources {
+	// DisableOOMKiller disables the OOM killer for out of memory conditions
+	optional bool disable_oom_killer = 1;
+	// Memory restriction configuration
+	optional Memory memory = 2;
+	// CPU resource restriction configuration
+	optional CPU cpu = 3;
+	// Task resource restriction configuration.
+	optional Pids pids = 4;
+	// BlockIO restriction configuration
+	optional BlockIO block_io = 5;
+	// Hugetlb limit (in bytes)
+	repeated HugepageLimit hugepage_limits = 6;
+	// Network restriction configuration
+	optional Network network = 7;
+}
+
+// Memory for Linux cgroup 'memory' resource management
+message Memory {
+	// Memory limit (in bytes)
+	optional int64 limit = 1;
+	// Memory reservation or soft_limit (in bytes)
+	optional int64 reservation = 2;
+	// Total memory usage (memory + swap); set `-1' to disable swap
+	optional int64 swap = 3;
+	// Kernel memory limit (in bytes)
+	optional int64 kernel = 4;
+	// How aggressive the kernel will swap memory pages. Range from 0 to 100. Set -1 to use system default
+	optional int64 Swappiness = 5; // NOTE(review): the only field in these files not written in lower_snake_case -- confirm before the name is relied upon
+}
+
+// CPU for Linux cgroup 'cpu' resource management
+message CPU {
+	// CPU shares (relative weight vs. other cgroups with cpu shares)
+	optional int64 shares = 1;
+	// CPU hardcap limit (in usecs). Allowed cpu time in a given period
+	optional int64 quota = 2;
+	// CPU period to be used for hardcapping (in usecs). 0 to use system default
+	optional int64 period = 3;
+	// How much time the CPU may use in realtime scheduling (in usecs)
+	optional int64 realtime_runtime = 4;
+	// CPU period to be used for realtime scheduling (in usecs)
+	optional int64 realtime_period = 5;
+	// CPU to use within the cpuset
+	optional string cpus = 6;
+	// MEM to use within the cpuset
+	optional string mems = 7;
+}
+
+// Pids for Linux cgroup 'pids' resource management (Linux 4.3)
+message Pids {
+	// Maximum number of PIDs. A value < 0 implies "no limit".
+	optional int64 limit = 1;
+}
+
+// BlockIO for Linux cgroup 'blockio' resource management
+message BlockIO {
+	// Specifies per cgroup weight, range is from 10 to 1000
+	optional int64 weight = 1;
+	// Weight per cgroup per device, can override BlkioWeight
+	optional string weight_device = 2;
+	// IO read rate limit per cgroup per device, bytes per second
+	optional string throttle_read_bps_device = 3;
+	// IO write rate limit per cgroup per device, bytes per second
+	optional string throttle_write_bps_device = 4;
+	// IO read rate limit per cgroup per device, IO per second
+	optional string throttle_read_iops_device = 5;
+	// IO write rate limit per cgroup per device, IO per second
+	optional string throttle_write_iops_device = 6;
+}
+
+// HugepageLimit structure corresponds to limiting kernel hugepages
+message HugepageLimit {
+	optional string pagesize = 1; // Pagesize is the hugepage size this limit applies to (string format not shown here).
+	optional int32 limit = 2; // Limit for that page size. NOTE(review): Resources documents hugetlb limits as bytes, which int32 caps at 2 GiB -- confirm
+}
+
+// Network identification and priority configuration
+message Network {
+	// Set class identifier for container's network packets
+	optional string class_id = 1;
+	// Set priority of network traffic for container
+	repeated InterfacePriority priorities = 2;
+}
+
+// InterfacePriority for network interfaces
+message InterfacePriority {
+	// Name is the name of the network interface
+	optional string name = 1;
+	// Priority for the interface
+	optional int64 priority = 2;
+}
+
+// Namespace is the configuration for a linux namespace
+message Namespace {
+	// Type is the type of Linux namespace
+	optional string type = 1;
+	// Path is a path to an existing namespace persisted on disk that can be joined
+	// and is of the same type
+	optional string path = 2;
+}
+
+// Device represents the information on a Linux special device file
+message Device {
+	// Path to the device.
+	optional string path = 1;
+	// Device type, block, char, etc.
+	// TODO(vbatts) ensure int32 is fine here, instead of golang's rune
+	optional int32 type = 2;
+	// Major is the device's major number.
+	optional int64 major = 3;
+	// Minor is the device's minor number.
+	optional int64 minor = 4;
+	// Cgroup permissions format, rwm.
+	optional string permissions = 5;
+	// FileMode permission bits for the device.
+	// TODO(vbatts) os.FileMode is an octal uint32
+	optional uint32 file_mode = 6;
+	// Uid of the device.
+	optional uint32 uid = 7;
+	// Gid of the device.
+	optional uint32 gid = 8;
+}
+
+// Seccomp represents syscall restrictions
+message Seccomp {
+	// TODO(vbatts) string instead of "Action" type
+	optional string default_action = 1; // DefaultAction is the default action (presumably used when no Syscall entry matches -- confirm).
+	repeated Syscall syscalls = 2; // Syscalls are the per-syscall match rules.
+}
+
+// Syscall is used to match a syscall in Seccomp
+message Syscall {
+	optional string name = 1; // Name of the syscall to match.
+	optional string action = 2; // Action for a matching syscall (a string, like Seccomp.default_action).
+	repeated Arg args = 3; // Args are the argument conditions for the match.
+}
+
+// Arg used for matching specific syscall arguments in Seccomp
+message Arg {
+	optional uint32 index = 1; // Index of the syscall argument (presumably zero-based -- confirm).
+	optional uint64 value = 2; // Value to compare the argument against.
+	optional uint64 value_two = 3; // ValueTwo is a second comparison value (presumably only used by some operators -- confirm).
+	// Op is the operator string
+	optional string op = 4;
+}
+