diff --git a/proto/Makefile b/proto/Makefile new file mode 100644 index 000000000..94842fe6c --- /dev/null +++ b/proto/Makefile @@ -0,0 +1,50 @@ +# Generate Go, Python, C and C++ bindings for every *.proto file in this directory. +DESTDIR ?= . +PROTO_FILES := $(wildcard *.proto) +GO_DIR := $(DESTDIR)/go +GO_FILES := $(patsubst %.proto,$(GO_DIR)/%.pb.go,$(PROTO_FILES)) +PY_DIR := $(DESTDIR)/py +PY_FILES := $(patsubst %.proto,$(PY_DIR)/%_pb2.py,$(PROTO_FILES)) +C_DIR := $(DESTDIR)/c +C_SOURCE := $(patsubst %.proto,$(C_DIR)/%.pb-c.c,$(PROTO_FILES)) +C_HDR := $(patsubst %.proto,$(C_DIR)/%.pb-c.h,$(PROTO_FILES)) +C_FILES := $(C_SOURCE) $(C_HDR) +CPP_DIR := $(DESTDIR)/cpp +CPP_SOURCE := $(patsubst %.proto,$(CPP_DIR)/%.pb.cc,$(PROTO_FILES)) +CPP_HDR := $(patsubst %.proto,$(CPP_DIR)/%.pb.h,$(PROTO_FILES)) +CPP_FILES := $(CPP_SOURCE) $(CPP_HDR) + +default: example + +all: go py c cpp + +go: $(GO_FILES) + +$(GO_DIR)/%.pb.go: %.proto + @mkdir -p $(GO_DIR) + protoc --go_out=$(GO_DIR) $^ + +example: go + go run ./example.go + +c: $(C_FILES) +# protoc-c emits the .c and .h in one run, so a single multi-target pattern rule builds both. +$(C_DIR)/%.pb-c.c $(C_DIR)/%.pb-c.h: %.proto + @mkdir -p $(C_DIR) + protoc-c --c_out=$(C_DIR) $^ + +cpp: $(CPP_FILES) +# protoc emits the .pb.cc and .pb.h in one run, so a single multi-target pattern rule builds both. +$(CPP_DIR)/%.pb.cc $(CPP_DIR)/%.pb.h: %.proto + @mkdir -p $(CPP_DIR) + protoc --cpp_out=$(CPP_DIR)/ $^ + +py: $(PY_FILES) + +$(PY_DIR)/%_pb2.py: %.proto + @mkdir -p $(PY_DIR) + protoc --python_out=$(PY_DIR) $^ + +clean: + rm -rf *~ $(GO_FILES) $(C_FILES) $(PY_FILES) $(CPP_FILES) + diff --git a/proto/config.proto b/proto/config.proto new file mode 100644 index 000000000..1b1ad8018 --- /dev/null +++ b/proto/config.proto @@ -0,0 +1,106 @@ +package oci; + +// Spec is the base configuration for the container. It specifies platform +// independent configuration. +message Spec { + // Version is the version of the specification that is supported. + optional string version = 1; + // Platform is the host information for OS and Arch. + optional Platform platform = 2; + // Process is the container's main process. + optional Process process = 3; + // Root is the root information for the container's filesystem. 
+ optional Root root = 4; + // Hostname is the container's host name. + optional string hostname = 5; + // Mounts profile configuration for adding mounts to the container's + // filesystem. + repeated MountPoint mounts = 6; +} + + +// LinuxSpec is the full specification for linux containers. +message LinuxSpec { + optional Spec spec = 1; + // LinuxConfig is platform specific configuration for linux based + // containers. + optional LinuxConfig linux_config = 2; +} + +// LinuxConfig contains platform specific configuration for linux based +// containers. +message LinuxConfig { + // Capabilities are linux capabilities that are kept for the container. + repeated string capabilities = 1; +} + +// Platform specifies OS and arch information for the host system that the +// container is created for. +message Platform { + // OS is the operating system. + optional string os = 1; + // Arch is the architecture. + optional string arch = 2; +} + +// Process contains information to start a specific application inside the +// container. +message Process { + // Terminal creates an interactive terminal for the container. + optional bool terminal = 1; + // User specifies user information for the process. + optional User user = 2; + // Args specifies the binary and arguments for the application to + // execute. + repeated string args = 3; + // Env populates the process environment for the process. + repeated string env = 4; + // Cwd is the current working directory for the process and must be + // relative to the container's root. + optional string cwd = 5; +} + +enum PlatformType { + UNKNOWN = 0; + LINUX = 1; +} + +// User specifies user information for the process. +message User { + // Type so that receivers of this message can `switch` for the fields + // expected + optional PlatformType type = 1; + + //optional LinuxUser linux_type = 2; + extensions 100 to 499; +} + +// LinuxUser specifies linux specific user and group information for the +// container's main process. 
+extend User { + // Uid is the user id. + optional int32 uid = 101; + // Gid is the group id. + optional int32 gid = 102; + repeated int32 additional_gids = 103; +} + +// Root contains information about the container's root filesystem on the host. +message Root { + // Path is the absolute path to the container's root filesystem. + optional string path = 1; + // Readonly makes the root filesystem for the container readonly before + // the process is executed. + optional bool readonly = 2; +} + +// MountPoint describes a directory that may be fulfilled by a mount in the +// runtime.json. +message MountPoint { + // Name is a unique descriptive identifier for this mount point. + optional string name = 1; + // Path specifies the path of the mount. The path and child directories + // MUST exist, a runtime MUST NOT create directories automatically to a + // mount point. + optional string path = 2; +} diff --git a/proto/example.go b/proto/example.go new file mode 100644 index 000000000..10ae604a5 --- /dev/null +++ b/proto/example.go @@ -0,0 +1,103 @@ +// +build ignore + +package main + +import ( + "encoding/hex" + "encoding/json" + "log" + + oci "./go/" + "github.com/golang/protobuf/jsonpb" + "github.com/golang/protobuf/proto" +) + +func main() { + s := &oci.LinuxSpec{ + Spec: &oci.Spec{ + Version: proto.String("0.3.0"), + Hostname: proto.String("darkstar"), + Platform: &oci.Platform{Os: proto.String("linux"), Arch: proto.String("x86_64")}, + Process: &oci.Process{ + Terminal: proto.Bool(true), + User: &oci.User{}, + Cwd: proto.String("/"), + Args: []string{"/bin/sh"}, + Env: []string{"TERM=linux"}, + }, + Root: &oci.Root{ + Path: proto.String("/"), + Readonly: proto.Bool(false), + }, + Mounts: []*oci.MountPoint{ + &oci.MountPoint{ + Name: proto.String("proc"), + Path: proto.String("/proc"), + }, + &oci.MountPoint{ + Name: proto.String("dev"), + Path: proto.String("/dev"), + }, + &oci.MountPoint{ + Name: proto.String("devpts"), + Path: proto.String("/dev/pts"), + }, + 
+ &oci.MountPoint{ + Name: proto.String("shm"), + Path: proto.String("/dev/shm"), + }, + &oci.MountPoint{ + Name: proto.String("mqueue"), + Path: proto.String("/dev/mqueue"), + }, + &oci.MountPoint{ + Name: proto.String("sysfs"), + Path: proto.String("/sys"), + }, + &oci.MountPoint{ + Name: proto.String("cgroup"), + Path: proto.String("/sys/fs/cgroup"), + }, + }, + }, + LinuxConfig: &oci.LinuxConfig{ + Capabilities: []string{ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE", + }, + }, + } + + //proto.SetExtension(s.Spec, oci.E_Uid, 0) + + println("## Using github.com/golang/protobuf/jsonpb to marshal") + m := jsonpb.Marshaler{} + jsonStr, err := m.MarshalToString(s) + if err != nil { + log.Fatal(err) + } + println(jsonStr) + print("## len: ") + println(len(jsonStr)) + println("") + + println("## Using encoding/json to marshal") + buf, err := json.MarshalIndent(s, "", " ") + if err != nil { + log.Fatal(err) + } + println(string(buf)) + print("## len: ") + println(len(buf)) + println("") + + println("## Marshaling to protobuf binary message") + data, err := proto.Marshal(s) + if err != nil { + log.Fatal(err) + } + println(hex.Dump(data)) + print("## len: ") + println(len(data)) +} diff --git a/proto/runtime_config.proto b/proto/runtime_config.proto new file mode 100644 index 000000000..4f1db2eae --- /dev/null +++ b/proto/runtime_config.proto @@ -0,0 +1,271 @@ +package oci; + +// RuntimeSpec is the generic runtime state information on a running container +message RuntimeSpec { + // Mounts is a mapping of names to mount configurations. + // Which mounts will be mounted and where should be chosen with MountPoints + // in Spec. + repeated MountFieldEntry mounts = 1; + // Hooks are the commands run at various lifecycle events of the container. 
+ optional Hooks hooks = 2; +} + +// MountFieldEntry is a key/value pair used in place of the protobuf map type, for backwards compatibility +message MountFieldEntry { + required string key = 1; + required Mount value = 2; +} + +// Mount specifies a mount for a container +message Mount { + // Type specifies the mount kind. + optional string type = 1; + // Source specifies the source path of the mount. In the case of bind mounts on + // linux based systems this would be the file on the host. + optional string source = 2; + // Options are fstab style mount options. + repeated string options = 3; +} + +// Hook specifies a command that is run at a particular event in the lifecycle of a container +message Hook { + optional string path = 1; + repeated string args = 2; + repeated string env = 3; +} + +// Hooks for container setup and teardown +message Hooks { + // Prestart is a list of hooks to be run before the container process is executed. + // On Linux, they are run after the container namespaces are created. + repeated Hook prestart = 1; + // Poststop is a list of hooks to be run after the container process exits. + repeated Hook poststop = 2; +} + +// DefaultState holds the directory for the container's state information +message DefaultState { + // TODO(vbatts) not as elegant in some ways, but there is not a concept of const here + optional string directory = 1 [default = "/run/opencontainer/containers"]; +} + +/* +BEGIN Linux specific runtime +*/ + +// LinuxRuntimeSpec is the full specification for linux containers. +message LinuxRuntimeSpec { + optional RuntimeSpec runtime_spec = 1; + // LinuxRuntime is platform specific configuration for linux based containers. + optional LinuxRuntime linux = 2; +} + +// LinuxRuntime hosts the Linux-only runtime information +message LinuxRuntime { + // UidMapping specifies user mappings for supporting user namespaces on linux. + repeated IDMapping uid_mapping = 1; + // GidMapping specifies group mappings for supporting user namespaces on linux. 
+ repeated IDMapping gid_mapping = 2; + // Rlimits specifies rlimit options to apply to the container's process. + repeated Rlimit rlimits = 3; + // Sysctl are a set of key value pairs that are set for the container on start + repeated StringStringEntry sysctl = 4; + // Resources contain cgroup information for handling resource constraints + // for the container + optional Resources resources = 5; + // CgroupsPath specifies the path to cgroups that are created and/or joined by the container. + // The path is expected to be relative to the cgroups mountpoint. + // If resources are specified, the cgroups at CgroupsPath will be updated based on resources. + optional string cgroups_path = 6; + // Namespaces contains the namespaces that are created and/or joined by the container + repeated Namespace namespaces = 7; + // Devices are a list of device nodes that are created and enabled for the container + repeated Device devices = 8; + // ApparmorProfile specifies the apparmor profile for the container. + optional string apparmor_profile = 9; + // SelinuxProcessLabel specifies the selinux context that the container process is run as. + optional string selinux_process_label = 10; + // Seccomp specifies the seccomp security settings for the container. 
+ optional Seccomp seccomp = 11; + // RootfsPropagation is the rootfs mount propagation mode for the container + optional string rootfs_propagation = 12; +} + +// IDMapping specifies UID/GID mappings +message IDMapping { + // HostID is the UID/GID of the host user or group + optional int32 host_id = 1; + // ContainerID is the UID/GID of the container's user or group + optional int32 container_id = 2; + // Size is the length of the range of IDs mapped between the two namespaces + optional int32 size = 3; +} + +// Rlimit type and restrictions +message Rlimit { + // Type of the rlimit to set + optional string type = 1; + // Hard is the hard limit for the specified type + optional uint64 hard = 2; + // Soft is the soft limit for the specified type + optional uint64 soft = 3; +} + +// StringStringEntry is a key/value pair used in place of the protobuf map type, for backwards compatibility +message StringStringEntry { + required string key = 1; + required string value = 2; +} + +// Resources has container runtime resource constraints +message Resources { + // DisableOOMKiller disables the OOM killer for out of memory conditions + optional bool disable_oom_killer = 1; + // Memory restriction configuration + optional Memory memory = 2; + // CPU resource restriction configuration + optional CPU cpu = 3; + // Task resource restriction configuration. 
+ optional Pids pids = 4; + // BlockIO restriction configuration + optional BlockIO block_io = 5; + // Hugetlb limit (in bytes) + repeated HugepageLimit hugepage_limits = 6; + // Network restriction configuration + optional Network network = 7; +} + +// Memory for Linux cgroup 'memory' resource management +message Memory { + // Memory limit (in bytes) + optional int64 limit = 1; + // Memory reservation or soft_limit (in bytes) + optional int64 reservation = 2; + // Total memory usage (memory + swap); set `-1' to disable swap + optional int64 swap = 3; + // Kernel memory limit (in bytes) + optional int64 kernel = 4; + // How aggressive the kernel will swap memory pages. Range from 0 to 100. Set -1 to use system default + optional int64 Swappiness = 5; +} + +// CPU for Linux cgroup 'cpu' resource management +message CPU { + // CPU shares (relative weight vs. other cgroups with cpu shares) + optional int64 shares = 1; + // CPU hardcap limit (in usecs). Allowed cpu time in a given period + optional int64 quota = 2; + // CPU period to be used for hardcapping (in usecs). 0 to use system default + optional int64 period = 3; + // How much CPU time may be used in realtime scheduling (in usecs) + optional int64 realtime_runtime = 4; + // CPU period to be used for realtime scheduling (in usecs) + optional int64 realtime_period = 5; + // CPU to use within the cpuset + optional string cpus = 6; + // MEM to use within the cpuset + optional string mems = 7; +} + +// Pids for Linux cgroup 'pids' resource management (Linux 4.3) +message Pids { + // Maximum number of PIDs. A value < 0 implies "no limit". 
+ optional int64 limit = 1; +} + +// BlockIO for Linux cgroup 'blockio' resource management +message BlockIO { + // Specifies per cgroup weight, range is from 10 to 1000 + optional int64 weight = 1; + // Weight per cgroup per device, can override BlkioWeight + optional string weight_device = 2; + // IO read rate limit per cgroup per device, bytes per second + optional string throttle_read_bps_device = 3; + // IO write rate limit per cgroup per device, bytes per second + optional string throttle_write_bps_device = 4; + // IO read rate limit per cgroup per device, IO per second + optional string throttle_read_iops_device = 5; + // IO write rate limit per cgroup per device, IO per second + optional string throttle_write_iops_device = 6; +} + +// HugepageLimit structure corresponds to limiting kernel hugepages +message HugepageLimit { + optional string pagesize = 1; + optional int32 limit = 2; +} + +// Network identification and priority configuration +message Network { + // Set class identifier for container's network packets + optional string class_id = 1; + // Set priority of network traffic for container + repeated InterfacePriority priorities = 2; +} + +// InterfacePriority for network interfaces +message InterfacePriority { + // Name is the name of the network interface + optional string name = 1; + // Priority for the interface + optional int64 priority = 2; +} + +// Namespace is the configuration for a linux namespace +message Namespace { + // Type is the type of Linux namespace + optional string type = 1; + // Path is a path to an existing namespace persisted on disk that can be joined + // and is of the same type + optional string path = 2; +} + +// Device represents the information on a Linux special device file +message Device { + // Path to the device. + optional string path = 1; + // Device type, block, char, etc. + // TODO(vbatts) ensure int32 is fine here, instead of golang's rune + optional int32 type = 2; + // Major is the device's major number. 
+ optional int64 major = 3; + // Minor is the device's minor number. + optional int64 minor = 4; + // Cgroup permissions format, rwm. + optional string permissions = 5; + // FileMode permission bits for the device. + // TODO(vbatts) os.FileMode is an octal uint32 + optional uint32 file_mode = 6; + // Uid of the device. + optional uint32 uid = 7; + // Gid of the device. + optional uint32 gid = 8; +} + +// Seccomp represents syscall restrictions +message Seccomp { + // TODO(vbatts) string instead of "Action" type + optional string default_action = 1; + repeated Syscall syscalls = 2; +} + +// Syscall is used to match a syscall in Seccomp +message Syscall { + optional string name = 1; + optional string action = 2; + repeated Arg args = 3; +} + +// Arg used for matching specific syscall arguments in Seccomp +message Arg { + optional uint32 index = 1; + optional uint64 value = 2; + optional uint64 value_two = 3; + // Op is the operator string + optional string op = 4; +} + +/* +END Linux specific runtime +*/ +