diff --git a/config-freebsd.md b/config-freebsd.md new file mode 100644 index 000000000..16e32048a --- /dev/null +++ b/config-freebsd.md @@ -0,0 +1,128 @@ +# FreeBSD Container Configuration + +This document describes the schema for the [FreeBSD-specific section](config.md#platform-specific-configuration) of the [container configuration](config.md). + +## Devices + +Devices in FreeBSD are accessed via the `devfs` filesystem. Each container SHOULD have a `devfs` filesystem mounted into its `/dev` directory. Often, a minimal set of devices is exposed to the container using ruleset 4 from `/etc/defaults/devfs.rules` - the ruleset is specified as a mount option. + +Optionally, additional devices can be exposed to the container using an array of entries inside the `devices` root field: + +* **`path`** _(string, REQUIRED)_ - the device path relative to `/dev` +* **`mode`** _(uint32, OPTIONAL)_ - file mode for the device. + +### Example +```json +"devices": [ + { + "path": "pf", + "mode": 448 + } +] +``` + +## Jail + +On FreeBSD, containers are implemented using the platform's jail subsystem. +Each jail is configured using a set of name/value pairs passed to the kernel using the `jail(2)` system calls. +The `jail` root field contains values which are passed to the kernel when the container is created. + +* **`parent`** _(string, OPTIONAL)_ - parent jail. + The value is the name of a jail which should be this container's parent (defaults to none). This can be used to share namespaces such as `vnet` with another container. +* **`host`** _(string, OPTIONAL)_ - allow overriding hostname, domainname, hostuuid and hostid. + The value can be "new" which allows these values to be overridden in the container or "inherit" to use the host values (or parent container values). If set to "new", the values for hostname and domainname are taken from the base config, if present. +* **`ip4`** _(string, OPTIONAL)_ - control the availability of IPv4 addresses. 
+ This is typically left unset if the container has a vnet, set to "inherit" to allow access to host (or parent container) addresses or set to "disable" to stop use of IPv4 entirely. +* **`ip6`** _(string, OPTIONAL)_ - control the availability of IPv6 addresses. + This is typically left unset if the container has a vnet, set to "inherit" to allow access to host (or parent container) addresses or set to "disable" to stop use of IPv6 entirely. +* **`vnet`** _(string, OPTIONAL)_ - control the vnet used for this container. + The value can be "new" which causes a new vnet to be created for the container or "inherit" which shares the vnet for the parent container (or host if there is no parent). +* **`sysvmsg`** _(string, OPTIONAL)_ - allow access to SYSV IPC message primitives. + If set to "inherit", all IPC objects in the host (or parent container) are visible to this container, whether they were created by the container itself, the base system, or other containers. If set to "new", the container will have its own key namespace, and can only see the objects that it has created; the system (or parent container) has access to the container's objects, but not to its keys. If set to "disable", the container cannot perform any sysvmsg-related system calls. Defaults to "new". +* **`sysvsem`** _(string, OPTIONAL)_ - allow access to SYSV IPC semaphore primitives, in the same manner as sysvmsg. Defaults to "new". +* **`sysvshm`** _(string, OPTIONAL)_ - allow access to SYSV IPC shared memory primitives, in the same manner as sysvmsg. Defaults to "new". +* **`enforceStatfs`** _(integer, OPTIONAL)_ - control visibility of mounts in the container. + A value of 0 allows visibility of all host mounts, 1 allows visibility of mounts nested under the container's root and 2 only allows the container root to be visible. If unset, the default value is 2. +* **`allow`** _(object, OPTIONAL)_ - Some restrictions of the container environment may be set on a per-container basis. 
Unless stated otherwise in the entries below, these parameters are off by default. + - **`setHostname`** _(bool, OPTIONAL)_ - Allow the container's hostname to be changed. Defaults to `false`. + - **`rawSockets`** _(bool, OPTIONAL)_ - Allow the container to use raw sockets to support network utilities such as ping and traceroute. Defaults to `false`. + - **`chflags`** _(bool, OPTIONAL)_ - Allow the system file flags to be changed. Defaults to `false`. + - **`mount`** _(array of strings, OPTIONAL)_ - Allow the listed filesystem types to be mounted and unmounted in the container. + - **`quotas`** _(bool, OPTIONAL)_ - Allow the filesystem quotas to be changed in the container. Defaults to `false`. + - **`socketAf`** _(bool, OPTIONAL)_ - Allow socket types other than IPv4, IPv6 and unix. Defaults to `false`. + - **`reservedPorts`** _(bool, OPTIONAL)_ - Allow the jail to bind to ports lower than 1024. Defaults to `false`. + - **`suser`** _(bool, OPTIONAL)_ - The value of the jail's security.bsd.suser_enabled sysctl. The super-user will be disabled automatically if its parent system has it disabled. The super-user is enabled by default. + +These fields SHOULD be mapped to a corresponding set of `jail(8)` parameters which can be used to create the container jail. 
+A typical jail-based OCI implementation on FreeBSD MAY use the following mapping: + +| Jail parameter | JSON equivalent | +| -------------- | -------------------- | +| `jid` | - | +| `name` | see below | +| `path` | `root.path` | +| `ip4.addr` | - | +| `ip4.saddrsel` | - | +| `ip4` | `freebsd.jail.ip4` | +| `ip6.addr` | - | +| `ip6.saddrsel` | - | +| `ip6` | `freebsd.jail.ip6` | +| `vnet` | `freebsd.jail.vnet` | +| `host.hostname` | `hostname` | +| `host` | `freebsd.jail.host` | +| `sysvmsg` | `freebsd.jail.sysvmsg` | +| `sysvsem` | `freebsd.jail.sysvsem` | +| `sysvshm` | `freebsd.jail.sysvshm` | +| `securelevel` | - | +| `devfs_ruleset` | see below | +| `children.max` | see below | +| `enforce_statfs` | `freebsd.jail.enforceStatfs` | +| `persist` | - | +| `parent` | `freebsd.jail.parent` | +| `osrelease` | - | +| `osreldate` | - | +| `allow.set_hostname` | `freebsd.jail.allow.setHostname` | +| `allow.sysvipc` | - | +| `allow.raw_sockets` | `freebsd.jail.allow.rawSockets` | +| `allow.chflags` | `freebsd.jail.allow.chflags` | +| `allow.mount` | `freebsd.jail.allow.mount` | +| `allow.quotas` | `freebsd.jail.allow.quotas` | +| `allow.read_msgbuf` | - | +| `allow.socket_af` | `freebsd.jail.allow.socketAf` | +| `allow.mlock` | - | +| `allow.nfsd` | - | +| `allow.reserved_ports` | `freebsd.jail.allow.reservedPorts` | +| `allow.unprivileged_proc_debug` | - | +| `allow.suser` | `freebsd.jail.allow.suser` | +| `allow.mount.*` | see below | + +The jail name SHOULD be set to the create command's `container-id` argument. + +Network addresses are typically managed by the host (e.g. using CNI or netavark) so we do not include a mapping for `ip4.addr` or `ip6.addr`. + +A container which needs its own network namespace SHOULD set `"vnet"` to `"new"` and leave `"ip4"` and `"ip6"` unchanged. +A container which shares the parent/host vnet SHOULD leave `"vnet"` unchanged and set `"ip4"` and `"ip6"` to `"inherit"`. 
+ +The `devfs_ruleset` parameter is only required for jails which create new `devfs` mounts - typically OCI runtimes will mount `devfs` on the host. The value is a rule set number - these rule sets are defined on the host, typically via `/etc/defaults/devfs.rules` or using the `devfs` command line utility. + +The `children.max` parameter SHOULD be managed by the OCI runtime e.g. when a new container shares namespaces with an existing container. + +The `allow.mount.*` parameter set is extensible - allowed mount types are listed as an array. As with `devfs`, typically the OCI runtime will manage mounts for the container by performing mount operations on the host. + +Jail parameters not supported by this runtime extension are marked with "-". These parameters will have their default values - see the `jail(8)` man page for details. + +### Example +```json +"jail": { + "host": "new", + "vnet": "new", + "enforceStatfs": 1, + "allow": { + "rawSockets": true, + "chflags": true, + "mount": [ + "tmpfs" + ] + } +} +``` diff --git a/config.md b/config.md index d642359d1..0055df9c4 100644 --- a/config.md +++ b/config.md @@ -518,14 +518,16 @@ For Windows based systems the user structure has the following fields: ## Platform-specific configuration +* **`freebsd`** (object, OPTIONAL) [FreeBSD-specific configuration](config-freebsd.md). + This MAY be set if the target platform of this spec is `freebsd`. * **`linux`** (object, OPTIONAL) [Linux-specific configuration](config-linux.md). This MAY be set if the target platform of this spec is `linux`. -* **`windows`** (object, OPTIONAL) [Windows-specific configuration](config-windows.md). - This MUST be set if the target platform of this spec is `windows`. * **`solaris`** (object, OPTIONAL) [Solaris-specific configuration](config-solaris.md). This MAY be set if the target platform of this spec is `solaris`. * **`vm`** (object, OPTIONAL) [Virtual-machine-specific configuration](config-vm.md). 
This MAY be set if the target platform and architecture of this spec support hardware virtualization. +* **`windows`** (object, OPTIONAL) [Windows-specific configuration](config-windows.md). + This MUST be set if the target platform of this spec is `windows`. * **`zos`** (object, OPTIONAL) [z/OS-specific configuration](config-zos.md). This MAY be set if the target platform of this spec is `zos`. diff --git a/schema/README.md b/schema/README.md index 5ae1df5a4..831ee142a 100644 --- a/schema/README.md +++ b/schema/README.md @@ -10,6 +10,7 @@ The layout of the files is as follows: * [config-linux.json](config-linux.json) - the [Linux-specific configuration sub-structure](../config-linux.md) * [config-solaris.json](config-solaris.json) - the [Solaris-specific configuration sub-structure](../config-solaris.md) * [config-windows.json](config-windows.json) - the [Windows-specific configuration sub-structure](../config-windows.md) +* [config-freebsd.json](config-freebsd.json) - the [FreeBSD-specific configuration sub-structure](../config-freebsd.md) * [state-schema.json](state-schema.json) - the primary entrypoint for the [state JSON](../runtime.md#state) schema * [defs.json](defs.json) - definitions for general types * [defs-linux.json](defs-linux.json) - definitions for Linux-specific types diff --git a/schema/config-freebsd.json b/schema/config-freebsd.json new file mode 100644 index 000000000..1ffeab5a5 --- /dev/null +++ b/schema/config-freebsd.json @@ -0,0 +1,75 @@ +{ + "freebsd": { + "description": "FreeBSD platform-specific configurations", + "type": "object", + "properties": { + "devices": { + "type": "array", + "items": { + "$ref": "defs-freebsd.json#/definitions/Device" + } + }, + "jail": { + "type": "object", + "properties": { + "parent": { + "type": "string" + }, + "host": { + "$ref": "defs-freebsd.json#/definitions/SharingModeNoDisable" + }, + "ip4": { + "$ref": "defs-freebsd.json#/definitions/SharingMode" + }, + "ip6": { + "$ref": 
"defs-freebsd.json#/definitions/SharingMode" + }, + "vnet": { + "$ref": "defs-freebsd.json#/definitions/SharingModeNoDisable" + }, + "sysvmsg": { + "$ref": "defs-freebsd.json#/definitions/SharingMode" + }, + "sysvsem": { + "$ref": "defs-freebsd.json#/definitions/SharingMode" + }, + "sysvshm": { + "$ref": "defs-freebsd.json#/definitions/SharingMode" + }, + "enforceStatfs": { + "$ref": "defs.json#/definitions/uint8" + }, + "allow": { + "type": "object", + "properties": { + "setHostname": { + "type": "boolean" + }, + "rawSockets": { + "type": "boolean" + }, + "chflags": { + "type": "boolean" + }, + "mount": { + "$ref": "defs.json#/definitions/ArrayOfStrings" + }, + "quotas": { + "type": "boolean" + }, + "socketAf": { + "type": "boolean" + }, + "reservedPorts": { + "type": "boolean" + }, + "suser": { + "type": "boolean" + } + } + } + } + } + } + } +} diff --git a/schema/config-schema.json b/schema/config-schema.json index 5124def5f..f17f724fd 100644 --- a/schema/config-schema.json +++ b/schema/config-schema.json @@ -250,6 +250,9 @@ }, "zos": { "$ref": "config-zos.json#/zos" + }, + "freebsd": { + "$ref": "config-freebsd.json#/freebsd" } }, "required": [ diff --git a/schema/defs-freebsd.json b/schema/defs-freebsd.json new file mode 100644 index 000000000..2880bf252 --- /dev/null +++ b/schema/defs-freebsd.json @@ -0,0 +1,30 @@ +{ + "definitions": { + "Device": { + "type": "object", + "properties": { + "path": { + "type": "string" + }, + "mode": { + "$ref": "defs.json#/definitions/FileMode" + } + } + }, + "SharingMode": { + "type": "string", + "enum": [ + "disable", + "new", + "inherit" + ] + }, + "SharingModeNoDisable": { + "type": "string", + "enum": [ + "new", + "inherit" + ] + } + } +} diff --git a/schema/defs-linux.json b/schema/defs-linux.json index 4bf73d0fb..bb92e64d2 100644 --- a/schema/defs-linux.json +++ b/schema/defs-linux.json @@ -148,12 +148,6 @@ "description": "minor device number", "$ref": "defs.json#/definitions/int64" }, - "FileMode": { - "description": 
"File permissions mode (typically an octal value)", - "type": "integer", - "minimum": 0, - "maximum": 512 - }, "FileType": { "description": "Type of a block or special character device", "type": "string", @@ -173,7 +167,7 @@ "$ref": "defs.json#/definitions/FilePath" }, "fileMode": { - "$ref": "#/definitions/FileMode" + "$ref": "defs.json#/definitions/FileMode" }, "major": { "$ref": "#/definitions/Major" diff --git a/schema/defs.json b/schema/defs.json index a0bf846a1..a1e64161b 100644 --- a/schema/defs.json +++ b/schema/defs.json @@ -75,6 +75,12 @@ "type": "string" } }, + "FileMode": { + "description": "File permissions mode (typically an octal value)", + "type": "integer", + "minimum": 0, + "maximum": 512 + }, "FilePath": { "type": "string" }, diff --git a/schema/test/config/bad/freebsd-vnet-disable.json b/schema/test/config/bad/freebsd-vnet-disable.json new file mode 100644 index 000000000..dc6737c4a --- /dev/null +++ b/schema/test/config/bad/freebsd-vnet-disable.json @@ -0,0 +1,11 @@ +{ + "ociVersion": "1.3.0", + "root": { + "path": "rootfs" + }, + "freebsd": { + "jail": { + "vnet": "disable" + } + } +} diff --git a/schema/test/config/good/freebsd-example.json b/schema/test/config/good/freebsd-example.json new file mode 100644 index 000000000..5025ce465 --- /dev/null +++ b/schema/test/config/good/freebsd-example.json @@ -0,0 +1,54 @@ +{ + "ociVersion": "1.3.0", + "process": { + "terminal": true, + "args": [ + "sh" + ], + "env": [ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "TERM=xterm" + ], + "cwd": "/" + }, + "root": { + "path": "rootfs" + }, + "hostname": "slartibartfast", + "mounts": [ + { + "destination": "/dev", + "type": "devfs", + "source": "devfs", + "options": [ + "ruleset=4" + ] + }, + { + "destination": "/dev/fd", + "type": "fdescfs", + "source": "fdescfs", + "options": [] + } + ], + "freebsd": { + "devices": [ + { + "path": "pf", + "mode": 448 + } + ], + "jail": { + "host": "new", + "vnet": "new", + "enforceStatfs": 1, + 
"allow": { + "rawSockets": true, + "chflags": true, + "mount": [ + "tmpfs" + ] + } + } + } +} diff --git a/schema/test/config/good/freebsd-minimal.json b/schema/test/config/good/freebsd-minimal.json new file mode 100644 index 000000000..f1d45a388 --- /dev/null +++ b/schema/test/config/good/freebsd-minimal.json @@ -0,0 +1,7 @@ +{ + "ociVersion": "1.3.0", + "root": { + "path": "rootfs" + }, + "freebsd": {} +} diff --git a/specs-go/config.go b/specs-go/config.go index 854290da2..1018f50b5 100644 --- a/specs-go/config.go +++ b/specs-go/config.go @@ -31,6 +31,8 @@ type Spec struct { VM *VM `json:"vm,omitempty" platform:"vm"` // ZOS is platform-specific configuration for z/OS based containers. ZOS *ZOS `json:"zos,omitempty" platform:"zos"` + // FreeBSD is platform-specific configuration for FreeBSD based containers. + FreeBSD *FreeBSD `json:"freebsd,omitempty" platform:"freebsd"` } // Scheduler represents the scheduling attributes for a process. It is based on @@ -170,7 +172,7 @@ type Mount struct { // Destination is the absolute path where the mount will be placed in the container. Destination string `json:"destination"` // Type specifies the mount kind. - Type string `json:"type,omitempty" platform:"linux,solaris,zos"` + Type string `json:"type,omitempty" platform:"linux,solaris,zos,freebsd"` // Source specifies the source path of the mount. Source string `json:"source,omitempty"` // Options are fstab style mount options. @@ -923,3 +925,66 @@ const ( // SchedFlagUtilClampMin represents the utilization clamp maximum scheduling flag SchedFlagUtilClampMax LinuxSchedulerFlag = "SCHED_FLAG_UTIL_CLAMP_MAX" ) + +// FreeBSD contains platform-specific configuration for FreeBSD based containers. +type FreeBSD struct { + // Devices which are accessible in the container + Devices []FreeBSDDevice `json:"devices,omitempty"` + // Jail definition for this container + Jail *FreeBSDJail `json:"jail,omitempty"` +} + +type FreeBSDDevice struct { + // Path to the device, relative to /dev. 
+ Path string `json:"path"` + // FileMode permission bits for the device. + Mode *os.FileMode `json:"mode,omitempty"` +} + +// FreeBSDJail describes how to configure the container's jail +type FreeBSDJail struct { + // Parent jail name - this can be used to share a single vnet + // across several containers + Parent string `json:"parent,omitempty"` + // Whether to use parent UTS names or override in the container + Host FreeBSDSharing `json:"host,omitempty"` + // IPv4 address sharing for the container + Ip4 FreeBSDSharing `json:"ip4,omitempty"` + // IPv6 address sharing for the container + Ip6 FreeBSDSharing `json:"ip6,omitempty"` + // Which network stack to use for the container + Vnet FreeBSDSharing `json:"vnet,omitempty"` + // SystemV IPC message sharing for the container + SysVMsg FreeBSDSharing `json:"sysvmsg,omitempty"` + // SystemV IPC semaphore sharing for the container + SysVSem FreeBSDSharing `json:"sysvsem,omitempty"` + // SystemV IPC shared memory sharing for the container + SysVShm FreeBSDSharing `json:"sysvshm,omitempty"` + // Mount visibility (see jail(8) for details) + EnforceStatfs *int `json:"enforceStatfs,omitempty"` + // Jail capabilities + Allow *FreeBSDJailAllow `json:"allow,omitempty"` +} + +// FreeBSDSharing values are used to control access to features in the container, either +// disabling the feature, sharing state with the parent or creating new private +// state in the container. 
+type FreeBSDSharing string + +const ( + ShareDisable FreeBSDSharing = "disable" + ShareNew FreeBSDSharing = "new" + ShareInherit FreeBSDSharing = "inherit" +) + +// FreeBSDJailAllow describes jail capabilities +type FreeBSDJailAllow struct { + SetHostname bool `json:"setHostname,omitempty"` + RawSockets bool `json:"rawSockets,omitempty"` + Chflags bool `json:"chflags,omitempty"` + Mount []string `json:"mount,omitempty"` + Quotas bool `json:"quotas,omitempty"` + SocketAf bool `json:"socketAf,omitempty"` + ReservedPorts bool `json:"reservedPorts,omitempty"` + Suser bool `json:"suser,omitempty"` +}