Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 6 additions & 18 deletions pkg/components/exec_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,24 +46,12 @@ func NewExecNode(
)

criConfig := ytconfig.NewCRIConfigGenerator(&spec)

var sidecarConfig *ConfigMapBuilder
if criConfig.Service == ytv1.CRIServiceContainerd {
sidecarConfig = NewConfigMapBuilder(
l,
ytsaurus.APIProxy(),
l.GetSidecarConfigMapName(consts.JobsContainerName),
ytsaurus.GetResource().Spec.ConfigOverrides,
)

sidecarConfig.AddGenerator(
consts.ContainerdConfigFileName,
ConfigFormatToml,
func() ([]byte, error) {
return criConfig.GetContainerdConfig()
},
)
}
sidecarConfig := NewJobsSidecarConfig(
l,
ytsaurus.APIProxy(),
criConfig,
ytsaurus.GetCommonSpec().ConfigOverrides,
)

if criConfig.MonitoringPort != 0 {
srv.addMonitoringPort(corev1.ServicePort{
Expand Down
51 changes: 49 additions & 2 deletions pkg/components/exec_node_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ import (

ytv1 "github.com/ytsaurus/ytsaurus-k8s-operator/api/v1"

"github.com/ytsaurus/ytsaurus-k8s-operator/pkg/apiproxy"
"github.com/ytsaurus/ytsaurus-k8s-operator/pkg/consts"
"github.com/ytsaurus/ytsaurus-k8s-operator/pkg/labeller"
"github.com/ytsaurus/ytsaurus-k8s-operator/pkg/resources"
"github.com/ytsaurus/ytsaurus-k8s-operator/pkg/ytconfig"
)
Expand Down Expand Up @@ -110,7 +112,11 @@ func (n *baseExecNode) addCRIServiceConfig(cri *ytconfig.CRIConfigGenerator, con
case ytv1.CRIServiceNone:
return
case ytv1.CRIServiceCRIO:
container.Env = append(container.Env, cri.GetCRIOEnv()...)
container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{
Name: consts.CRIOConfigVolumeName,
MountPath: consts.CRIOConfigMountPoint,
ReadOnly: true,
})
case ytv1.CRIServiceContainerd:
container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{
Name: consts.ContainerdConfigVolumeName,
Expand Down Expand Up @@ -151,9 +157,13 @@ func (n *baseExecNode) addCRIServiceSidecar(cri *ytconfig.CRIConfigGenerator, po
},
}

if cri.Service == ytv1.CRIServiceContainerd {
switch cri.Service {
case ytv1.CRIServiceContainerd:
configPath := path.Join(consts.ContainerdConfigMountPoint, consts.ContainerdConfigFileName)
container.Args = []string{"--config", configPath}
case ytv1.CRIServiceCRIO:
configPath := path.Join(consts.CRIOConfigMountPoint, consts.CRIOConfigFileName)
container.Args = []string{"--config", configPath, "--config-dir", ""}
}

n.addCRIServiceConfig(cri, &container)
Expand Down Expand Up @@ -204,3 +214,40 @@ func (n *baseExecNode) sidecarConfigNeedsReload() bool {
}
return needsReload
}

// NewJobsSidecarConfig builds the config map that carries the CRI service
// configuration file for the jobs sidecar container.
//
// The config map name is obtained from labeller.GetSidecarConfigMapName for
// consts.JobsContainerName. Exactly one generator is registered, selected by
// criConfig.Service:
//
//   - CRIServiceContainerd: consts.ContainerdConfigFileName, produced by
//     criConfig.GetContainerdConfig;
//   - CRIServiceCRIO: consts.CRIOConfigFileName, produced by
//     criConfig.GetCRIOConfig.
//
// Both files are registered with ConfigFormatToml. For CRIServiceNone the
// function returns nil, so callers must be prepared for a nil builder. Any
// other service value yields a builder without generators.
func NewJobsSidecarConfig(
	labeller *labeller.Labeller,
	apiProxy apiproxy.APIProxy,
	criConfig *ytconfig.CRIConfigGenerator,
	configOverrides *corev1.LocalObjectReference,
) *ConfigMapBuilder {
	// No CRI service means no config file to generate: bail out before
	// building a config map that would be discarded right away.
	if criConfig.Service == ytv1.CRIServiceNone {
		return nil
	}

	config := NewConfigMapBuilder(
		labeller,
		apiProxy,
		labeller.GetSidecarConfigMapName(consts.JobsContainerName),
		configOverrides,
	)

	switch criConfig.Service {
	case ytv1.CRIServiceContainerd:
		config.AddGenerator(
			consts.ContainerdConfigFileName,
			ConfigFormatToml,
			criConfig.GetContainerdConfig,
		)
	case ytv1.CRIServiceCRIO:
		config.AddGenerator(
			consts.CRIOConfigFileName,
			ConfigFormatToml,
			criConfig.GetCRIOConfig,
		)
	}

	return config
}
30 changes: 14 additions & 16 deletions pkg/components/exec_node_remote.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/intstr"

ytv1 "github.com/ytsaurus/ytsaurus-k8s-operator/api/v1"
"github.com/ytsaurus/ytsaurus-k8s-operator/pkg/apiproxy"
Expand Down Expand Up @@ -44,23 +45,20 @@ func NewRemoteExecNodes(
)

criConfig := ytconfig.NewCRIConfigGenerator(&spec)
sidecarConfig := NewJobsSidecarConfig(
l,
proxy,
criConfig,
commonSpec.ConfigOverrides,
)

var sidecarConfig *ConfigMapBuilder
if criConfig.Service == ytv1.CRIServiceContainerd {
sidecarConfig = NewConfigMapBuilder(
l,
proxy,
l.GetSidecarConfigMapName(consts.JobsContainerName),
commonSpec.ConfigOverrides,
)

sidecarConfig.AddGenerator(
consts.ContainerdConfigFileName,
ConfigFormatToml,
func() ([]byte, error) {
return criConfig.GetContainerdConfig()
},
)
if criConfig.MonitoringPort != 0 {
srv.addMonitoringPort(corev1.ServicePort{
Name: consts.CRIServiceMonitoringPortName,
Protocol: corev1.ProtocolTCP,
Port: criConfig.MonitoringPort,
TargetPort: intstr.FromInt32(criConfig.MonitoringPort),
})
}

return &RemoteExecNode{
Expand Down
4 changes: 4 additions & 0 deletions pkg/consts/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ const (

CRIServiceSocketName = "cri.sock"

CRIOConfigVolumeName = "config-crio"
CRIOConfigMountPoint = "/config/crio"
CRIOConfigFileName = "crio.conf"

CRINamespace = "yt"
CRIBaseCgroup = "/yt"

Expand Down
7 changes: 7 additions & 0 deletions pkg/testutil/spec_builders.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@ type YtsaurusBuilder struct {
Ytsaurus *ytv1.Ytsaurus
Overrides *corev1.ConfigMap

WithNvidiaContainerRuntime bool

// Set MinReadyInstanceCount for all components
MinReadyInstanceCount *int
}
Expand Down Expand Up @@ -517,6 +519,11 @@ func (b *YtsaurusBuilder) SetupCRIJobEnvironment(node *ytv1.ExecNodesSpec) {
SandboxImage: b.SandboxImage,
},
}
if b.WithNvidiaContainerRuntime {
node.JobEnvironment.Runtime = &ytv1.JobRuntimeSpec{
Nvidia: &ytv1.NvidiaRuntimeSpec{},
}
}
}

func (b *YtsaurusBuilder) WithCRIJobEnvironment() {
Expand Down
115 changes: 94 additions & 21 deletions pkg/ytconfig/cri.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,24 @@ import (
"github.com/ytsaurus/ytsaurus-k8s-operator/pkg/consts"
)

// Runtime names, binary paths and related settings used when generating the
// CRI-O and containerd configuration.
const (
// runtimeTypeOCI is the "runtime_type" value set for every CRI-O runtime entry.
runtimeTypeOCI = "oci"

// runc runtime: the entry name (also used for the containerd runtime and its
// default runtime name) and the binary path written as CRI-O "runtime_path".
runtimeNameRunc = "runc"
crioRuntimePathRunc = "/usr/libexec/crio/runc"

// crun runtime: the entry name, which is also the CRI-O "default_runtime"
// unless the nvidia runtime is enabled, and its CRI-O "runtime_path".
runtimeNameCrun = "crun"
crioRuntimePathCrun = "/usr/libexec/crio/crun"

// nvidia runtime: registered and made the default runtime only when
// cri.Runtime.Nvidia is set. The path is used both as CRI-O "runtime_path"
// and as the containerd runtime "BinaryName" option.
runtimeNameNvidia = "nvidia"
runtimePathNvidia = "/usr/bin/nvidia-container-runtime"

// crioMonitorCgroup is written as "monitor_cgroup" of every CRI-O runtime
// entry and as the runtime-level "conmon_cgroup"; crioMonitorPath is written
// as "monitor_path" of every CRI-O runtime entry.
crioMonitorCgroup = "pod"
crioMonitorPath = "/usr/libexec/crio/conmon"

// shmSizeAnnotation is listed in "allowed_annotations" of every CRI-O
// runtime entry.
shmSizeAnnotation = "io.kubernetes.cri-o.ShmSize"
)

type CRIConfigGenerator struct {
Service ytv1.CRIServiceType
Spec ytv1.CRIJobEnvironmentSpec
Expand Down Expand Up @@ -69,29 +87,84 @@ func (cri *CRIConfigGenerator) GetCRIToolsEnv() []corev1.EnvVar {
return env
}

func (cri *CRIConfigGenerator) GetCRIOEnv() []corev1.EnvVar {
var env []corev1.EnvVar
func (cri *CRIConfigGenerator) GetCRIOConfig() ([]byte, error) {
// See https://github.com/cri-o/cri-o/blob/main/docs/crio.conf.5.md

crioAPI := map[string]any{
"listen": cri.GetSocketPath(),
}

crioImage := map[string]any{}

crioMetrics := map[string]any{}

crioRuntimeRuntimes := map[string]any{
runtimeNameRunc: map[string]any{
"runtime_type": runtimeTypeOCI,
"runtime_path": crioRuntimePathRunc,
"allowed_annotations": []string{
shmSizeAnnotation,
},
"monitor_cgroup": crioMonitorCgroup,
"monitor_path": crioMonitorPath,
},
runtimeNameCrun: map[string]any{
"runtime_type": runtimeTypeOCI,
"runtime_path": crioRuntimePathCrun,
"allowed_annotations": []string{
shmSizeAnnotation,
},
"monitor_cgroup": crioMonitorCgroup,
"monitor_path": crioMonitorPath,
},
}

crioRuntime := map[string]any{
"cgroup_manager": "cgroupfs",
"conmon_cgroup": crioMonitorCgroup,
"default_runtime": runtimeNameCrun,
"runtimes": crioRuntimeRuntimes,
}

crio := map[string]any{
"api": crioAPI,
"image": crioImage,
"metrics": crioMetrics,
"runtime": crioRuntime,
}

config := map[string]any{
"crio": crio,
}

// See https://github.com/cri-o/cri-o/blob/main/docs/crio.8.md
env = append(env,
corev1.EnvVar{Name: "CONTAINER_LISTEN", Value: cri.GetSocketPath()},
corev1.EnvVar{Name: "CONTAINER_CGROUP_MANAGER", Value: "cgroupfs"},
corev1.EnvVar{Name: "CONTAINER_CONMON_CGROUP", Value: "pod"},
)
if cri.StoragePath != nil {
env = append(env, corev1.EnvVar{Name: "CONTAINER_ROOT", Value: *cri.StoragePath})
crio["root"] = *cri.StoragePath
}

if cri.Spec.SandboxImage != nil {
env = append(env, corev1.EnvVar{Name: "CONTAINER_PAUSE_IMAGE", Value: *cri.Spec.SandboxImage})
crioImage["pause_image"] = *cri.Spec.SandboxImage
}

if cri.MonitoringPort != 0 {
env = append(env,
corev1.EnvVar{Name: "CONTAINER_ENABLE_METRICS", Value: "true"},
corev1.EnvVar{Name: "CONTAINER_METRICS_HOST", Value: ""},
corev1.EnvVar{Name: "CONTAINER_METRICS_PORT", Value: fmt.Sprintf("%d", cri.MonitoringPort)},
)
crioMetrics["enable_metrics"] = true
crioMetrics["metrics_host"] = ""
crioMetrics["metrics_port"] = cri.MonitoringPort
}
return env

if cri.Runtime != nil && cri.Runtime.Nvidia != nil {
crioRuntimeRuntimes[runtimeNameNvidia] = map[string]any{
"runtime_type": runtimeTypeOCI,
"runtime_path": runtimePathNvidia,
"allowed_annotations": []string{
shmSizeAnnotation,
},
"monitor_cgroup": crioMonitorCgroup,
"monitor_path": crioMonitorPath,
}
crioRuntime["default_runtime"] = runtimeNameNvidia
}

return marshallYsonConfig(config)
}

func (cri *CRIConfigGenerator) GetContainerdConfig() ([]byte, error) {
Expand Down Expand Up @@ -143,25 +216,25 @@ func (cri *CRIConfigGenerator) GetContainerdConfig() ([]byte, error) {

func (cri *CRIConfigGenerator) getContainerdRuntimes() (runtimes map[string]any, defaultRuntimeName string) {
runtimes = map[string]any{
"runc": map[string]any{
runtimeNameRunc: map[string]any{
"runtime_type": "io.containerd.runc.v2",
"sandbox_mode": "podsandbox",
"options": map[string]any{
"SystemdCgroup": false,
},
},
}
defaultRuntimeName = "runc"
defaultRuntimeName = runtimeNameRunc

if cri.Runtime != nil && cri.Runtime.Nvidia != nil {
runtimes["nvidia"] = map[string]any{
runtimes[runtimeNameNvidia] = map[string]any{
"runtime_type": "io.containerd.runc.v2",
"sandbox_mode": "podsandbox",
"options": map[string]any{
"BinaryName": "/usr/bin/nvidia-container-runtime",
"BinaryName": runtimePathNvidia,
},
}
defaultRuntimeName = "nvidia"
defaultRuntimeName = runtimeNameNvidia
}

return runtimes, defaultRuntimeName
Expand Down
Loading
Loading