diff --git a/Cargo.toml b/Cargo.toml index 1bc93bf2..c0e49379 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -57,7 +57,7 @@ bottlerocket-template-helper = { path = "./bottlerocket-template-helper", versio # Settings Models bottlerocket-model-derive = { path = "./bottlerocket-settings-models/model-derive", version = "0.1" } -bottlerocket-modeled-types = { path = "./bottlerocket-settings-models/modeled-types", version = "0.7" } +bottlerocket-modeled-types = { path = "./bottlerocket-settings-models/modeled-types", version = "0.8" } bottlerocket-scalar = { path = "./bottlerocket-settings-models/scalar", version = "0.1" } bottlerocket-scalar-derive = { path = "./bottlerocket-settings-models/scalar-derive", version = "0.1" } bottlerocket-string-impls-for = { path = "./bottlerocket-settings-models/string-impls-for", version = "0.1" } @@ -75,7 +75,7 @@ settings-extension-ecs = { path = "./bottlerocket-settings-models/settings-exten settings-extension-host-containers = { path = "./bottlerocket-settings-models/settings-extensions/host-containers", version = "0.1" } settings-extension-kernel = { path = "./bottlerocket-settings-models/settings-extensions/kernel", version = "0.1" } settings-extension-kubernetes = { path = "./bottlerocket-settings-models/settings-extensions/kubernetes", version = "0.2" } -settings-extension-kubelet-device-plugins = { path = "./bottlerocket-settings-models/settings-extensions/kubelet-device-plugins", version = "0.1" } +settings-extension-kubelet-device-plugins = { path = "./bottlerocket-settings-models/settings-extensions/kubelet-device-plugins", version = "0.2" } settings-extension-metrics = { path = "./bottlerocket-settings-models/settings-extensions/metrics", version = "0.1" } settings-extension-motd = { path = "./bottlerocket-settings-models/settings-extensions/motd", version = "0.1" } settings-extension-network = { path = "./bottlerocket-settings-models/settings-extensions/network", version = "0.1" } diff --git a/bottlerocket-settings-models/modeled-types/Cargo.toml b/bottlerocket-settings-models/modeled-types/Cargo.toml index 771bcf72..96b506b6 100644 --- a/bottlerocket-settings-models/modeled-types/Cargo.toml +++ b/bottlerocket-settings-models/modeled-types/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "bottlerocket-modeled-types" -version = "0.7.0" +version = "0.8.0" authors = [] license = "Apache-2.0 OR MIT" edition = "2021" diff --git a/bottlerocket-settings-models/modeled-types/src/kubernetes.rs b/bottlerocket-settings-models/modeled-types/src/kubernetes.rs index 338309b3..e62056f1 100644 --- a/bottlerocket-settings-models/modeled-types/src/kubernetes.rs +++ b/bottlerocket-settings-models/modeled-types/src/kubernetes.rs @@ -1461,7 +1461,7 @@ mod test_hostname_override_source { // =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= -/// NvidiaRuntimeSettings contains the container runtime settings for Nvidia gpu. +/// NvidiaDevicePluginSettings contains the device sharing and partitioning related settings for Nvidia gpu. #[model(impl_default = true)] pub struct NvidiaDevicePluginSettings { pass_device_specs: bool, @@ -1469,6 +1469,8 @@ pub struct NvidiaDevicePluginSettings { device_list_strategy: NvidiaDeviceListStrategy, device_sharing_strategy: NvidiaDeviceSharingStrategy, time_slicing: NvidiaTimeSlicingSettings, + device_partitioning_strategy: NvidiaDevicePartitioningStrategy, + mig: NvidiaMigSettings, } #[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)] @@ -1499,10 +1501,123 @@ pub struct NvidiaTimeSlicingSettings { fail_requests_greater_than_one: bool, } +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)] +#[serde(rename_all = "lowercase")] +pub enum NvidiaDevicePartitioningStrategy { + #[default] + None, + MIG, +} + +#[model(impl_default = true)] +pub struct NvidiaMigSettings { + profile: HashMap, +} + +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct NvidiaGpuModel { + inner: String, +} + +lazy_static! { + pub(crate) static ref NVIDIAGPU_NAME: Regex = Regex::new(r"^([a-z])(\d+)\.(\d+)gb$").unwrap(); +} + +impl TryFrom<&str> for NvidiaGpuModel { + type Error = error::Error; + + fn try_from(input: &str) -> Result { + ensure!( + NVIDIAGPU_NAME.is_match(input), + error::PatternSnafu { + thing: "NVIDIA GPU Model", + pattern: NVIDIAGPU_NAME.clone(), + input + } + ); + + Ok(NvidiaGpuModel { + inner: input.to_string(), + }) + } +} + +string_impls_for!(NvidiaGpuModel, "NvidiaGpuModel"); + +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct MigProfile { + inner: String, +} + +lazy_static! { + pub(crate) static ref MIGPROFILE_NAME: Regex = Regex::new(r"^[0-9]g\.\d+gb$").unwrap(); +} + +impl TryFrom<&str> for MigProfile { + type Error = error::Error; + + fn try_from(input: &str) -> Result { + let slice_format = matches!(input, "1" | "2" | "3" | "4" | "7"); + + ensure!( + slice_format | MIGPROFILE_NAME.is_match(input), + error::PatternSnafu { + thing: "MIG Profile", + pattern: MIGPROFILE_NAME.clone(), + input + } + ); + + Ok(MigProfile { + inner: input.to_string(), + }) + } +} + +string_impls_for!(MigProfile, "MigProfile"); + #[cfg(test)] -mod tests { +mod test_nvidia_device_plugins { use super::*; + #[test] + fn valid_gpu_model() { + for ok in &["a100.40gb", "a100.80gb", "h100.80gb", "h100.141gb"] { + assert!(NvidiaGpuModel::try_from(*ok).is_ok()); + } + } + + #[test] + fn invalid_gpu_model() { + assert!(NvidiaGpuModel::try_from("invalid").is_err()); + assert!(NvidiaGpuModel::try_from("1000").is_err()); + assert!(NvidiaGpuModel::try_from("A100.40GB").is_err()); + assert!(NvidiaGpuModel::try_from("a100.40").is_err()); + } + + #[test] + fn valid_mig_profile() { + for ok in &[ + "1g.5gb", "2g.10gb", "3g.20gb", "7g.40gb", "1g.10gb", "1g.20gb", "2g.20gb", "3g.40gb", + "7g.80gb", "1g.18gb", "1g.35gb", "2g.35gb", "3g.71gb", "7g.141gb", "1", "2", "3", "4", + "7", + ] { + assert!(MigProfile::try_from(*ok).is_ok()); + } + } + + #[test] + fn invalid_mig_profile() { + assert!(MigProfile::try_from("invalid").is_err()); + assert!(MigProfile::try_from("1000").is_err()); + assert!(MigProfile::try_from("5").is_err()); + assert!(MigProfile::try_from("10g.100GB").is_err()); + assert!(MigProfile::try_from("1g.10GB").is_err()); + assert!(MigProfile::try_from("1g10gb").is_err()); + assert!(MigProfile::try_from("g.10gb").is_err()); + assert!(MigProfile::try_from("1g.gb").is_err()); + } + #[test] fn test_serde_nvidia_device_plugins() { let test_json = r#"{"pass-device-specs":false,"device-id-strategy":"uuid","device-list-strategy":"envvar"}"#; @@ -1515,7 +1630,9 @@ mod tests { device_id_strategy: Some(NvidiaDeviceIdStrategy::Uuid), device_list_strategy: Some(NvidiaDeviceListStrategy::Envvar), device_sharing_strategy: None, - time_slicing: None + time_slicing: None, + device_partitioning_strategy: None, + mig: None } ); let results = serde_json::to_string(&nvidia_device_plugins).unwrap(); @@ -1534,7 +1651,9 @@ mod tests { device_id_strategy: Some(NvidiaDeviceIdStrategy::Uuid), device_list_strategy: Some(NvidiaDeviceListStrategy::Envvar), device_sharing_strategy: Some(NvidiaDeviceSharingStrategy::TimeSlicing), - time_slicing: None + time_slicing: None, + device_partitioning_strategy: None, + mig: None } ); @@ -1548,4 +1667,53 @@ mod tests { let result: Result = serde_json::from_str(test_json); assert!(result.is_err(), "The JSON should not be parsed successfully as it contains an invalid value for 'replicas'."); } + + #[test] + fn test_serde_nvidia_device_plugins_with_mig() { + let test_json = r#"{"pass-device-specs":false,"device-id-strategy":"uuid","device-list-strategy":"envvar","device-partitioning-strategy":"mig"}"#; + let nvidia_device_plugins: NvidiaDevicePluginSettings = + serde_json::from_str(test_json).unwrap(); + assert_eq!( + nvidia_device_plugins, + NvidiaDevicePluginSettings { + pass_device_specs: Some(false), + device_id_strategy: Some(NvidiaDeviceIdStrategy::Uuid), + device_list_strategy: Some(NvidiaDeviceListStrategy::Envvar), + device_sharing_strategy: None, + time_slicing: None, + device_partitioning_strategy: Some(NvidiaDevicePartitioningStrategy::MIG), + mig: None + } + ); + + let results = serde_json::to_string(&nvidia_device_plugins).unwrap(); + assert_eq!(results, test_json); + } + + #[test] + fn test_serde_nvidia_device_plugins_with_mig_profile() { + let test_json = r#"{"pass-device-specs":false,"device-id-strategy":"uuid","device-list-strategy":"envvar","device-partitioning-strategy":"mig","mig":{"profile":{"a100.40gb":"1g.5gb"}}}"#; + let nvidia_device_plugins: NvidiaDevicePluginSettings = + serde_json::from_str(test_json).unwrap(); + assert_eq!( + nvidia_device_plugins, + NvidiaDevicePluginSettings { + pass_device_specs: Some(false), + device_id_strategy: Some(NvidiaDeviceIdStrategy::Uuid), + device_list_strategy: Some(NvidiaDeviceListStrategy::Envvar), + device_sharing_strategy: None, + time_slicing: None, + device_partitioning_strategy: Some(NvidiaDevicePartitioningStrategy::MIG), + mig: Some(NvidiaMigSettings { + profile: Some(HashMap::from([( + NvidiaGpuModel::try_from("a100.40gb").unwrap(), + MigProfile::try_from("1g.5gb").unwrap() + )])) + }), + } + ); + + let results = serde_json::to_string(&nvidia_device_plugins).unwrap(); + assert_eq!(results, test_json); + } } diff --git a/bottlerocket-settings-models/settings-extensions/kubelet-device-plugins/Cargo.toml b/bottlerocket-settings-models/settings-extensions/kubelet-device-plugins/Cargo.toml index ee0a0516..7e12abaa 100644 --- a/bottlerocket-settings-models/settings-extensions/kubelet-device-plugins/Cargo.toml +++ b/bottlerocket-settings-models/settings-extensions/kubelet-device-plugins/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "settings-extension-kubelet-device-plugins" -version = "0.1.0" +version = "0.2.0" authors = ["Arnaldo Garcia Rincon "] license = "Apache-2.0 OR MIT" edition = "2021" diff --git a/bottlerocket-settings-models/settings-extensions/kubelet-device-plugins/src/lib.rs b/bottlerocket-settings-models/settings-extensions/kubelet-device-plugins/src/lib.rs index 9607e29e..db268b41 100644 --- a/bottlerocket-settings-models/settings-extensions/kubelet-device-plugins/src/lib.rs +++ b/bottlerocket-settings-models/settings-extensions/kubelet-device-plugins/src/lib.rs @@ -43,10 +43,12 @@ impl SettingsModel for KubeletDevicePluginsV1 { mod test { use super::*; use bottlerocket_modeled_types::{ - NvidiaDeviceIdStrategy, NvidiaDeviceListStrategy, NvidiaDeviceSharingStrategy, - NvidiaTimeSlicingSettings, + MigProfile, NvidiaDeviceIdStrategy, NvidiaDeviceListStrategy, + NvidiaDevicePartitioningStrategy, NvidiaDeviceSharingStrategy, NvidiaGpuModel, + NvidiaMigSettings, NvidiaTimeSlicingSettings, }; use bounded_integer::BoundedI32; + use std::collections::HashMap; #[test] fn test_generate_kubelet_device_plugins() { @@ -59,7 +61,7 @@ mod test { #[test] fn test_serde_kubelet_device_plugins() { - let test_json = r#"{"nvidia":{"pass-device-specs":true,"device-id-strategy":"index","device-list-strategy":"volume-mounts","device-sharing-strategy":"time-slicing","time-slicing":{"replicas":2,"rename-by-default":true,"fail-requests-greater-than-one":true}}}"#; + let test_json = r#"{"nvidia":{"pass-device-specs":true,"device-id-strategy":"index","device-list-strategy":"volume-mounts","device-sharing-strategy":"time-slicing","time-slicing":{"replicas":2,"rename-by-default":true,"fail-requests-greater-than-one":true},"device-partitioning-strategy":"mig","mig":{"profile":{"a100.40gb":"1g.5gb"}}}}"#; let device_plugins: KubeletDevicePluginsV1 = serde_json::from_str(test_json).unwrap(); assert_eq!( @@ -75,6 +77,13 @@ mod test { rename_by_default: Some(true), fail_requests_greater_than_one: Some(true), }), + device_partitioning_strategy: Some(NvidiaDevicePartitioningStrategy::MIG), + mig: Some(NvidiaMigSettings { + profile: Some(HashMap::from([( + NvidiaGpuModel::try_from("a100.40gb").unwrap(), + MigProfile::try_from("1g.5gb").unwrap() + )])) + }), }), } ); diff --git a/bottlerocket-settings-models/settings-models/Cargo.toml b/bottlerocket-settings-models/settings-models/Cargo.toml index 20635811..599af1ff 100644 --- a/bottlerocket-settings-models/settings-models/Cargo.toml +++ b/bottlerocket-settings-models/settings-models/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "bottlerocket-settings-models" -version = "0.7.0" +version = "0.8.0" authors = ["Tom Kirchner "] license = "Apache-2.0 OR MIT" edition = "2021"