diff --git a/crates/stackable-operator/CHANGELOG.md b/crates/stackable-operator/CHANGELOG.md index a62911593..c9caf925d 100644 --- a/crates/stackable-operator/CHANGELOG.md +++ b/crates/stackable-operator/CHANGELOG.md @@ -13,12 +13,14 @@ All notable changes to this project will be documented in this file. - BREAKING: `ClusterResources` now requires the objects added to implement `DeepMerge`. This is very likely a stackable-operator internal change, but technically breaking ([#1118]). +- Add support for the SSH protocol for pulling git content ([#1121]). ### Removed - BREAKING: `ClusterResources` no longer derives `Eq` ([#1118]). [#1118]: https://github.com/stackabletech/operator-rs/pull/1118 +[#1121]: https://github.com/stackabletech/operator-rs/pull/1121 ## [0.100.3] - 2025-10-31 diff --git a/crates/stackable-operator/crds/DummyCluster.yaml b/crates/stackable-operator/crds/DummyCluster.yaml index c96b4b557..8e95729fa 100644 --- a/crates/stackable-operator/crds/DummyCluster.yaml +++ b/crates/stackable-operator/crds/DummyCluster.yaml @@ -75,6 +75,77 @@ spec: domainName: description: A validated domain name type conforming to RFC 1123, so e.g. not an IP address type: string + gitSync: + properties: + branch: + default: main + description: |- + The branch to clone; defaults to `main`. + + Since git-sync v4.x.x this field is mapped to the flag `--ref`. + type: string + credentialsSecret: + description: |- + The name of the Secret used to access the repository if it is not public. + + The referenced Secret must include two fields: `user` and `password`. + The `password` field can either be an actual password (not recommended) or a GitHub token, + as described in the git-sync [documentation]. + This cannot be provided if `ssh_secret` is also provided. + + [documentation]: https://github.com/kubernetes/git-sync/tree/v4.2.4?tab=readme-ov-file#manual + nullable: true + type: string + depth: + default: 1 + description: The depth of syncing, i.e. 
the number of commits to clone; defaults to 1. + format: uint32 + minimum: 0.0 + type: integer + gitFolder: + default: / + description: |- + Location in the Git repository containing the resource; defaults to the root folder. + + It can optionally start with `/`, however, no trailing slash is recommended. + An empty string (``) or slash (`/`) corresponds to the root folder in Git. + type: string + gitSyncConf: + additionalProperties: + type: string + default: {} + description: |- + A map of optional configuration settings that are listed in the git-sync [documentation]. + + Also read the git-sync [example] in our documentation. These settings are not verified. + + [documentation]: https://github.com/kubernetes/git-sync/tree/v4.2.4?tab=readme-ov-file#manual + [example]: https://docs.stackable.tech/home/nightly/airflow/usage-guide/mounting-dags#_example + type: object + repo: + description: 'The git repository URL that will be cloned, for example: `https://github.com/stackabletech/airflow-operator` or `ssh://git@github.com/stackable-airflow/dags.git`.' + format: uri + type: string + sshSecret: + description: |- + The name of the Secret used for SSH access to the repository. + + The referenced Secret must include two fields: `key` and `knownHosts`. + This cannot be provided if `credentials_secret` is also provided. + + [documentation]: https://github.com/kubernetes/git-sync/tree/v4.2.4?tab=readme-ov-file#manual + nullable: true + type: string + wait: + default: 20s + description: |- + The synchronization interval, e.g. `20s` or `5m`; defaults to `20s`. + + Since git-sync v4.x.x this field is mapped to the flag `--period`. 
+ type: string + required: + - repo + type: object hostName: type: string kerberosRealmName: @@ -1402,6 +1473,7 @@ spec: - clientAuthenticationDetails - clusterOperation - domainName + - gitSync - hostName - kerberosRealmName - opaConfig diff --git a/crates/stackable-operator/src/crd/git_sync/mod.rs b/crates/stackable-operator/src/crd/git_sync/mod.rs index 92e9eab52..a3ac77f1e 100644 --- a/crates/stackable-operator/src/crd/git_sync/mod.rs +++ b/crates/stackable-operator/src/crd/git_sync/mod.rs @@ -20,7 +20,7 @@ pub mod versioned { #[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Eq, Serialize)] #[serde(rename_all = "camelCase")] pub struct GitSync { - /// The git repository URL that will be cloned, for example: `https://github.com/stackabletech/airflow-operator`. + /// The git repository URL that will be cloned, for example: `https://github.com/stackabletech/airflow-operator` or `ssh://git@github.com/stackable-airflow/dags.git`. pub repo: Url, /// The branch to clone; defaults to `main`. @@ -51,6 +51,7 @@ pub mod versioned { /// The referenced Secret must include two fields: `user` and `password`. /// The `password` field can either be an actual password (not recommended) or a GitHub token, /// as described in the git-sync [documentation]. + /// This cannot be provided if `ssh_secret` is also provided. /// /// [documentation]: https://github.com/kubernetes/git-sync/tree/v4.2.4?tab=readme-ov-file#manual pub credentials_secret: Option, @@ -63,5 +64,13 @@ pub mod versioned { /// [example]: DOCS_BASE_URL_PLACEHOLDER/airflow/usage-guide/mounting-dags#_example #[serde(default)] pub git_sync_conf: BTreeMap, + + /// The name of the Secret used for SSH access to the repository. + /// + /// The referenced Secret must include two fields: `key` and `knownHosts`. + /// This cannot be provided if `credentials_secret` is also provided. 
+ /// + /// [documentation]: https://github.com/kubernetes/git-sync/tree/v4.2.4?tab=readme-ov-file#manual + pub ssh_secret: Option, } } diff --git a/crates/stackable-operator/src/crd/git_sync/v1alpha1_impl.rs b/crates/stackable-operator/src/crd/git_sync/v1alpha1_impl.rs index a29789ba8..e0f67fd4b 100644 --- a/crates/stackable-operator/src/crd/git_sync/v1alpha1_impl.rs +++ b/crates/stackable-operator/src/crd/git_sync/v1alpha1_impl.rs @@ -9,7 +9,9 @@ use strum::{EnumDiscriminants, IntoStaticStr}; use crate::{ builder::pod::{ - container::ContainerBuilder, resources::ResourceRequirementsBuilder, volume::VolumeBuilder, + container::ContainerBuilder, + resources::ResourceRequirementsBuilder, + volume::{VolumeBuilder, VolumeMountBuilder}, }, commons::product_image_selection::ResolvedProductImage, crd::git_sync::v1alpha1::GitSync, @@ -24,6 +26,8 @@ use crate::{ pub const CONTAINER_NAME_PREFIX: &str = "git-sync"; pub const VOLUME_NAME_PREFIX: &str = "content-from-git"; pub const MOUNT_PATH_PREFIX: &str = "/stackable/app/git"; +pub const SSH_VOLUME_NAME_PREFIX: &str = "ssh-keys-info"; +pub const SSH_MOUNT_PATH_PREFIX: &str = "/stackable/gitssh"; pub const GIT_SYNC_SAFE_DIR_OPTION: &str = "safe.directory"; pub const GIT_SYNC_ROOT_DIR: &str = "/tmp/git"; pub const GIT_SYNC_LINK: &str = "current"; @@ -40,6 +44,9 @@ pub enum Error { AddVolumeMount { source: crate::builder::pod::container::Error, }, + + #[snafu(display("failed to declare unique credentials"))] + MultipleCredentials, } impl GitSync { @@ -77,6 +84,9 @@ pub struct GitSyncResources { /// Absolute paths to the Git contents in the mounted volumes pub git_content_folders: Vec, + + /// Secret-backed volumes providing the SSH key and known hosts to the git-sync containers + pub git_ssh_volumes: Vec, } impl GitSyncResources { @@ -107,6 +117,11 @@ impl GitSyncResources { let mut resources = GitSyncResources::default(); for (i, git_sync) in git_syncs.iter().enumerate() { + if git_sync.credentials_secret.is_some() && git_sync.ssh_secret.is_some() { + // 
Gitsync will not allow the declaration of both ssh-key and password/token credentials + return Err(Error::MultipleCredentials); + } + let mut env_vars = vec![]; if let Some(git_credentials_secret) = &git_sync.credentials_secret { env_vars.push(GitSyncResources::env_var_from_secret( @@ -120,6 +135,19 @@ impl GitSyncResources { "password", )); } + if git_sync.ssh_secret.is_some() { + env_vars.push(EnvVar { + name: "GITSYNC_SSH_KEY_FILE".to_owned(), + value: Some(format!("{SSH_MOUNT_PATH_PREFIX}-{i}/key").to_owned()), + value_from: None, + }); + env_vars.push(EnvVar { + name: "GITSYNC_SSH_KNOWN_HOSTS_FILE".to_owned(), + value: Some(format!("{SSH_MOUNT_PATH_PREFIX}-{i}/knownHosts").to_owned()), + value_from: None, + }); + } + env_vars = insert_or_update_env_vars(&env_vars, extra_env_vars); let volume_name = format!("{VOLUME_NAME_PREFIX}-{i}"); @@ -139,8 +167,18 @@ impl GitSyncResources { let mut git_sync_container_volume_mounts = vec![git_sync_root_volume_mount, log_volume_mount]; + git_sync_container_volume_mounts.extend_from_slice(extra_volume_mounts); + if git_sync.ssh_secret.is_some() { + let ssh_mount_path = format!("{SSH_MOUNT_PATH_PREFIX}-{i}"); + let ssh_volume_name = format!("{SSH_VOLUME_NAME_PREFIX}-{i}"); + + let ssh_volume_mount = + VolumeMountBuilder::new(ssh_volume_name, ssh_mount_path).build(); + git_sync_container_volume_mounts.push(ssh_volume_mount); + } + let container = Self::create_git_sync_container( &format!("{CONTAINER_NAME_PREFIX}-{i}"), resolved_product_image, @@ -186,6 +224,15 @@ impl GitSyncResources { .git_content_volume_mounts .push(git_content_volume_mount); resources.git_content_folders.push(git_content_folder); + + if let Some(get_ssh_secret) = &git_sync.ssh_secret { + let ssh_volume_name = format!("{SSH_VOLUME_NAME_PREFIX}-{i}"); + + let ssh_secret_volume = VolumeBuilder::new(&ssh_volume_name) + .with_secret(get_ssh_secret, false) + .build(); + resources.git_ssh_volumes.push(ssh_secret_volume); + } } Ok(resources) @@ -876,4 +923,209 @@ 
name: content-from-git-2 .unwrap() ); } + + #[test] + fn test_git_sync_ssh() { + let git_sync_spec = r#" + # GitSync using SSH + - repo: ssh://git@github.com/stackabletech/repo.git + branch: trunk + gitFolder: "" + depth: 3 + wait: 1m + sshSecret: git-sync-ssh + gitSyncConf: + --rev: HEAD + --git-config: http.sslCAInfo:/tmp/ca-cert/ca.crt + "#; + + let git_syncs: Vec = yaml_from_str_singleton_map(git_sync_spec).unwrap(); + + let resolved_product_image = ResolvedProductImage { + image: "oci.stackable.tech/sdp/product:latest".to_string(), + app_version_label_value: "1.0.0-latest" + .parse() + .expect("static app version label is always valid"), + product_version: "1.0.0".to_string(), + image_pull_policy: "Always".to_string(), + pull_secrets: None, + }; + + let extra_env_vars = env_vars_from([("VAR1", "value1")]); + + let extra_volume_mounts = [VolumeMount { + name: "extra-volume".to_string(), + mount_path: "/mnt/extra-volume".to_string(), + ..VolumeMount::default() + }]; + + let git_sync_resources = GitSyncResources::new( + &git_syncs, + &resolved_product_image, + &extra_env_vars, + &extra_volume_mounts, + "log-volume", + &validate(default_container_log_config()).unwrap(), + ) + .unwrap(); + + assert!(git_sync_resources.is_git_sync_enabled()); + + assert_eq!(1, git_sync_resources.git_sync_containers.len()); + + assert_eq!( + r#"args: +- |- + mkdir --parents /stackable/log/git-sync-0 && exec > >(tee /stackable/log/git-sync-0/container.stdout.log) 2> >(tee /stackable/log/git-sync-0/container.stderr.log >&2) + + prepare_signal_handlers() + { + unset term_child_pid + unset term_kill_needed + trap 'handle_term_signal' TERM + } + + handle_term_signal() + { + if [ "${term_child_pid}" ]; then + kill -TERM "${term_child_pid}" 2>/dev/null + else + term_kill_needed="yes" + fi + } + + wait_for_termination() + { + set +e + term_child_pid=$1 + if [[ -v term_kill_needed ]]; then + kill -TERM "${term_child_pid}" 2>/dev/null + fi + wait ${term_child_pid} 2>/dev/null + trap - TERM + 
wait ${term_child_pid} 2>/dev/null + set -e + } + + prepare_signal_handlers + /stackable/git-sync --depth=3 --git-config='safe.directory:/tmp/git,http.sslCAInfo:/tmp/ca-cert/ca.crt' --link=current --one-time=false --period=60s --ref=trunk --repo=ssh://git@github.com/stackabletech/repo.git --rev=HEAD --root=/tmp/git & + wait_for_termination $! +command: +- /bin/bash +- -x +- -euo +- pipefail +- -c +env: +- name: GITSYNC_SSH_KEY_FILE + value: /stackable/gitssh-0/key +- name: GITSYNC_SSH_KNOWN_HOSTS_FILE + value: /stackable/gitssh-0/knownHosts +- name: VAR1 + value: value1 +image: oci.stackable.tech/sdp/product:latest +imagePullPolicy: Always +name: git-sync-0 +resources: + limits: + cpu: 200m + memory: 64Mi + requests: + cpu: 100m + memory: 64Mi +volumeMounts: +- mountPath: /tmp/git + name: content-from-git-0 +- mountPath: /stackable/log + name: log-volume +- mountPath: /mnt/extra-volume + name: extra-volume +- mountPath: /stackable/gitssh-0 + name: ssh-keys-info-0 +"#, + serde_yaml::to_string(&git_sync_resources.git_sync_containers.first()).unwrap() + ); + + assert_eq!(1, git_sync_resources.git_sync_init_containers.len()); + + assert_eq!( + r#"args: +- |- + mkdir --parents /stackable/log/git-sync-0-init && exec > >(tee /stackable/log/git-sync-0-init/container.stdout.log) 2> >(tee /stackable/log/git-sync-0-init/container.stderr.log >&2) + /stackable/git-sync --depth=3 --git-config='safe.directory:/tmp/git,http.sslCAInfo:/tmp/ca-cert/ca.crt' --link=current --one-time=true --period=60s --ref=trunk --repo=ssh://git@github.com/stackabletech/repo.git --rev=HEAD --root=/tmp/git +command: +- /bin/bash +- -x +- -euo +- pipefail +- -c +env: +- name: GITSYNC_SSH_KEY_FILE + value: /stackable/gitssh-0/key +- name: GITSYNC_SSH_KNOWN_HOSTS_FILE + value: /stackable/gitssh-0/knownHosts +- name: VAR1 + value: value1 +image: oci.stackable.tech/sdp/product:latest +imagePullPolicy: Always +name: git-sync-0-init +resources: + limits: + cpu: 200m + memory: 64Mi + requests: + cpu: 100m + 
memory: 64Mi +volumeMounts: +- mountPath: /tmp/git + name: content-from-git-0 +- mountPath: /stackable/log + name: log-volume +- mountPath: /mnt/extra-volume + name: extra-volume +- mountPath: /stackable/gitssh-0 + name: ssh-keys-info-0 +"#, + serde_yaml::to_string(&git_sync_resources.git_sync_init_containers.first()).unwrap() + ); + + assert_eq!(1, git_sync_resources.git_content_volumes.len()); + + assert_eq!( + "emptyDir: {} +name: content-from-git-0 +", + serde_yaml::to_string(&git_sync_resources.git_content_volumes.first()).unwrap() + ); + + assert_eq!(1, git_sync_resources.git_content_volume_mounts.len()); + + assert_eq!( + "mountPath: /stackable/app/git-0 +name: content-from-git-0 +", + serde_yaml::to_string(&git_sync_resources.git_content_volume_mounts.first()).unwrap() + ); + + assert_eq!(1, git_sync_resources.git_content_folders.len()); + + assert_eq!( + "/stackable/app/git-0/current/", + git_sync_resources + .git_content_folders_as_string() + .first() + .unwrap() + ); + + assert_eq!(1, git_sync_resources.git_ssh_volumes.len()); + + assert_eq!( + "name: ssh-keys-info-0 +secret: + optional: false + secretName: git-sync-ssh +", + serde_yaml::to_string(&git_sync_resources.git_ssh_volumes.first()).unwrap() + ); + } } diff --git a/crates/xtask/src/crd/dummy.rs b/crates/xtask/src/crd/dummy.rs index ac65c4f60..28a0c3f91 100644 --- a/crates/xtask/src/crd/dummy.rs +++ b/crates/xtask/src/crd/dummy.rs @@ -2,6 +2,7 @@ use serde::{Deserialize, Serialize}; use stackable_operator::{ commons::resources::{JvmHeapLimits, Resources}, config::fragment::Fragment, + crd::git_sync::v1alpha1::GitSync, deep_merger::ObjectOverrides, kube::CustomResource, role_utils::Role, @@ -48,6 +49,7 @@ pub mod versioned { secret_class_volume: stackable_operator::commons::secret_class::SecretClassVolume, secret_reference: stackable_operator::shared::secret::SecretReference, tls_client_details: stackable_operator::commons::tls_verification::TlsClientDetails, + git_sync: GitSync, #[serde(default)] 
pub object_overrides: ObjectOverrides,