Skip to content

Commit 553b40d

Browse files
sbernauerdervoeti
authored andcommitted
fix: Pass KUBERNETES_NODE_NAME to product sidecars and other bugs (#744)
* fix: Pass KUBERNETES_NODE_NAME to product sidecars and other bugs * Unneccesary constification * changelog
1 parent 6acc5b0 commit 553b40d

File tree

5 files changed

+111
-19
lines changed

5 files changed

+111
-19
lines changed

CHANGELOG.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ All notable changes to this project will be documented in this file.
66

77
### Added
88

9-
- Adds new telemetry CLI arguments and environment variables ([#715]).
9+
- Adds new telemetry CLI arguments and environment variables ([#715], [#744]).
1010
- Use `--file-log-max-files` (or `FILE_LOG_MAX_FILES`) to limit the number of log files kept.
1111
- Use `--file-log-rotation-period` (or `FILE_LOG_ROTATION_PERIOD`) to configure the frequency of rotation.
1212
- Use `--console-log-format` (or `CONSOLE_LOG_FORMAT`) to set the format to `plain` (default) or `json`.
@@ -17,7 +17,7 @@ All notable changes to this project will be documented in this file.
1717

1818
### Changed
1919

20-
- BREAKING: Replace stackable-operator `initialize_logging` with stackable-telemetry `Tracing` ([#703], [#710], [#715]).
20+
- BREAKING: Replace stackable-operator `initialize_logging` with stackable-telemetry `Tracing` ([#703], [#710], [#715], [#744]).
2121
- operator-binary:
2222
- The console log level was set by `OPA_OPERATOR_LOG`, and is now set by `CONSOLE_LOG_LEVEL`.
2323
- The file log level was set by `OPA_OPERATOR_LOG`, and is now set by `FILE_LOG_LEVEL`.
@@ -43,7 +43,7 @@ All notable changes to this project will be documented in this file.
4343
- The defaults from the docker images itself will now apply, which will be different from 1000/0 going forward
4444
- This is marked as breaking because tools and policies might exist, which require these fields to be set
4545
- user-info-fetcher: the AD backend now uses the Kerberos realm to expand the user search filter ([#737])
46-
- BREAKING: Bump stackable-operator to 0.94.0 and update other dependencies ([#743]).
46+
- BREAKING: Bump stackable-operator to 0.94.0 and update other dependencies ([#743], [#744]).
4747
- The default Kubernetes cluster domain name is now fetched from the kubelet API unless explicitly configured.
4848
- This requires operators to have the RBAC permission to get nodes/proxy in the apiGroup "". The helm-chart takes care of this.
4949
- The CLI argument `--kubernetes-node-name` or env variable `KUBERNETES_NODE_NAME` needs to be set. The helm-chart takes care of this.
@@ -52,6 +52,8 @@ All notable changes to this project will be documented in this file.
5252

5353
- Use `json` file extension for log files ([#709]).
5454
- Allow uppercase characters in domain names ([#743]).
55+
- Add missing RBAC permission to patch Kubernetes `events` used for error reporting to helm-chart ([#744]).
56+
- Correctly propagate the Kubernetes cluster domain to the `opa-bundle-builder` and `user-info-fetcher` sidecars ([#744]).
5557

5658
### Removed
5759

@@ -71,6 +73,7 @@ All notable changes to this project will be documented in this file.
7173
[#732]: https://github.com/stackabletech/opa-operator/pull/732
7274
[#737]: https://github.com/stackabletech/opa-operator/pull/737
7375
[#743]: https://github.com/stackabletech/opa-operator/pull/743
76+
[#744]: https://github.com/stackabletech/opa-operator/pull/744
7477

7578
## [25.3.0] - 2025-03-21
7679

deploy/helm/opa-operator/templates/roles.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ rules:
8585
- patch
8686
verbs:
8787
- create
88+
- patch
8889
- apiGroups:
8990
- {{ include "operator.name" . }}.stackable.tech
9091
resources:

rust/operator-binary/src/controller.rs

Lines changed: 98 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,9 @@ use stackable_operator::{
3838
api::{
3939
apps::v1::{DaemonSet, DaemonSetSpec},
4040
core::v1::{
41-
ConfigMap, EmptyDirVolumeSource, EnvVar, HTTPGetAction, Probe, SecretVolumeSource,
42-
Service, ServiceAccount, ServicePort, ServiceSpec,
41+
ConfigMap, EmptyDirVolumeSource, EnvVar, EnvVarSource, HTTPGetAction,
42+
ObjectFieldSelector, Probe, SecretVolumeSource, Service, ServiceAccount,
43+
ServicePort, ServiceSpec,
4344
},
4445
},
4546
apimachinery::pkg::{apis::meta::v1::LabelSelector, util::intstr::IntOrString},
@@ -69,7 +70,7 @@ use stackable_operator::{
6970
operations::ClusterOperationsConditionBuilder,
7071
},
7172
time::Duration,
72-
utils::COMMON_BASH_TRAP_FUNCTIONS,
73+
utils::{COMMON_BASH_TRAP_FUNCTIONS, cluster_info::KubernetesClusterInfo},
7374
};
7475
use strum::{EnumDiscriminants, IntoStaticStr};
7576

@@ -108,6 +109,12 @@ const TLS_STORE_DIR: &str = "/stackable/tls";
108109

109110
const DOCKER_IMAGE_BASE_NAME: &str = "opa";
110111

112+
const CONSOLE_LOG_LEVEL_ENV: &str = "CONSOLE_LOG_LEVEL";
113+
const FILE_LOG_LEVEL_ENV: &str = "FILE_LOG_LEVEL";
114+
const FILE_LOG_DIRECTORY_ENV: &str = "FILE_LOG_DIRECTORY";
115+
const KUBERNETES_NODE_NAME_ENV: &str = "KUBERNETES_NODE_NAME";
116+
const KUBERNETES_CLUSTER_DOMAIN_ENV: &str = "KUBERNETES_CLUSTER_DOMAIN";
117+
111118
// logging defaults
112119
const DEFAULT_DECISION_LOGGING_ENABLED: bool = false;
113120
const DEFAULT_FILE_LOG_LEVEL: LogLevel = LogLevel::INFO;
@@ -148,6 +155,7 @@ pub struct Ctx {
148155
pub product_config: ProductConfigManager,
149156
pub opa_bundle_builder_image: String,
150157
pub user_info_fetcher_image: String,
158+
pub cluster_info: KubernetesClusterInfo,
151159
}
152160

153161
#[derive(Snafu, Debug, EnumDiscriminants)]
@@ -501,6 +509,7 @@ pub async fn reconcile_opa(
501509
&ctx.opa_bundle_builder_image,
502510
&ctx.user_info_fetcher_image,
503511
&rbac_sa,
512+
&ctx.cluster_info,
504513
)?;
505514

506515
cluster_resources
@@ -729,6 +738,44 @@ fn build_server_rolegroup_config_map(
729738
})
730739
}
731740

741+
/// Env variables that are need to run stackable Rust binaries, such as
742+
/// * opa-bundle-builder
743+
/// * user-info-fetcher
744+
fn add_stackable_rust_cli_env_vars(
745+
container_builder: &mut ContainerBuilder,
746+
cluster_info: &KubernetesClusterInfo,
747+
log_level: impl Into<String>,
748+
container: &v1alpha1::Container,
749+
) {
750+
let log_level = log_level.into();
751+
container_builder
752+
.add_env_var(CONSOLE_LOG_LEVEL_ENV, log_level.clone())
753+
.add_env_var(FILE_LOG_LEVEL_ENV, log_level)
754+
.add_env_var(
755+
FILE_LOG_DIRECTORY_ENV,
756+
format!("{STACKABLE_LOG_DIR}/{container}",),
757+
)
758+
.add_env_var_from_source(
759+
KUBERNETES_NODE_NAME_ENV,
760+
EnvVarSource {
761+
field_ref: Some(ObjectFieldSelector {
762+
field_path: "spec.nodeName".to_owned(),
763+
..Default::default()
764+
}),
765+
..Default::default()
766+
},
767+
)
768+
// We set the cluster domain always explicitly, because the product Pods does not have the
769+
// RBAC permission to get the `nodes/proxy` resource at cluster scope. This is likely
770+
// because it only has a RoleBinding and no ClusterRoleBinding.
771+
// By setting the cluster domain explicitly we avoid that the sidecars try to look it up
772+
// based on some information coming from the node.
773+
.add_env_var(
774+
KUBERNETES_CLUSTER_DOMAIN_ENV,
775+
cluster_info.cluster_domain.to_string(),
776+
);
777+
}
778+
732779
/// The rolegroup [`DaemonSet`] runs the rolegroup, as configured by the administrator.
733780
///
734781
/// The [`Pod`](`stackable_operator::k8s_openapi::api::core::v1::Pod`)s are accessible through the
@@ -747,6 +794,7 @@ fn build_server_rolegroup_daemonset(
747794
opa_bundle_builder_image: &str,
748795
user_info_fetcher_image: &str,
749796
service_account: &ServiceAccount,
797+
cluster_info: &KubernetesClusterInfo,
750798
) -> Result<DaemonSet> {
751799
let opa_name = opa.metadata.name.as_deref().context(NoNameSnafu)?;
752800
let role = opa.role(opa_role);
@@ -797,8 +845,6 @@ fn build_server_rolegroup_daemonset(
797845
.context(AddVolumeMountSnafu)?
798846
.resources(merged_config.resources.to_owned().into());
799847

800-
let console_and_file_log_level = bundle_builder_log_level(merged_config);
801-
802848
cb_bundle_builder
803849
.image_from_product_image(resolved_product_image) // inherit the pull policy and pull secrets, and then...
804850
.image(opa_bundle_builder_image) // ...override the image
@@ -814,12 +860,6 @@ fn build_server_rolegroup_daemonset(
814860
&bundle_builder_container_name,
815861
)])
816862
.add_env_var_from_field_path("WATCH_NAMESPACE", FieldPathEnvVar::Namespace)
817-
.add_env_var("CONSOLE_LOG_LEVEL", console_and_file_log_level.to_string())
818-
.add_env_var("FILE_LOG_LEVEL", console_and_file_log_level.to_string())
819-
.add_env_var(
820-
"FILE_LOG_DIRECTORY",
821-
format!("{STACKABLE_LOG_DIR}/{bundle_builder_container_name}"),
822-
)
823863
.add_volume_mount(BUNDLES_VOLUME_NAME, BUNDLES_DIR)
824864
.context(AddVolumeMountSnafu)?
825865
.add_volume_mount(LOG_VOLUME_NAME, STACKABLE_LOG_DIR)
@@ -853,6 +893,12 @@ fn build_server_rolegroup_daemonset(
853893
}),
854894
..Probe::default()
855895
});
896+
add_stackable_rust_cli_env_vars(
897+
&mut cb_bundle_builder,
898+
cluster_info,
899+
sidecar_container_log_level(merged_config, &v1alpha1::Container::BundleBuilder).to_string(),
900+
&v1alpha1::Container::BundleBuilder,
901+
);
856902

857903
let opa_tls_config = opa.spec.cluster_config.tls.as_ref();
858904

@@ -1002,6 +1048,13 @@ fn build_server_rolegroup_daemonset(
10021048
.with_memory_limit("128Mi")
10031049
.build(),
10041050
);
1051+
add_stackable_rust_cli_env_vars(
1052+
&mut cb_user_info_fetcher,
1053+
cluster_info,
1054+
sidecar_container_log_level(merged_config, &v1alpha1::Container::UserInfoFetcher)
1055+
.to_string(),
1056+
&v1alpha1::Container::UserInfoFetcher,
1057+
);
10051058

10061059
match &user_info.backend {
10071060
user_info_fetcher::v1alpha1::Backend::None {} => {}
@@ -1325,13 +1378,42 @@ fn build_bundle_builder_start_command(
13251378
}
13261379
}
13271380

1328-
fn bundle_builder_log_level(merged_config: &v1alpha1::OpaConfig) -> BundleBuilderLogLevel {
1381+
/// TODO: *Technically* this function would need to be way more complex.
1382+
/// For now it's a good-enough approximation, this is fine :D
1383+
///
1384+
/// The following config
1385+
///
1386+
/// ```
1387+
/// containers:
1388+
/// opa-bundle-builder:
1389+
/// console:
1390+
/// level: DEBUG
1391+
/// file:
1392+
/// level: INFO
1393+
/// loggers:
1394+
/// ROOT:
1395+
/// level: INFO
1396+
/// my.module:
1397+
/// level: DEBUG
1398+
/// some.chatty.module:
1399+
/// level: NONE
1400+
/// ```
1401+
///
1402+
/// should result in
1403+
/// `CONSOLE_LOG_LEVEL=info,my.module=debug,some.chatty.module=none`
1404+
/// and
1405+
/// `FILE_LOG_LEVEL=info,my.module=info,some.chatty.module=none`.
1406+
/// Note that `my.module` is `info` instead of `debug`, because it's clamped by the global file log
1407+
/// level.
1408+
///
1409+
/// Context: https://docs.stackable.tech/home/stable/concepts/logging/
1410+
fn sidecar_container_log_level(
1411+
merged_config: &v1alpha1::OpaConfig,
1412+
sidecar_container: &v1alpha1::Container,
1413+
) -> BundleBuilderLogLevel {
13291414
if let Some(ContainerLogConfig {
13301415
choice: Some(ContainerLogConfigChoice::Automatic(log_config)),
1331-
}) = merged_config
1332-
.logging
1333-
.containers
1334-
.get(&v1alpha1::Container::BundleBuilder)
1416+
}) = merged_config.logging.containers.get(sidecar_container)
13351417
{
13361418
if let Some(logger) = log_config
13371419
.loggers

rust/operator-binary/src/crd/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ pub mod versioned {
180180
Vector,
181181
BundleBuilder,
182182
Opa,
183+
UserInfoFetcher,
183184
}
184185

185186
#[derive(Clone, Debug, Default, Fragment, JsonSchema, PartialEq)]

rust/operator-binary/src/main.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ use stackable_operator::{
2828
namespace::WatchNamespace,
2929
shared::yaml::SerializeOptions,
3030
telemetry::Tracing,
31+
utils::cluster_info::KubernetesClusterInfo,
3132
};
3233

3334
use crate::controller::OPA_FULL_CONTROLLER_NAME;
@@ -100,12 +101,14 @@ async fn main() -> anyhow::Result<()> {
100101
let client =
101102
client::initialize_operator(Some(OPERATOR_NAME.to_string()), &cluster_info_opts)
102103
.await?;
104+
let kubernetes_cluster_info = client.kubernetes_cluster_info.clone();
103105
create_controller(
104106
client,
105107
product_config,
106108
watch_namespace,
107109
operator_image.clone(),
108110
operator_image,
111+
kubernetes_cluster_info,
109112
)
110113
.await;
111114
}
@@ -123,6 +126,7 @@ async fn create_controller(
123126
watch_namespace: WatchNamespace,
124127
opa_bundle_builder_image: String,
125128
user_info_fetcher_image: String,
129+
cluster_info: KubernetesClusterInfo,
126130
) {
127131
let opa_api: Api<DeserializeGuard<v1alpha1::OpaCluster>> = watch_namespace.get_api(&client);
128132
let daemonsets_api: Api<DeserializeGuard<DaemonSet>> = watch_namespace.get_api(&client);
@@ -150,6 +154,7 @@ async fn create_controller(
150154
product_config,
151155
opa_bundle_builder_image,
152156
user_info_fetcher_image,
157+
cluster_info,
153158
}),
154159
)
155160
// We can let the reporting happen in the background

0 commit comments

Comments
 (0)