Skip to content

Commit 0b67552

Browse files
committed
feat: fault tolerant execution
1 parent de7278a commit 0b67552

File tree

6 files changed

+934
-11
lines changed

6 files changed

+934
-11
lines changed

.github/ISSUE_TEMPLATE/02-bug_report.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ body:
1616
attributes:
1717
label: Affected Trino version
1818
description: Which version of Trino do you see this bug in?
19-
#
19+
#
2020
- type: textarea
2121
attributes:
2222
label: Current and expected behavior

deploy/helm/trino-operator/crds/crds.yaml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -153,7 +153,7 @@ spec:
153153
nullable: true
154154
type: integer
155155
secretClass:
156-
description: Secret class containing the Azure `connectionString`.
156+
description: '[SecretClass](https://docs.stackable.tech/home/nightly/secret-operator/secretclass) providing the Azure `connectionString`.'
157157
type: string
158158
required:
159159
- baseDirectories
@@ -338,11 +338,11 @@ spec:
338338
nullable: true
339339
properties:
340340
key:
341-
description: Key name in the secret that contains the JSON service account key.
341+
description: Key name in the Secret that contains the JSON service account key.
342342
nullable: true
343343
type: string
344344
secretClass:
345-
description: Secret class containing the GCS service account key. The secret should contain a key with the JSON service account key data.
345+
description: '[SecretClass](https://docs.stackable.tech/home/nightly/secret-operator/secretclass) providing the GCS service account key.'
346346
type: string
347347
required:
348348
- secretClass

rust/operator-binary/src/command.rs

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,14 +14,16 @@ use crate::{
1414
CONFIG_DIR_NAME, Container, LOG_PROPERTIES, RW_CONFIG_DIR_NAME, STACKABLE_CLIENT_TLS_DIR,
1515
STACKABLE_INTERNAL_TLS_DIR, STACKABLE_MOUNT_INTERNAL_TLS_DIR,
1616
STACKABLE_MOUNT_SERVER_TLS_DIR, STACKABLE_SERVER_TLS_DIR, STACKABLE_TLS_STORE_PASSWORD,
17-
SYSTEM_TRUST_STORE, SYSTEM_TRUST_STORE_PASSWORD, TrinoRole, v1alpha1,
17+
SYSTEM_TRUST_STORE, SYSTEM_TRUST_STORE_PASSWORD, TrinoRole,
18+
fault_tolerant_execution::ResolvedFaultTolerantExecutionConfig, v1alpha1,
1819
},
1920
};
2021

2122
pub fn container_prepare_args(
2223
trino: &v1alpha1::TrinoCluster,
2324
catalogs: &[CatalogConfig],
2425
merged_config: &v1alpha1::TrinoConfig,
26+
resolved_fte_config: &Option<ResolvedFaultTolerantExecutionConfig>,
2527
) -> Vec<String> {
2628
let mut args = vec![];
2729

@@ -78,12 +80,18 @@ pub fn container_prepare_args(
7880
args.extend_from_slice(&catalog.init_container_extra_start_commands);
7981
});
8082

83+
// Add the commands that are needed for fault tolerant execution (e.g., TLS certificates for S3)
84+
if let Some(resolved_fte) = resolved_fte_config {
85+
args.extend_from_slice(&resolved_fte.init_container_extra_start_commands);
86+
}
87+
8188
args
8289
}
8390

8491
pub fn container_trino_args(
8592
authentication_config: &TrinoAuthenticationConfig,
8693
catalogs: &[CatalogConfig],
94+
resolved_fte_config: &Option<ResolvedFaultTolerantExecutionConfig>,
8795
) -> Vec<String> {
8896
let mut args = vec![
8997
// copy config files to a writeable empty folder
@@ -110,6 +118,14 @@ pub fn container_trino_args(
110118
args.push(format!("export {env_name}=\"$(cat {file})\""));
111119
}
112120
});
121+
122+
// Add fault tolerant execution environment variables from files
123+
if let Some(resolved_fte) = resolved_fte_config {
124+
for (env_name, file) in &resolved_fte.load_env_from_files {
125+
args.push(format!("export {env_name}=\"$(cat {file})\""));
126+
}
127+
}
128+
113129
args.push("set -x".to_string());
114130

115131
// Start command

rust/operator-binary/src/controller.rs

Lines changed: 79 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -78,14 +78,16 @@ use crate::{
7878
command, config,
7979
crd::{
8080
ACCESS_CONTROL_PROPERTIES, APP_NAME, CONFIG_DIR_NAME, CONFIG_PROPERTIES, Container,
81-
DISCOVERY_URI, ENV_INTERNAL_SECRET, HTTP_PORT, HTTP_PORT_NAME, HTTPS_PORT, HTTPS_PORT_NAME,
82-
JVM_CONFIG, JVM_SECURITY_PROPERTIES, LOG_PROPERTIES, MAX_TRINO_LOG_FILES_SIZE,
83-
METRICS_PORT, METRICS_PORT_NAME, NODE_PROPERTIES, RW_CONFIG_DIR_NAME,
84-
STACKABLE_CLIENT_TLS_DIR, STACKABLE_INTERNAL_TLS_DIR, STACKABLE_MOUNT_INTERNAL_TLS_DIR,
85-
STACKABLE_MOUNT_SERVER_TLS_DIR, STACKABLE_SERVER_TLS_DIR, TrinoRole,
81+
DISCOVERY_URI, ENV_INTERNAL_SECRET, EXCHANGE_MANAGER_PROPERTIES, HTTP_PORT, HTTP_PORT_NAME,
82+
HTTPS_PORT, HTTPS_PORT_NAME, JVM_CONFIG, JVM_SECURITY_PROPERTIES, LOG_PROPERTIES,
83+
MAX_TRINO_LOG_FILES_SIZE, METRICS_PORT, METRICS_PORT_NAME, NODE_PROPERTIES,
84+
RW_CONFIG_DIR_NAME, STACKABLE_CLIENT_TLS_DIR, STACKABLE_INTERNAL_TLS_DIR,
85+
STACKABLE_MOUNT_INTERNAL_TLS_DIR, STACKABLE_MOUNT_SERVER_TLS_DIR, STACKABLE_SERVER_TLS_DIR,
86+
TrinoRole,
8687
authentication::resolve_authentication_classes,
8788
catalog,
8889
discovery::{TrinoDiscovery, TrinoDiscoveryProtocol, TrinoPodRef},
90+
fault_tolerant_execution::ResolvedFaultTolerantExecutionConfig,
8991
rolegroup_headless_service_name, v1alpha1,
9092
},
9193
listener::{
@@ -298,6 +300,11 @@ pub enum Error {
298300
source: crate::operations::graceful_shutdown::Error,
299301
},
300302

303+
#[snafu(display("failed to configure fault tolerant execution"))]
304+
FaultTolerantExecution {
305+
source: crate::crd::fault_tolerant_execution::Error,
306+
},
307+
301308
#[snafu(display("failed to get required Labels"))]
302309
GetRequiredLabels {
303310
source:
@@ -424,6 +431,20 @@ pub async fn reconcile_trino(
424431
catalogs.push(catalog_config);
425432
}
426433

434+
// Resolve fault tolerant execution configuration with S3 connections if needed
435+
let resolved_fte_config = match trino.spec.cluster_config.fault_tolerant_execution.as_ref() {
436+
Some(fte_config) => Some(
437+
ResolvedFaultTolerantExecutionConfig::from_config(
438+
fte_config,
439+
Some(client),
440+
&trino.namespace_r().context(ReadRoleSnafu)?,
441+
)
442+
.await
443+
.context(FaultTolerantExecutionSnafu)?,
444+
),
445+
None => None,
446+
};
447+
427448
let validated_config = validated_product_config(
428449
trino,
429450
// The Trino version is a single number like 396.
@@ -526,6 +547,7 @@ pub async fn reconcile_trino(
526547
&trino_authentication_config,
527548
&trino_opa_config,
528549
&client.kubernetes_cluster_info,
550+
&resolved_fte_config,
529551
)?;
530552
let rg_catalog_configmap = build_rolegroup_catalog_config_map(
531553
trino,
@@ -543,6 +565,7 @@ pub async fn reconcile_trino(
543565
&trino_authentication_config,
544566
&catalogs,
545567
&rbac_sa.name_any(),
568+
&resolved_fte_config,
546569
)?;
547570

548571
cluster_resources
@@ -651,6 +674,7 @@ fn build_rolegroup_config_map(
651674
trino_authentication_config: &TrinoAuthenticationConfig,
652675
trino_opa_config: &Option<TrinoOpaConfig>,
653676
cluster_info: &KubernetesClusterInfo,
677+
resolved_fte_config: &Option<ResolvedFaultTolerantExecutionConfig>,
654678
) -> Result<ConfigMap> {
655679
let mut cm_conf_data = BTreeMap::new();
656680

@@ -712,6 +736,16 @@ fn build_rolegroup_config_map(
712736
dynamic_resolved_config
713737
.extend(graceful_shutdown_config_properties(trino, trino_role));
714738

739+
// Add fault tolerant execution properties from resolved configuration
740+
if let Some(resolved_fte) = resolved_fte_config {
741+
dynamic_resolved_config.extend(
742+
resolved_fte
743+
.config_properties
744+
.iter()
745+
.map(|(k, v)| (k.clone(), Some(v.clone()))),
746+
);
747+
}
748+
715749
// Add static properties and overrides
716750
dynamic_resolved_config.extend(transformed_config);
717751

@@ -776,6 +810,22 @@ fn build_rolegroup_config_map(
776810

777811
cm_conf_data.insert(JVM_CONFIG.to_string(), jvm_config.to_string());
778812

813+
// Add exchange manager properties from resolved fault tolerant execution configuration
814+
if let Some(resolved_fte) = resolved_fte_config {
815+
if !resolved_fte.exchange_manager_properties.is_empty() {
816+
let exchange_props_with_options: BTreeMap<String, Option<String>> = resolved_fte
817+
.exchange_manager_properties
818+
.iter()
819+
.map(|(k, v)| (k.clone(), Some(v.clone())))
820+
.collect();
821+
cm_conf_data.insert(
822+
EXCHANGE_MANAGER_PROPERTIES.to_string(),
823+
to_java_properties_string(exchange_props_with_options.iter())
824+
.with_context(|_| FailedToWriteJavaPropertiesSnafu)?,
825+
);
826+
}
827+
}
828+
779829
let jvm_sec_props: BTreeMap<String, Option<String>> = config
780830
.get(&PropertyNameKind::File(JVM_SECURITY_PROPERTIES.to_string()))
781831
.cloned()
@@ -884,6 +934,7 @@ fn build_rolegroup_statefulset(
884934
trino_authentication_config: &TrinoAuthenticationConfig,
885935
catalogs: &[CatalogConfig],
886936
sa_name: &str,
937+
resolved_fte_config: &Option<ResolvedFaultTolerantExecutionConfig>,
887938
) -> Result<StatefulSet> {
888939
let role = trino
889940
.role(trino_role)
@@ -974,6 +1025,7 @@ fn build_rolegroup_statefulset(
9741025
&mut cb_trino,
9751026
catalogs,
9761027
&requested_secret_lifetime,
1028+
resolved_fte_config,
9771029
)?;
9781030

9791031
let mut prepare_args = vec![];
@@ -992,6 +1044,7 @@ fn build_rolegroup_statefulset(
9921044
trino,
9931045
catalogs,
9941046
merged_config,
1047+
resolved_fte_config,
9951048
));
9961049

9971050
prepare_args
@@ -1056,7 +1109,12 @@ fn build_rolegroup_statefulset(
10561109
"-c".to_string(),
10571110
])
10581111
.args(vec![
1059-
command::container_trino_args(trino_authentication_config, catalogs).join("\n"),
1112+
command::container_trino_args(
1113+
trino_authentication_config,
1114+
catalogs,
1115+
resolved_fte_config,
1116+
)
1117+
.join("\n"),
10601118
])
10611119
.add_env_vars(env)
10621120
.add_volume_mount("config", CONFIG_DIR_NAME)
@@ -1532,6 +1590,7 @@ fn tls_volume_mounts(
15321590
cb_trino: &mut ContainerBuilder,
15331591
catalogs: &[CatalogConfig],
15341592
requested_secret_lifetime: &Duration,
1593+
resolved_fte_config: &Option<ResolvedFaultTolerantExecutionConfig>,
15351594
) -> Result<()> {
15361595
if let Some(server_tls) = trino.get_server_tls() {
15371596
cb_prepare
@@ -1611,6 +1670,19 @@ fn tls_volume_mounts(
16111670
.context(AddVolumeSnafu)?;
16121671
}
16131672

1673+
// fault tolerant execution S3 credentials and other resources
1674+
if let Some(resolved_fte) = resolved_fte_config {
1675+
cb_prepare
1676+
.add_volume_mounts(resolved_fte.volume_mounts.clone())
1677+
.context(AddVolumeMountSnafu)?;
1678+
cb_trino
1679+
.add_volume_mounts(resolved_fte.volume_mounts.clone())
1680+
.context(AddVolumeMountSnafu)?;
1681+
pod_builder
1682+
.add_volumes(resolved_fte.volumes.clone())
1683+
.context(AddVolumeSnafu)?;
1684+
}
1685+
16141686
Ok(())
16151687
}
16161688

@@ -1780,6 +1852,7 @@ mod tests {
17801852
&trino_authentication_config,
17811853
&trino_opa_config,
17821854
&cluster_info,
1855+
&None,
17831856
)
17841857
.unwrap()
17851858
}

0 commit comments

Comments
 (0)