|
| 1 | +use crate::k8s::{K8sClient, K8sError, PodPhase}; |
1 | 2 | use async_trait::async_trait; |
2 | 3 | use base64::{Engine, prelude::BASE64_STANDARD}; |
| 4 | +use etl_config::Environment; |
3 | 5 | use k8s_openapi::api::{ |
4 | 6 | apps::v1::StatefulSet, |
5 | 7 | core::v1::{ConfigMap, Pod, Secret}, |
6 | 8 | }; |
7 | | -use serde_json::json; |
8 | | -use std::collections::BTreeMap; |
9 | | -use thiserror::Error; |
10 | | -use tracing::info; |
11 | | - |
12 | 9 | use kube::{ |
13 | 10 | Client, |
14 | 11 | api::{Api, DeleteParams, Patch, PatchParams}, |
15 | 12 | }; |
| 13 | +use serde_json::json; |
| 14 | +use std::collections::BTreeMap; |
| 15 | +use tracing::info; |
16 | 16 |
|
17 | | -#[derive(Debug, Error)] |
18 | | -pub enum K8sError { |
19 | | - #[error("serde_json error: {0}")] |
20 | | - Serde(#[from] serde_json::error::Error), |
21 | | - |
22 | | - #[error("kube error: {0}")] |
23 | | - Kube(#[from] kube::Error), |
24 | | -} |
25 | | - |
26 | | -pub enum PodPhase { |
27 | | - Pending, |
28 | | - Running, |
29 | | - Succeeded, |
30 | | - Failed, |
31 | | - Unknown, |
32 | | -} |
33 | | - |
34 | | -impl From<&str> for PodPhase { |
35 | | - fn from(value: &str) -> Self { |
36 | | - match value { |
37 | | - "Pending" => PodPhase::Pending, |
38 | | - "Running" => PodPhase::Running, |
39 | | - "Succeeded" => PodPhase::Succeeded, |
40 | | - "Failed" => PodPhase::Failed, |
41 | | - _ => PodPhase::Unknown, |
42 | | - } |
43 | | - } |
44 | | -} |
45 | | - |
46 | | -#[async_trait] |
47 | | -pub trait K8sClient: Send + Sync { |
48 | | - async fn create_or_update_postgres_secret( |
49 | | - &self, |
50 | | - prefix: &str, |
51 | | - postgres_password: &str, |
52 | | - ) -> Result<(), K8sError>; |
53 | | - |
54 | | - async fn create_or_update_bq_secret( |
55 | | - &self, |
56 | | - prefix: &str, |
57 | | - bq_service_account_key: &str, |
58 | | - ) -> Result<(), K8sError>; |
59 | | - |
60 | | - async fn delete_postgres_secret(&self, prefix: &str) -> Result<(), K8sError>; |
61 | | - |
62 | | - async fn delete_bq_secret(&self, prefix: &str) -> Result<(), K8sError>; |
63 | | - |
64 | | - async fn get_config_map(&self, config_map_name: &str) -> Result<ConfigMap, K8sError>; |
65 | | - |
66 | | - async fn create_or_update_config_map( |
67 | | - &self, |
68 | | - prefix: &str, |
69 | | - base_config: &str, |
70 | | - prod_config: &str, |
71 | | - ) -> Result<(), K8sError>; |
72 | | - |
73 | | - async fn delete_config_map(&self, prefix: &str) -> Result<(), K8sError>; |
74 | | - |
75 | | - async fn create_or_update_stateful_set( |
76 | | - &self, |
77 | | - prefix: &str, |
78 | | - replicator_image: &str, |
79 | | - template_annotations: Option<BTreeMap<String, String>>, |
80 | | - ) -> Result<(), K8sError>; |
81 | | - |
82 | | - async fn delete_stateful_set(&self, prefix: &str) -> Result<(), K8sError>; |
83 | | - |
84 | | - async fn get_pod_phase(&self, prefix: &str) -> Result<PodPhase, K8sError>; |
85 | | - |
86 | | - async fn has_replicator_container_error(&self, prefix: &str) -> Result<bool, K8sError>; |
87 | | - |
88 | | - async fn delete_pod(&self, prefix: &str) -> Result<(), K8sError>; |
89 | | -} |
90 | | - |
91 | | -#[derive(Debug)] |
92 | | -pub struct HttpK8sClient { |
93 | | - secrets_api: Api<Secret>, |
94 | | - config_maps_api: Api<ConfigMap>, |
95 | | - stateful_sets_api: Api<StatefulSet>, |
96 | | - pods_api: Api<Pod>, |
97 | | -} |
98 | | - |
| 17 | +/// Secret name suffix for the BigQuery service account key. |
99 | 18 | const BQ_SECRET_NAME_SUFFIX: &str = "bq-service-account-key"; |
| 19 | +/// Secret name suffix for the Postgres password. |
100 | 20 | const POSTGRES_SECRET_NAME_SUFFIX: &str = "postgres-password"; |
| 21 | +/// ConfigMap name suffix for the replicator configuration files. |
101 | 22 | const REPLICATOR_CONFIG_MAP_NAME_SUFFIX: &str = "replicator-config"; |
| 23 | +/// StatefulSet name suffix for the replicator workload. |
102 | 24 | const REPLICATOR_STATEFUL_SET_SUFFIX: &str = "replicator-stateful-set"; |
| 25 | +/// Application label suffix used to group resources. |
103 | 26 | const REPLICATOR_APP_SUFFIX: &str = "replicator-app"; |
| 27 | +/// Container name suffix for the replicator container. |
104 | 28 | const REPLICATOR_CONTAINER_NAME_SUFFIX: &str = "replicator"; |
| 29 | +/// Container name suffix for the Vector sidecar. |
105 | 30 | const VECTOR_CONTAINER_NAME_SUFFIX: &str = "vector"; |
| 31 | +/// Namespace where data-plane resources are created. |
106 | 32 | const DATA_PLANE_NAMESPACE: &str = "etl-data-plane"; |
| 33 | +/// Secret storing the Logflare API key. |
107 | 34 | const LOGFLARE_SECRET_NAME: &str = "replicator-logflare-api-key"; |
| 35 | +/// Docker image used for the Vector sidecar. |
108 | 36 | const VECTOR_IMAGE_NAME: &str = "timberio/vector:0.46.1-distroless-libc"; |
| 37 | +/// ConfigMap name containing the Vector configuration. |
109 | 38 | const VECTOR_CONFIG_MAP_NAME: &str = "replicator-vector-config"; |
| 39 | +/// Volume name for the replicator config file. |
110 | 40 | const REPLICATOR_CONFIG_FILE_VOLUME_NAME: &str = "replicator-config-file"; |
| 41 | +/// Volume name for the Vector config file. |
111 | 42 | const VECTOR_CONFIG_FILE_VOLUME_NAME: &str = "vector-config-file"; |
| 43 | +/// Secret storing the Sentry DSN. |
112 | 44 | const SENTRY_DSN_SECRET_NAME: &str = "replicator-sentry-dsn"; |
| 45 | +/// EmptyDir volume name used to share logs. |
113 | 46 | const LOGS_VOLUME_NAME: &str = "logs"; |
| 47 | +/// ConfigMap name providing trusted root certificates. |
114 | 48 | pub const TRUSTED_ROOT_CERT_CONFIG_MAP_NAME: &str = "trusted-root-certs-config"; |
| 49 | +/// Key inside the trusted root certificates ConfigMap. |
115 | 50 | pub const TRUSTED_ROOT_CERT_KEY_NAME: &str = "trusted_root_certs"; |
| 51 | +/// Environment variable for the Postgres password. |
116 | 52 | const PG_PASSWORD_ENV_VAR_NAME: &str = "APP_PIPELINE__PG_CONNECTION__PASSWORD"; |
| 53 | +/// Environment variable for the BigQuery service account key. |
117 | 54 | const BIG_QUERY_SA_KEY_ENV_VAR_NAME: &str = "APP_DESTINATION__BIG_QUERY__SERVICE_ACCOUNT_KEY"; |
| 55 | +/// Pod template annotation used to trigger rolling restarts. |
118 | 56 | pub const RESTARTED_AT_ANNOTATION_KEY: &str = "etl.supabase.com/restarted-at"; |
| 57 | +/// Label used to identify replicator pods. |
119 | 58 | const REPLICATOR_APP_LABEL: &str = "etl-replicator-app"; |
120 | 59 |
|
| 60 | +/// Replicator memory limit tuned for `c6in.4xlarge` instances. |
| 61 | +const REPLICATOR_MAX_MEMORY_PROD: &str = "500Mi"; |
| 62 | +/// Replicator CPU limit tuned for `c6in.4xlarge` instances. |
| 63 | +const REPLICATOR_MAX_CPU_PROD: &str = "100m"; |
| 64 | +/// Replicator memory limit tuned for `t3.small` instances. |
| 65 | +const REPLICATOR_MAX_MEMORY_STAGING: &str = "100Mi"; |
| 66 | +/// Replicator CPU limit tuned for `t3.small` instances. |
| 67 | +const REPLICATOR_MAX_CPU_STAGING: &str = "100m"; |
| 68 | + |
| 69 | +/// Runtime limits derived from the current environment. |
| 70 | +struct DynamicReplicatorConfig { |
| 71 | + max_memory: &'static str, |
| 72 | + max_cpu: &'static str, |
| 73 | +} |
| 74 | + |
| 75 | +impl DynamicReplicatorConfig { |
| 76 | + /// Loads the runtime limits for the current environment. |
| 77 | + fn load() -> Result<Self, K8sError> { |
| 78 | + let environment = Environment::load().map_err(|_| K8sError::ReplicatorConfiguration)?; |
| 79 | + |
| 80 | + let config = match environment { |
| 81 | + Environment::Prod => Self { |
| 82 | + max_memory: REPLICATOR_MAX_MEMORY_PROD, |
| 83 | + max_cpu: REPLICATOR_MAX_CPU_PROD, |
| 84 | + }, |
| 85 | + _ => Self { |
| 86 | + max_memory: REPLICATOR_MAX_MEMORY_STAGING, |
| 87 | + max_cpu: REPLICATOR_MAX_CPU_STAGING, |
| 88 | + }, |
| 89 | + }; |
| 90 | + |
| 91 | + Ok(config) |
| 92 | + } |
| 93 | +} |
| 94 | + |
| 95 | +/// HTTP-based implementation of [`K8sClient`]. |
| 96 | +/// |
| 97 | +/// The client is namespaced to the data-plane namespace and uses server-side |
| 98 | +/// apply to keep resources in sync. |
| 99 | +#[derive(Debug)] |
| 100 | +pub struct HttpK8sClient { |
| 101 | + secrets_api: Api<Secret>, |
| 102 | + config_maps_api: Api<ConfigMap>, |
| 103 | + stateful_sets_api: Api<StatefulSet>, |
| 104 | + pods_api: Api<Pod>, |
| 105 | +} |
| 106 | + |
121 | 107 | impl HttpK8sClient { |
| 108 | + /// Creates a new [`HttpK8sClient`] using the ambient Kubernetes config. |
| 109 | + /// |
| 110 | + /// Prefers in-cluster configuration and falls back to the local kubeconfig |
| 111 | + /// when running outside the cluster. |
122 | 112 | pub async fn new() -> Result<HttpK8sClient, K8sError> { |
123 | 113 | let client = Client::try_default().await?; |
124 | 114 |
|
@@ -320,6 +310,8 @@ impl K8sClient for HttpK8sClient { |
320 | 310 | let bq_secret_name = format!("{prefix}-{BQ_SECRET_NAME_SUFFIX}"); |
321 | 311 | let replicator_config_map_name = format!("{prefix}-{REPLICATOR_CONFIG_MAP_NAME_SUFFIX}"); |
322 | 312 |
|
| 313 | + let config = DynamicReplicatorConfig::load()?; |
| 314 | + |
323 | 315 | let mut stateful_set_json = json!({ |
324 | 316 | "apiVersion": "apps/v1", |
325 | 317 | "kind": "StatefulSet", |
@@ -393,11 +385,12 @@ impl K8sClient for HttpK8sClient { |
393 | 385 | ], |
394 | 386 | "resources": { |
395 | 387 | "limits": { |
396 | | - "memory": "200Mi", |
| 388 | + "memory": config.max_memory, |
| 389 | + "cpu": config.max_cpu, |
397 | 390 | }, |
398 | 391 | "requests": { |
399 | | - "memory": "200Mi", |
400 | | - "cpu": "100m" |
| 392 | + "memory": config.max_memory, |
| 393 | + "cpu": config.max_cpu, |
401 | 394 | } |
402 | 395 | }, |
403 | 396 | "volumeMounts": [ |
|
0 commit comments