diff --git a/charts/cf-runtime/Chart.yaml b/charts/cf-runtime/Chart.yaml index bb02adf2..08840d7b 100644 --- a/charts/cf-runtime/Chart.yaml +++ b/charts/cf-runtime/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 description: A Helm chart for Codefresh Runner name: cf-runtime -version: 8.1.0 +version: 8.2.0 keywords: - codefresh - runner @@ -17,8 +17,18 @@ annotations: artifacthub.io/containsSecurityUpdates: "false" # Supported kinds: `added`, `changed`, `deprecated`, `removed`, `fixed`, `security`: artifacthub.io/changes: | + - kind: changed + description: "Update \"engine\" to version 1.179.1." - kind: added - description: "Added MAXIMUM_POST_STEPS_GRACE_PERIOD_MINUTES configuration for engine which controls maximum time for internal build chores before termination." + description: "Add support for OpenTelemetry signals: metrics, logs, traces." + - kind: added + description: "Add support for Pyroscope profiles." + - kind: changed + description: "Redesign \"engine\" metrics to follow OpenTelemetry standards and provide more comprehensive insights about Classic Build execution. Please read upgrade notes for more details." + - kind: deprecated + description: "Deprecate legacy Prometheus metrics in favor of new OpenTelemetry metrics in \"engine\". Please read upgrade notes for more details." + - kind: changed + description: "Improve observability of build's \"Initializing Process\" step by providing more logs and more detailed status of the step." 
dependencies: - name: cf-common repository: oci://quay.io/codefresh/charts diff --git a/charts/cf-runtime/README.md b/charts/cf-runtime/README.md index 7629c4ed..c5d14872 100644 --- a/charts/cf-runtime/README.md +++ b/charts/cf-runtime/README.md @@ -1,6 +1,6 @@ ## Codefresh Runner -![Version: 8.1.0](https://img.shields.io/badge/Version-8.1.0-informational?style=flat-square) +![Version: 8.2.0](https://img.shields.io/badge/Version-8.2.0-informational?style=flat-square) Helm chart for deploying [Codefresh Runner](https://codefresh.io/docs/docs/installation/codefresh-runner/) to Kubernetes. @@ -20,6 +20,7 @@ Helm chart for deploying [Codefresh Runner](https://codefresh.io/docs/docs/insta - [To 7.x](#to-7-x) - [To 7.9.x](#to-7-9-x) - [To 8.x](#to-8-x) + - [To 8.2.x](#to-8-2-x) - [Architecture](#architecture) - [Configuration](#configuration) - [EBS backend volume configuration in AWS](#ebs-backend-volume-configuration) @@ -313,6 +314,29 @@ This means that any existing images in your pipelines that were created using th To avoid operation disruption, you have to identify and convert such deprecated images to modern formats. Tutorial: [https://codefresh.io/docs/docs/kb/articles/upgrade-deprecated-docker-images/](https://codefresh.io/docs/docs/kb/articles/upgrade-deprecated-docker-images/) +### To 8.2.x + +⚠️⚠️⚠️ **BREAKING CHANGE in metrics configuration** ⚠️⚠️⚠️ + +In this release, the `engine` component has migrated its metrics collection to OpenTelemetry, using the *push* model by default. + +You can still switch to the *pull* model by setting the `OTEL_METRICS_EXPORTER=prometheus` environment variable for the `engine`. However, we recommend using the default configuration, as it is better suited for the short-lived nature of Classic Builds and provides more precise and complete metrics. + +View [default chart values](https://artifacthub.io/packages/helm/codefresh-runner/cf-runtime?modal=values&path=runtime.engine.env) for more configuration options. 
+ +The `engine` metrics have also been redesigned to follow OpenTelemetry standards and to deliver more actionable insights. Full list of metrics: https://codefresh.io/docs/docs/installation/runner/classic-runtime-monitoring/ + +For a smooth transition, the previous Prometheus metrics are still available but are now disabled by default. **These legacy metrics will be removed in future releases.** If you need to temporarily retain the old metrics, add the following values to your chart configuration: + +```yaml +runtime: + engine: + env: + CF_TELEMETRY_PROMETHEUS_ENABLE: "false" # Disable new Prometheus metrics to avoid ports conflict and data duplication + CF_TELEMETRY_OTEL_ENABLE: "false" # Disable new OTel metrics to avoid data duplication + METRICS_PROMETHEUS_ENABLED: "true" # Enable old Prometheus metrics +``` + ## Architecture [Codefresh Runner architecture](https://codefresh.io/docs/docs/installation/codefresh-runner/#codefresh-runner-architecture) @@ -1243,7 +1267,7 @@ Install the Helm chart | monitor.tolerations | list | `[]` | Set tolerations | | monitor.updateStrategy | object | `{"type":"RollingUpdate"}` | Upgrade strategy | | nameOverride | string | `""` | String to partially override cf-runtime.fullname template (will maintain the release name) | -| podMonitor | object | See below | Add podMonitor (for engine pods) | +| podMonitor | object | See below | Add podMonitor | | podMonitor.main.enabled | bool | `false` | Enable pod monitor for engine pods | | podMonitor.runner.enabled | bool | `false` | Enable pod monitor for runner pod | | podMonitor.volume-provisioner.enabled | bool | `false` | Enable pod monitor for volumeProvisioner pod | @@ -1300,23 +1324,45 @@ Install the Helm chart | runtime.dind.userVolumeMounts | object | `{}` | Add extra volume mounts | | runtime.dind.userVolumes | object | `{}` | Add extra volumes | | runtime.dindDaemon | object | See below | DinD pod daemon config | -| runtime.engine | object | 
`{"affinity":{},"command":["npm","run","start"],"env":{"CONTAINER_LOGGER_EXEC_CHECK_INTERVAL_MS":1000,"DOCKER_REQUEST_TIMEOUT_MS":30000,"FORCE_COMPOSE_SERIAL_PULL":false,"LOGGER_LEVEL":"debug","LOG_OUTGOING_HTTP_REQUESTS":false,"METRICS_PROMETHEUS_COLLECT_PROCESS_METRICS":false,"METRICS_PROMETHEUS_ENABLED":true,"METRICS_PROMETHEUS_ENABLE_LEGACY_METRICS":false,"METRICS_PROMETHEUS_HOST":"0.0.0.0","METRICS_PROMETHEUS_PORT":9100,"METRICS_PROMETHEUS_SCRAPE_TIMEOUT":"15000","TRUSTED_QEMU_IMAGES":"tonistiigi/binfmt"},"image":{"digest":"sha256:c8e74362a3462a635cad70ac81877a7d3a0d4833cfaefb8d3b8b4b90e8c95159","pullPolicy":"IfNotPresent","registry":"quay.io","repository":"codefresh/engine","tag":"1.178.3"},"nodeSelector":{},"podAnnotations":{},"podLabels":{},"resources":{"limits":{"cpu":"1000m","memory":"2048Mi"},"requests":{"cpu":"100m","memory":"128Mi"}},"runtimeImages":{"alpine":{"digest":"sha256:115729ec5cb049ba6359c3ab005ac742012d92bbaa5b8bc1a878f1e8f62c0cb8","registry":"docker.io","repository":"alpine","tag":"edge"},"compose":{"digest":"sha256:e74494370100678ccb1c1058e6ef3ddcf67b21fcd37da8b3482376c8282549ad","registry":"quay.io","repository":"codefresh/compose","tag":"v2.37.0-1.5.4"},"container-logger":{"digest":"sha256:6e376bb00e824827cb038e15160ccf0fead4f868197b75bbc80dbd6bc34af8d6","registry":"quay.io","repository":"codefresh/cf-container-logger","tag":"1.12.8"},"cosign-image-signer":{"digest":"sha256:ad74291dc11833e13dbf7ae1919446dee2baedb16b96a8a3acc600b5499c716d","registry":"quay.io","repository":"codefresh/cf-cosign-image-signer","tag":"2.5.2-cf.1"},"default-qemu":{"digest":"sha256:1b804311fe87047a4c96d38b4b3ef6f62fca8cd125265917a9e3dc3c996c39e6","registry":"docker.io","repository":"tonistiigi/binfmt","tag":"qemu-v9.2.2"},"docker-builder":{"digest":"sha256:1d02df4dcf703a97c7a64b147cd2c3f6ec2c708aad16be5abbd337f3c13a48ad","registry":"quay.io","repository":"codefresh/cf-docker-builder","tag":"1.4.7"},"docker-puller":{"digest":"sha256:914f071bcb1893bcb42c3f8907f8f38
74f1f30db1a2ccaa4b825dab9bb157e60","registry":"quay.io","repository":"codefresh/cf-docker-puller","tag":"8.0.22"},"docker-pusher":{"digest":"sha256:bad3773029a68f33953f1dc245cb92c386b5311a996340eea41fe6b9cc52a96c","registry":"quay.io","repository":"codefresh/cf-docker-pusher","tag":"6.0.20"},"docker-tag-pusher":{"digest":"sha256:ec4416525bbf4912786035fbb2e1f26ae04f94559c535f02232b48eb0a1c5fa7","registry":"quay.io","repository":"codefresh/cf-docker-tag-pusher","tag":"1.3.19"},"fs-ops":{"digest":"sha256:70d53821b9314d88e3571dfb096e8f577caf3e4c2199253621b8d0c85d20b8ad","registry":"quay.io","repository":"codefresh/fs-ops","tag":"1.2.10"},"gc-builder":{"digest":"sha256:33ac914e6b844909f188a208cf90e569358cafa5aaa60f49848f49d99bcaf875","registry":"quay.io","repository":"codefresh/cf-gc-builder","tag":"0.5.3"},"git-cloner":{"digest":"sha256:2e09eef18d5caddae708058ec63247825ac4e4ee5e5763986f65e1312fbcc449","registry":"quay.io","repository":"codefresh/cf-git-cloner","tag":"10.3.2"},"kube-deploy":{"digest":"sha256:35649b14eb43717d3752d08597ada77d3737b2508f1b8e1f52f67b7a0e5ff263","registry":"quay.io","repository":"codefresh/cf-deploy-kubernetes","tag":"16.2.9"},"pipeline-debugger":{"digest":"sha256:37975653b4ef5378bd1e38d453c7dac4721cba1c1977a5ca6118a67b98a47925","registry":"quay.io","repository":"codefresh/cf-debugger","tag":"1.3.9"},"template-engine":{"digest":"sha256:b3f499fcf93037e69fba599d2f292cfc9f28a158052dd57d5de9cdf9756f1f60","registry":"quay.io","repository":"codefresh/pikolo","tag":"0.14.6"}},"runtimeImagesRegisty":"","schedulerName":"","serviceAccount":"codefresh-engine","terminationGracePeriodSeconds":180,"tolerations":[],"userEnvVars":[],"workflowLimits":{"MAXIMUM_ALLOWED_TIME_BEFORE_PRE_STEPS_SUCCESS":600,"MAXIMUM_ALLOWED_WORKFLOW_AGE_BEFORE_TERMINATION":86400,"MAXIMUM_ELECTED_STATE_AGE_ALLOWED":900,"MAXIMUM_POST_STEPS_GRACE_PERIOD_MINUTES":30,"MAXIMUM_RETRY_ATTEMPTS_ALLOWED":20,"MAXIMUM_TERMINATING_STATE_AGE_ALLOWED":900,"MAXIMUM_TERMINATING_STATE_AGE_ALLOWED_WI
THOUT_UPDATE":300,"TIME_ENGINE_INACTIVE_UNTIL_TERMINATION":300,"TIME_ENGINE_INACTIVE_UNTIL_UNHEALTHY":60,"TIME_INACTIVE_UNTIL_TERMINATION":2700}}` | Parameters for Engine pod (aka "pipeline" orchestrator). | +| runtime.engine | object | `{"affinity":{},"command":["npm","run","start"],"env":{"CF_TELEMETRY_LOGS_LEVEL":"debug","CF_TELEMETRY_OTEL_ALLOW_HTTP_INSTRUMENTATION":"false","CF_TELEMETRY_OTEL_ENABLE":"true","CF_TELEMETRY_PROMETHEUS_ENABLE":"false","CF_TELEMETRY_PROMETHEUS_ENABLE_PROCESS_METRICS":"false","CF_TELEMETRY_PROMETHEUS_HOST":"0.0.0.0","CF_TELEMETRY_PROMETHEUS_PORT":"9100","CF_TELEMETRY_PYROSCOPE_ENABLE":"false","CONTAINER_LOGGER_EXEC_CHECK_INTERVAL_MS":1000,"DOCKER_REQUEST_TIMEOUT_MS":30000,"FORCE_COMPOSE_SERIAL_PULL":false,"LOGGER_LEVEL":"debug","LOG_OUTGOING_HTTP_REQUESTS":false,"METRICS_PROMETHEUS_COLLECT_PROCESS_METRICS":false,"METRICS_PROMETHEUS_ENABLED":false,"METRICS_PROMETHEUS_ENABLE_LEGACY_METRICS":false,"METRICS_PROMETHEUS_HOST":"0.0.0.0","METRICS_PROMETHEUS_PORT":9100,"METRICS_PROMETHEUS_SCRAPE_TIMEOUT":"15000","METRICS_SCRAPE_TIMEOUT_MS":"0","OTEL_EXPORTER_OTLP_COMPRESSION":"gzip","OTEL_EXPORTER_OTLP_ENDPOINT":"http://localhost:4317","OTEL_EXPORTER_OTLP_PROTOCOL":"grpc","OTEL_EXPORTER_PROMETHEUS_HOST":"0.0.0.0","OTEL_EXPORTER_PROMETHEUS_PORT":"9464","OTEL_LOGS_EXPORTER":"none","OTEL_METRICS_EXPORTER":"otlp","OTEL_METRIC_EXPORT_INTERVAL":"10000","OTEL_METRIC_EXPORT_TIMEOUT":"5000","OTEL_SEMCONV_STABILITY_OPT_IN":"http","OTEL_TRACES_EXPORTER":"none","OTEL_TRACES_SAMPLER":"parentbased_always_on","PYROSCOPE_SERVER_ADDRESS":"","TRUSTED_QEMU_IMAGES":"tonistiigi/binfmt"},"image":{"digest":"sha256:37caef1e58f8d07ed76da753fb46eb59224e723495c1b9081d3ef7e0bc9449f9","pullPolicy":"IfNotPresent","registry":"quay.io","repository":"codefresh/engine","tag":"1.179.1"},"nodeSelector":{},"podAnnotations":{},"podLabels":{},"resources":{"limits":{"cpu":"1000m","memory":"2048Mi"},"requests":{"cpu":"100m","memory":"128Mi"}},"runtimeImages":{"alpine":{"digest":"sha2
56:115729ec5cb049ba6359c3ab005ac742012d92bbaa5b8bc1a878f1e8f62c0cb8","registry":"docker.io","repository":"alpine","tag":"edge"},"compose":{"digest":"sha256:e74494370100678ccb1c1058e6ef3ddcf67b21fcd37da8b3482376c8282549ad","registry":"quay.io","repository":"codefresh/compose","tag":"v2.37.0-1.5.4"},"container-logger":{"digest":"sha256:6e376bb00e824827cb038e15160ccf0fead4f868197b75bbc80dbd6bc34af8d6","registry":"quay.io","repository":"codefresh/cf-container-logger","tag":"1.12.8"},"cosign-image-signer":{"digest":"sha256:ad74291dc11833e13dbf7ae1919446dee2baedb16b96a8a3acc600b5499c716d","registry":"quay.io","repository":"codefresh/cf-cosign-image-signer","tag":"2.5.2-cf.1"},"default-qemu":{"digest":"sha256:1b804311fe87047a4c96d38b4b3ef6f62fca8cd125265917a9e3dc3c996c39e6","registry":"docker.io","repository":"tonistiigi/binfmt","tag":"qemu-v9.2.2"},"docker-builder":{"digest":"sha256:1d02df4dcf703a97c7a64b147cd2c3f6ec2c708aad16be5abbd337f3c13a48ad","registry":"quay.io","repository":"codefresh/cf-docker-builder","tag":"1.4.7"},"docker-puller":{"digest":"sha256:914f071bcb1893bcb42c3f8907f8f3874f1f30db1a2ccaa4b825dab9bb157e60","registry":"quay.io","repository":"codefresh/cf-docker-puller","tag":"8.0.22"},"docker-pusher":{"digest":"sha256:bad3773029a68f33953f1dc245cb92c386b5311a996340eea41fe6b9cc52a96c","registry":"quay.io","repository":"codefresh/cf-docker-pusher","tag":"6.0.20"},"docker-tag-pusher":{"digest":"sha256:ec4416525bbf4912786035fbb2e1f26ae04f94559c535f02232b48eb0a1c5fa7","registry":"quay.io","repository":"codefresh/cf-docker-tag-pusher","tag":"1.3.19"},"fs-ops":{"digest":"sha256:70d53821b9314d88e3571dfb096e8f577caf3e4c2199253621b8d0c85d20b8ad","registry":"quay.io","repository":"codefresh/fs-ops","tag":"1.2.10"},"gc-builder":{"digest":"sha256:33ac914e6b844909f188a208cf90e569358cafa5aaa60f49848f49d99bcaf875","registry":"quay.io","repository":"codefresh/cf-gc-builder","tag":"0.5.3"},"git-cloner":{"digest":"sha256:2e09eef18d5caddae708058ec63247825ac4e4ee5e5763986f65e13
12fbcc449","registry":"quay.io","repository":"codefresh/cf-git-cloner","tag":"10.3.2"},"kube-deploy":{"digest":"sha256:35649b14eb43717d3752d08597ada77d3737b2508f1b8e1f52f67b7a0e5ff263","registry":"quay.io","repository":"codefresh/cf-deploy-kubernetes","tag":"16.2.9"},"pipeline-debugger":{"digest":"sha256:37975653b4ef5378bd1e38d453c7dac4721cba1c1977a5ca6118a67b98a47925","registry":"quay.io","repository":"codefresh/cf-debugger","tag":"1.3.9"},"template-engine":{"digest":"sha256:b3f499fcf93037e69fba599d2f292cfc9f28a158052dd57d5de9cdf9756f1f60","registry":"quay.io","repository":"codefresh/pikolo","tag":"0.14.6"}},"runtimeImagesRegisty":"","schedulerName":"","serviceAccount":"codefresh-engine","terminationGracePeriodSeconds":180,"tolerations":[],"userEnvVars":[],"workflowLimits":{"MAXIMUM_ALLOWED_TIME_BEFORE_PRE_STEPS_SUCCESS":600,"MAXIMUM_ALLOWED_WORKFLOW_AGE_BEFORE_TERMINATION":86400,"MAXIMUM_ELECTED_STATE_AGE_ALLOWED":900,"MAXIMUM_POST_STEPS_GRACE_PERIOD_MINUTES":30,"MAXIMUM_RETRY_ATTEMPTS_ALLOWED":20,"MAXIMUM_TERMINATING_STATE_AGE_ALLOWED":900,"MAXIMUM_TERMINATING_STATE_AGE_ALLOWED_WITHOUT_UPDATE":300,"TIME_ENGINE_INACTIVE_UNTIL_TERMINATION":300,"TIME_ENGINE_INACTIVE_UNTIL_UNHEALTHY":60,"TIME_INACTIVE_UNTIL_TERMINATION":2700}}` | Parameters for Engine pod (aka "pipeline" orchestrator). | | runtime.engine.affinity | object | `{}` | Set affinity | | runtime.engine.command | list | `["npm","run","start"]` | Set container command. | -| runtime.engine.env | object | `{"CONTAINER_LOGGER_EXEC_CHECK_INTERVAL_MS":1000,"DOCKER_REQUEST_TIMEOUT_MS":30000,"FORCE_COMPOSE_SERIAL_PULL":false,"LOGGER_LEVEL":"debug","LOG_OUTGOING_HTTP_REQUESTS":false,"METRICS_PROMETHEUS_COLLECT_PROCESS_METRICS":false,"METRICS_PROMETHEUS_ENABLED":true,"METRICS_PROMETHEUS_ENABLE_LEGACY_METRICS":false,"METRICS_PROMETHEUS_HOST":"0.0.0.0","METRICS_PROMETHEUS_PORT":9100,"METRICS_PROMETHEUS_SCRAPE_TIMEOUT":"15000","TRUSTED_QEMU_IMAGES":"tonistiigi/binfmt"}` | Set additional env vars. 
| +| runtime.engine.env | object | `{"CF_TELEMETRY_LOGS_LEVEL":"debug","CF_TELEMETRY_OTEL_ALLOW_HTTP_INSTRUMENTATION":"false","CF_TELEMETRY_OTEL_ENABLE":"true","CF_TELEMETRY_PROMETHEUS_ENABLE":"false","CF_TELEMETRY_PROMETHEUS_ENABLE_PROCESS_METRICS":"false","CF_TELEMETRY_PROMETHEUS_HOST":"0.0.0.0","CF_TELEMETRY_PROMETHEUS_PORT":"9100","CF_TELEMETRY_PYROSCOPE_ENABLE":"false","CONTAINER_LOGGER_EXEC_CHECK_INTERVAL_MS":1000,"DOCKER_REQUEST_TIMEOUT_MS":30000,"FORCE_COMPOSE_SERIAL_PULL":false,"LOGGER_LEVEL":"debug","LOG_OUTGOING_HTTP_REQUESTS":false,"METRICS_PROMETHEUS_COLLECT_PROCESS_METRICS":false,"METRICS_PROMETHEUS_ENABLED":false,"METRICS_PROMETHEUS_ENABLE_LEGACY_METRICS":false,"METRICS_PROMETHEUS_HOST":"0.0.0.0","METRICS_PROMETHEUS_PORT":9100,"METRICS_PROMETHEUS_SCRAPE_TIMEOUT":"15000","METRICS_SCRAPE_TIMEOUT_MS":"0","OTEL_EXPORTER_OTLP_COMPRESSION":"gzip","OTEL_EXPORTER_OTLP_ENDPOINT":"http://localhost:4317","OTEL_EXPORTER_OTLP_PROTOCOL":"grpc","OTEL_EXPORTER_PROMETHEUS_HOST":"0.0.0.0","OTEL_EXPORTER_PROMETHEUS_PORT":"9464","OTEL_LOGS_EXPORTER":"none","OTEL_METRICS_EXPORTER":"otlp","OTEL_METRIC_EXPORT_INTERVAL":"10000","OTEL_METRIC_EXPORT_TIMEOUT":"5000","OTEL_SEMCONV_STABILITY_OPT_IN":"http","OTEL_TRACES_EXPORTER":"none","OTEL_TRACES_SAMPLER":"parentbased_always_on","PYROSCOPE_SERVER_ADDRESS":"","TRUSTED_QEMU_IMAGES":"tonistiigi/binfmt"}` | Set additional env vars. | +| runtime.engine.env.CF_TELEMETRY_LOGS_LEVEL | string | `"debug"` | Level of logging for engine | +| runtime.engine.env.CF_TELEMETRY_OTEL_ALLOW_HTTP_INSTRUMENTATION | string | `"false"` | Enable OTel HTTP instrumentation. Make sure to sanitize `url.full` and `url.query` span attributes on collector before enabling this flag, as it may contain sensitive information. 
| +| runtime.engine.env.CF_TELEMETRY_OTEL_ENABLE | string | `"true"` | Enable OpenTelemetry signals (logs, metrics, traces) | +| runtime.engine.env.CF_TELEMETRY_PROMETHEUS_ENABLE | string | `"false"` | Enable Prometheus server (used solely to emit process metrics, if CF_TELEMETRY_PROMETHEUS_ENABLE_PROCESS_METRICS=true). If enabled, make sure to disable legacy metrics by specifying METRICS_PROMETHEUS_ENABLED=false. | +| runtime.engine.env.CF_TELEMETRY_PROMETHEUS_ENABLE_PROCESS_METRICS | string | `"false"` | Enable collecting process metrics | +| runtime.engine.env.CF_TELEMETRY_PROMETHEUS_HOST | string | `"0.0.0.0"` | Host for Prometheus metrics server | +| runtime.engine.env.CF_TELEMETRY_PROMETHEUS_PORT | string | `"9100"` | Port for Prometheus metrics server | +| runtime.engine.env.CF_TELEMETRY_PYROSCOPE_ENABLE | string | `"false"` | Enable Pyroscope profiling. If enabled, the Pyroscope server address must be set in PYROSCOPE_SERVER_ADDRESS. | | runtime.engine.env.CONTAINER_LOGGER_EXEC_CHECK_INTERVAL_MS | int | `1000` | Interval to check the exec status in the container-logger | | runtime.engine.env.DOCKER_REQUEST_TIMEOUT_MS | int | `30000` | Timeout while doing requests to the Docker daemon | | runtime.engine.env.FORCE_COMPOSE_SERIAL_PULL | bool | `false` | If "true", composition images will be pulled sequentially | | runtime.engine.env.LOGGER_LEVEL | string | `"debug"` | Level of logging for engine | -| runtime.engine.env.LOG_OUTGOING_HTTP_REQUESTS | bool | `false` | Enable debug-level logging of outgoing HTTP/HTTPS requests | -| runtime.engine.env.METRICS_PROMETHEUS_COLLECT_PROCESS_METRICS | bool | `false` | Enable collecting process metrics | -| runtime.engine.env.METRICS_PROMETHEUS_ENABLED | bool | `true` | Enable emitting metrics from engine | -| runtime.engine.env.METRICS_PROMETHEUS_ENABLE_LEGACY_METRICS | bool | `false` | Enable legacy metrics | -| runtime.engine.env.METRICS_PROMETHEUS_HOST | string | `"0.0.0.0"` | Host for Prometheus metrics server | -| 
runtime.engine.env.METRICS_PROMETHEUS_PORT | int | `9100` | Port for Prometheus metrics server | -| runtime.engine.env.METRICS_PROMETHEUS_SCRAPE_TIMEOUT | string | `"15000"` | The timeout till the engine waits for Prometheus to pull the latest metrics before engine shuts down (in milliseconds) | +| runtime.engine.env.LOG_OUTGOING_HTTP_REQUESTS | bool | `false` | Enable debug-level logging of outgoing HTTP/HTTPS requests. Use with caution, as it may log sensitive information. | +| runtime.engine.env.METRICS_PROMETHEUS_COLLECT_PROCESS_METRICS | bool | `false` | DEPRECATED: Use OpenTelemetry metrics instead. This option enables process metrics and will be removed in a future release. | +| runtime.engine.env.METRICS_PROMETHEUS_ENABLED | bool | `false` | DEPRECATED: Use OpenTelemetry metrics instead. This option enables Prometheus metrics and will be removed in a future release. If enabled, make sure to disable newest metrics by specifying CF_TELEMETRY_PROMETHEUS_ENABLE=false. | +| runtime.engine.env.METRICS_PROMETHEUS_ENABLE_LEGACY_METRICS | bool | `false` | DEPRECATED: Use OpenTelemetry metrics instead. This option enables legacy metrics and will be removed in a future release. | +| runtime.engine.env.METRICS_PROMETHEUS_HOST | string | `"0.0.0.0"` | DEPRECATED: Use OpenTelemetry metrics instead. This option sets the host for Prometheus metrics server and will be removed in a future release. | +| runtime.engine.env.METRICS_PROMETHEUS_PORT | int | `9100` | DEPRECATED: Use OpenTelemetry metrics instead. This option sets the port for Prometheus metrics server and will be removed in a future release. | +| runtime.engine.env.METRICS_PROMETHEUS_SCRAPE_TIMEOUT | string | `"15000"` | DEPRECATED: Use OpenTelemetry metrics instead. This option sets the exit timeout for Prometheus metrics server and will be removed in a future release. If set, the engine will wait that many milliseconds for Prometheus to scrape the latest metrics before exiting. 
| +| runtime.engine.env.METRICS_SCRAPE_TIMEOUT_MS | string | `"0"` | On exit, wait ms for the scrape before exiting. No waiting will be done if set to 0. If OTEL_METRICS_EXPORTER=prometheus, it's recommended to set this to 4×scrape_interval. | +| runtime.engine.env.OTEL_EXPORTER_OTLP_COMPRESSION | string | `"gzip"` | Specifies the compression algorithm to be used for all telemetry data. Ref: https://opentelemetry.io/docs/specs/otel/protocol/exporter/ | +| runtime.engine.env.OTEL_EXPORTER_OTLP_ENDPOINT | string | `"http://localhost:4317"` | Base endpoint URL for all OpenTelemetry signals. Ref: https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/ | +| runtime.engine.env.OTEL_EXPORTER_OTLP_PROTOCOL | string | `"grpc"` | Specifies the OTLP transport protocol to be used for all telemetry data. Ref: https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/ | +| runtime.engine.env.OTEL_EXPORTER_PROMETHEUS_HOST | string | `"0.0.0.0"` | Host used by the Prometheus OTel metrics exporter if OTEL_METRICS_EXPORTER=prometheus | +| runtime.engine.env.OTEL_EXPORTER_PROMETHEUS_PORT | string | `"9464"` | Port used by the Prometheus OTel metrics exporter if OTEL_METRICS_EXPORTER=prometheus | +| runtime.engine.env.OTEL_LOGS_EXPORTER | string | `"none"` | OTel Logs exporter to be used. Ref: https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ | +| runtime.engine.env.OTEL_METRICS_EXPORTER | string | `"otlp"` | OTel metrics exporter to be used. Set to "prometheus" to export metrics in Prometheus format. If set to "prometheus", it's recommended to set METRICS_SCRAPE_TIMEOUT_MS=4×scrape_interval. Ref: https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ | +| runtime.engine.env.OTEL_METRIC_EXPORT_INTERVAL | string | `"10000"` | The time interval (in milliseconds) between the start of two export attempts for push metric exporters, such as "otlp". 
Ref: https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ | +| runtime.engine.env.OTEL_METRIC_EXPORT_TIMEOUT | string | `"5000"` | Maximum allowed time (in milliseconds) to export data for push metric exporters, such as "otlp". Ref: https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ | +| runtime.engine.env.OTEL_SEMCONV_STABILITY_OPT_IN | string | `"http"` | Emit the stable HTTP and networking OTel conventions if CF_TELEMETRY_OTEL_ALLOW_HTTP_INSTRUMENTATION=true. | +| runtime.engine.env.OTEL_TRACES_EXPORTER | string | `"none"` | OTel traces exporter to be used. Ref: https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ | +| runtime.engine.env.OTEL_TRACES_SAMPLER | string | `"parentbased_always_on"` | OTel sampler to be used for traces. Ref: https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ | +| runtime.engine.env.PYROSCOPE_SERVER_ADDRESS | string | `""` | Pyroscope server address | | runtime.engine.env.TRUSTED_QEMU_IMAGES | string | `"tonistiigi/binfmt"` | Trusted QEMU images used for docker builds - when left blank defaults to .runtime.engine.runtimeImages.DEFAULT_QEMU_IMAGE value | -| runtime.engine.image | object | `{"digest":"sha256:c8e74362a3462a635cad70ac81877a7d3a0d4833cfaefb8d3b8b4b90e8c95159","pullPolicy":"IfNotPresent","registry":"quay.io","repository":"codefresh/engine","tag":"1.178.3"}` | Set image. | +| runtime.engine.image | object | `{"digest":"sha256:37caef1e58f8d07ed76da753fb46eb59224e723495c1b9081d3ef7e0bc9449f9","pullPolicy":"IfNotPresent","registry":"quay.io","repository":"codefresh/engine","tag":"1.179.1"}` | Set image. | | runtime.engine.nodeSelector | object | `{}` | Set node selector. | | runtime.engine.podAnnotations | object | `{}` | Set pod annotations. | | runtime.engine.podLabels | object | `{}` | Set pod labels. 
| diff --git a/charts/cf-runtime/README.md.gotmpl b/charts/cf-runtime/README.md.gotmpl index 28e2d783..09b42f73 100644 --- a/charts/cf-runtime/README.md.gotmpl +++ b/charts/cf-runtime/README.md.gotmpl @@ -20,6 +20,7 @@ Helm chart for deploying [Codefresh Runner](https://codefresh.io/docs/docs/insta - [To 7.x](#to-7-x) - [To 7.9.x](#to-7-9-x) - [To 8.x](#to-8-x) + - [To 8.2.x](#to-8-2-x) - [Architecture](#architecture) - [Configuration](#configuration) - [EBS backend volume configuration in AWS](#ebs-backend-volume-configuration) @@ -313,6 +314,29 @@ This means that any existing images in your pipelines that were created using th To avoid operation disruption, you have to identify and convert such deprecated images to modern formats. Tutorial: [https://codefresh.io/docs/docs/kb/articles/upgrade-deprecated-docker-images/](https://codefresh.io/docs/docs/kb/articles/upgrade-deprecated-docker-images/) +### To 8.2.x + +⚠️⚠️⚠️ **BREAKING CHANGE in metrics configuration** ⚠️⚠️⚠️ + +In this release, the `engine` component has migrated its metrics collection to OpenTelemetry, using the *push* model by default. + +You can still switch to the *pull* model by setting the `OTEL_METRICS_EXPORTER=prometheus` environment variable for the `engine`. However, we recommend using the default configuration, as it is better suited for the short-lived nature of Classic Builds and provides more precise and complete metrics. + +View [default chart values](https://artifacthub.io/packages/helm/codefresh-runner/cf-runtime?modal=values&path=runtime.engine.env) for more configuration options. + +The `engine` metrics have also been redesigned to follow OpenTelemetry standards and to deliver more actionable insights. Full list of metrics: https://codefresh.io/docs/docs/installation/runner/classic-runtime-monitoring/ + +For a smooth transition, the previous Prometheus metrics are still available but are now disabled by default. 
**These legacy metrics will be removed in future releases.** If you need to temporarily retain the old metrics, add the following values to your chart configuration: + +```yaml +runtime: + engine: + env: + CF_TELEMETRY_PROMETHEUS_ENABLE: "false" # Disable new Prometheus metrics to avoid ports conflict and data duplication + CF_TELEMETRY_OTEL_ENABLE: "false" # Disable new OTel metrics to avoid data duplication + METRICS_PROMETHEUS_ENABLED: "true" # Enable old Prometheus metrics +``` + ## Architecture [Codefresh Runner architecture](https://codefresh.io/docs/docs/installation/codefresh-runner/#codefresh-runner-architecture) diff --git a/charts/cf-runtime/templates/runtime/runtime-env-spec-tmpl.yaml b/charts/cf-runtime/templates/runtime/runtime-env-spec-tmpl.yaml index 8c31082a..c7a7e633 100644 --- a/charts/cf-runtime/templates/runtime/runtime-env-spec-tmpl.yaml +++ b/charts/cf-runtime/templates/runtime/runtime-env-spec-tmpl.yaml @@ -9,6 +9,16 @@ {{- if $runtimeImageRegistry }} {{- $_ := set $rootContext.Values.global "imageRegistry" $runtimeImageRegistry }} {{- end }} +{{- $runtimeVersion := coalesce .Values.version .Chart.Version -}} +{{- $runtimeName := include "runtime.runtime-environment-spec.runtime-name" . -}} +{{- $engineVersion := coalesce $engineContext.image.tag "latest" -}} +{{- if $engineContext.image.digest }} + {{- $engineVersion = printf "%s@%s" $engineVersion $engineContext.image.digest -}} +{{- end }} +{{- $dindVersion := coalesce $dindContext.image.tag "latest" -}} +{{- if $dindContext.image.digest }} + {{- $dindVersion = printf "%s@%s" $dindVersion $dindContext.image.digest -}} +{{- end }} metadata: name: {{ include "runtime.runtime-environment-spec.runtime-name" . 
}} agent: {{ .Values.runtime.agent }} @@ -102,7 +112,10 @@ runtimeScheduler: {{- else }} COSIGN_IMAGE_SIGNER_IMAGE: {{ include (printf "%s.image.name" $cfCommonTplSemver ) (dict "image" (index $engineContext "runtimeImages" "cosign-image-signer") "context" $rootContext) | squote }} {{- end }} - RUNTIME_CHART_VERSION: {{ coalesce .Values.version .Chart.Version }} + RUNTIME_CHART_VERSION: {{ $runtimeVersion }} + CF_SERVICE_NAME: {{ printf "cf-classic-engine" }} + CF_SERVICE_VERSION: {{ $engineVersion }} + OTEL_RESOURCE_ATTRIBUTES: {{ printf "service.name=cf-classic-engine,service.version=%s,service.namespace=cf-classic-runtime,cf.classic.runtime.name=%s,cf.classic.runtime.version=%s" $engineVersion $runtimeName $runtimeVersion }} {{- with $engineContext.userEnvVars }} userEnvVars: {{- toYaml . | nindent 4 }} {{- end }} @@ -162,12 +175,13 @@ dockerDaemonScheduler: {{- with $dindContext.userAccess }} userAccess: {{ . }} {{- end }} - {{- with $dindContext.env }} envVars: + {{- with $dindContext.env }} {{- range $key, $val := . 
}} {{ $key }}: {{ $val | squote }} {{- end }} {{- end }} + OTEL_RESOURCE_ATTRIBUTES: {{ printf "service.name=cf-classic-dind,service.version=%s,service.namespace=cf-classic-runtime,cf.classic.runtime.name=%s,cf.classic.runtime.version=%s" $dindVersion $runtimeName $runtimeVersion }} cluster: namespace: {{ .Release.Namespace }} serviceAccount: {{ $dindContext.serviceAccount }} diff --git a/charts/cf-runtime/tests/private-registry/private_registry_test.yaml b/charts/cf-runtime/tests/private-registry/private_registry_test.yaml index ab463e1f..7df9f6c5 100644 --- a/charts/cf-runtime/tests/private-registry/private_registry_test.yaml +++ b/charts/cf-runtime/tests/private-registry/private_registry_test.yaml @@ -31,17 +31,39 @@ tests: - run - start envVars: + CF_TELEMETRY_LOGS_LEVEL: 'debug' + CF_TELEMETRY_OTEL_ALLOW_HTTP_INSTRUMENTATION: 'false' + CF_TELEMETRY_OTEL_ENABLE: 'true' + CF_TELEMETRY_PROMETHEUS_ENABLE: 'false' + CF_TELEMETRY_PROMETHEUS_ENABLE_PROCESS_METRICS: 'false' + CF_TELEMETRY_PROMETHEUS_HOST: '0.0.0.0' + CF_TELEMETRY_PROMETHEUS_PORT: '9100' + CF_TELEMETRY_PYROSCOPE_ENABLE: 'false' CONTAINER_LOGGER_EXEC_CHECK_INTERVAL_MS: '1000' DOCKER_REQUEST_TIMEOUT_MS: '30000' FORCE_COMPOSE_SERIAL_PULL: 'false' LOGGER_LEVEL: 'debug' LOG_OUTGOING_HTTP_REQUESTS: 'false' METRICS_PROMETHEUS_COLLECT_PROCESS_METRICS: 'false' - METRICS_PROMETHEUS_ENABLED: 'true' + METRICS_PROMETHEUS_ENABLED: 'false' METRICS_PROMETHEUS_ENABLE_LEGACY_METRICS: 'false' METRICS_PROMETHEUS_HOST: '0.0.0.0' METRICS_PROMETHEUS_PORT: '9100' METRICS_PROMETHEUS_SCRAPE_TIMEOUT: '15000' + METRICS_SCRAPE_TIMEOUT_MS: '0' + OTEL_EXPORTER_OTLP_COMPRESSION: 'gzip' + OTEL_EXPORTER_OTLP_ENDPOINT: 'http://localhost:4317' + OTEL_EXPORTER_OTLP_PROTOCOL: 'grpc' + OTEL_EXPORTER_PROMETHEUS_HOST: '0.0.0.0' + OTEL_EXPORTER_PROMETHEUS_PORT: '9464' + OTEL_LOGS_EXPORTER: 'none' + OTEL_METRICS_EXPORTER: 'otlp' + OTEL_METRIC_EXPORT_INTERVAL: '10000' + OTEL_METRIC_EXPORT_TIMEOUT: '5000' + OTEL_SEMCONV_STABILITY_OPT_IN: 'http' + 
OTEL_TRACES_EXPORTER: 'none' + OTEL_TRACES_SAMPLER: 'parentbased_always_on' + PYROSCOPE_SERVER_ADDRESS: '' TRUSTED_QEMU_IMAGES: 'tonistiigi/binfmt' COMPOSE_IMAGE: 'somedomain.io/codefresh/compose:tagoverride' CONTAINER_LOGGER_IMAGE: 'somedomain.io/codefresh/cf-container-logger:tagoverride' @@ -59,6 +81,9 @@ tests: GC_BUILDER_IMAGE: 'somedomain.io/codefresh/cf-gc-builder:tagoverride' COSIGN_IMAGE_SIGNER_IMAGE: 'somedomain.io/codefresh/cf-cosign-image-signer:tagoverride' RUNTIME_CHART_VERSION: 1.0.0 + CF_SERVICE_NAME: cf-classic-engine + CF_SERVICE_VERSION: tagoverride + OTEL_RESOURCE_ATTRIBUTES: service.name=cf-classic-engine,service.version=tagoverride,service.namespace=cf-classic-runtime,cf.classic.runtime.name=my-context/codefresh,cf.classic.runtime.version=1.0.0 workflowLimits: MAXIMUM_ALLOWED_TIME_BEFORE_PRE_STEPS_SUCCESS: 600 MAXIMUM_ALLOWED_WORKFLOW_AGE_BEFORE_TERMINATION: 86400 @@ -89,6 +114,8 @@ tests: dindImage: 'somedomain.io/codefresh/dind:tagoverride' imagePullPolicy: IfNotPresent userAccess: true + envVars: + OTEL_RESOURCE_ATTRIBUTES: service.name=cf-classic-dind,service.version=tagoverride,service.namespace=cf-classic-runtime,cf.classic.runtime.name=my-context/codefresh,cf.classic.runtime.version=1.0.0 cluster: namespace: codefresh serviceAccount: codefresh-engine diff --git a/charts/cf-runtime/tests/runtime/runtime_onprem_test.yaml b/charts/cf-runtime/tests/runtime/runtime_onprem_test.yaml index 24a72d3e..ea26424a 100644 --- a/charts/cf-runtime/tests/runtime/runtime_onprem_test.yaml +++ b/charts/cf-runtime/tests/runtime/runtime_onprem_test.yaml @@ -41,6 +41,14 @@ tests: - two - three envVars: + CF_TELEMETRY_LOGS_LEVEL: 'debug' + CF_TELEMETRY_OTEL_ALLOW_HTTP_INSTRUMENTATION: 'false' + CF_TELEMETRY_OTEL_ENABLE: 'true' + CF_TELEMETRY_PROMETHEUS_ENABLE: 'false' + CF_TELEMETRY_PROMETHEUS_ENABLE_PROCESS_METRICS: 'false' + CF_TELEMETRY_PROMETHEUS_HOST: '0.0.0.0' + CF_TELEMETRY_PROMETHEUS_PORT: '9100' + CF_TELEMETRY_PYROSCOPE_ENABLE: 'false' 
CONTAINER_LOGGER_EXEC_CHECK_INTERVAL_MS: '1000' DOCKER_REQUEST_TIMEOUT_MS: '30000' FLOAT_AS_STRING: '12.34' @@ -50,11 +58,25 @@ tests: LOGGER_LEVEL: 'debug' LOG_OUTGOING_HTTP_REQUESTS: 'false' METRICS_PROMETHEUS_COLLECT_PROCESS_METRICS: 'false' - METRICS_PROMETHEUS_ENABLED: 'true' + METRICS_PROMETHEUS_ENABLED: 'false' METRICS_PROMETHEUS_ENABLE_LEGACY_METRICS: 'false' METRICS_PROMETHEUS_HOST: '0.0.0.0' METRICS_PROMETHEUS_PORT: '9100' METRICS_PROMETHEUS_SCRAPE_TIMEOUT: '15000' + METRICS_SCRAPE_TIMEOUT_MS: '0' + OTEL_EXPORTER_OTLP_COMPRESSION: 'gzip' + OTEL_EXPORTER_OTLP_ENDPOINT: 'http://localhost:4317' + OTEL_EXPORTER_OTLP_PROTOCOL: 'grpc' + OTEL_EXPORTER_PROMETHEUS_HOST: '0.0.0.0' + OTEL_EXPORTER_PROMETHEUS_PORT: '9464' + OTEL_LOGS_EXPORTER: 'none' + OTEL_METRICS_EXPORTER: 'otlp' + OTEL_METRIC_EXPORT_INTERVAL: '10000' + OTEL_METRIC_EXPORT_TIMEOUT: '5000' + OTEL_SEMCONV_STABILITY_OPT_IN: 'http' + OTEL_TRACES_EXPORTER: 'none' + OTEL_TRACES_SAMPLER: 'parentbased_always_on' + PYROSCOPE_SERVER_ADDRESS: '' TRUSTED_QEMU_IMAGES: 'tonistiigi/binfmt' COMPOSE_IMAGE: 'quay.io/codefresh/compose:tagoverride' CONTAINER_LOGGER_IMAGE: 'quay.io/codefresh/cf-container-logger:tagoverride' @@ -72,6 +94,9 @@ tests: GC_BUILDER_IMAGE: 'quay.io/codefresh/cf-gc-builder:tagoverride' COSIGN_IMAGE_SIGNER_IMAGE: 'quay.io/codefresh/cf-cosign-image-signer:tagoverride' RUNTIME_CHART_VERSION: 1.0.0 + CF_SERVICE_NAME: cf-classic-engine + CF_SERVICE_VERSION: tagoverride + OTEL_RESOURCE_ATTRIBUTES: service.name=cf-classic-engine,service.version=tagoverride,service.namespace=cf-classic-runtime,cf.classic.runtime.name=system/my-runtime,cf.classic.runtime.version=1.0.0 workflowLimits: MAXIMUM_ALLOWED_TIME_BEFORE_PRE_STEPS_SUCCESS: 600 MAXIMUM_ALLOWED_WORKFLOW_AGE_BEFORE_TERMINATION: 86400 @@ -123,6 +148,7 @@ tests: ALICE: 'BOB' FLOAT_AS_STRING: '12.34' INT: '123' + OTEL_RESOURCE_ATTRIBUTES: 
service.name=cf-classic-dind,service.version=tagoverride,service.namespace=cf-classic-runtime,cf.classic.runtime.name=system/my-runtime,cf.classic.runtime.version=1.0.0 cluster: namespace: codefresh serviceAccount: service-account-override @@ -228,6 +254,14 @@ tests: - two - three envVars: + CF_TELEMETRY_LOGS_LEVEL: 'debug' + CF_TELEMETRY_OTEL_ALLOW_HTTP_INSTRUMENTATION: 'false' + CF_TELEMETRY_OTEL_ENABLE: 'true' + CF_TELEMETRY_PROMETHEUS_ENABLE: 'false' + CF_TELEMETRY_PROMETHEUS_ENABLE_PROCESS_METRICS: 'false' + CF_TELEMETRY_PROMETHEUS_HOST: '0.0.0.0' + CF_TELEMETRY_PROMETHEUS_PORT: '9100' + CF_TELEMETRY_PYROSCOPE_ENABLE: 'false' CONTAINER_LOGGER_EXEC_CHECK_INTERVAL_MS: '1000' DOCKER_REQUEST_TIMEOUT_MS: '30000' FLOAT_AS_STRING: '12.34' @@ -237,11 +271,25 @@ tests: LOGGER_LEVEL: 'debug' LOG_OUTGOING_HTTP_REQUESTS: 'false' METRICS_PROMETHEUS_COLLECT_PROCESS_METRICS: 'false' - METRICS_PROMETHEUS_ENABLED: 'true' + METRICS_PROMETHEUS_ENABLED: 'false' METRICS_PROMETHEUS_ENABLE_LEGACY_METRICS: 'false' METRICS_PROMETHEUS_HOST: '0.0.0.0' METRICS_PROMETHEUS_PORT: '9100' METRICS_PROMETHEUS_SCRAPE_TIMEOUT: '15000' + METRICS_SCRAPE_TIMEOUT_MS: '0' + OTEL_EXPORTER_OTLP_COMPRESSION: 'gzip' + OTEL_EXPORTER_OTLP_ENDPOINT: 'http://localhost:4317' + OTEL_EXPORTER_OTLP_PROTOCOL: 'grpc' + OTEL_EXPORTER_PROMETHEUS_HOST: '0.0.0.0' + OTEL_EXPORTER_PROMETHEUS_PORT: '9464' + OTEL_LOGS_EXPORTER: 'none' + OTEL_METRICS_EXPORTER: 'otlp' + OTEL_METRIC_EXPORT_INTERVAL: '10000' + OTEL_METRIC_EXPORT_TIMEOUT: '5000' + OTEL_SEMCONV_STABILITY_OPT_IN: 'http' + OTEL_TRACES_EXPORTER: 'none' + OTEL_TRACES_SAMPLER: 'parentbased_always_on' + PYROSCOPE_SERVER_ADDRESS: '' TRUSTED_QEMU_IMAGES: 'tonistiigi/binfmt' COMPOSE_IMAGE: 'quay.io/codefresh/compose:tagoverride' CONTAINER_LOGGER_IMAGE: 'quay.io/codefresh/cf-container-logger:tagoverride' @@ -259,6 +307,9 @@ tests: GC_BUILDER_IMAGE: 'quay.io/codefresh/cf-gc-builder:tagoverride' COSIGN_IMAGE_SIGNER_IMAGE: 
'quay.io/codefresh/cf-cosign-image-signer:tagoverride' RUNTIME_CHART_VERSION: 1.0.0 + CF_SERVICE_NAME: cf-classic-engine + CF_SERVICE_VERSION: tagoverride + OTEL_RESOURCE_ATTRIBUTES: service.name=cf-classic-engine,service.version=tagoverride,service.namespace=cf-classic-runtime,cf.classic.runtime.name=system/default-override,cf.classic.runtime.version=1.0.0 workflowLimits: MAXIMUM_ALLOWED_TIME_BEFORE_PRE_STEPS_SUCCESS: 600 MAXIMUM_ALLOWED_WORKFLOW_AGE_BEFORE_TERMINATION: 86400 @@ -310,6 +361,7 @@ tests: ALICE: 'BOB' FLOAT_AS_STRING: '12.34' INT: '123' + OTEL_RESOURCE_ATTRIBUTES: service.name=cf-classic-dind,service.version=tagoverride,service.namespace=cf-classic-runtime,cf.classic.runtime.name=system/default-override,cf.classic.runtime.version=1.0.0 cluster: namespace: codefresh serviceAccount: service-account-override diff --git a/charts/cf-runtime/tests/runtime/runtime_test.yaml b/charts/cf-runtime/tests/runtime/runtime_test.yaml index be857aa8..24e0b989 100644 --- a/charts/cf-runtime/tests/runtime/runtime_test.yaml +++ b/charts/cf-runtime/tests/runtime/runtime_test.yaml @@ -36,13 +36,21 @@ tests: agent: true runtimeScheduler: type: KubernetesPod - image: 'quay.io/codefresh/engine:tagoverride' + image: 'quay.io/codefresh/engine:tagoverride@sha256:123' imagePullPolicy: Always command: - one - two - three envVars: + CF_TELEMETRY_LOGS_LEVEL: 'debug' + CF_TELEMETRY_OTEL_ALLOW_HTTP_INSTRUMENTATION: 'false' + CF_TELEMETRY_OTEL_ENABLE: 'true' + CF_TELEMETRY_PROMETHEUS_ENABLE: 'false' + CF_TELEMETRY_PROMETHEUS_ENABLE_PROCESS_METRICS: 'false' + CF_TELEMETRY_PROMETHEUS_HOST: '0.0.0.0' + CF_TELEMETRY_PROMETHEUS_PORT: '9100' + CF_TELEMETRY_PYROSCOPE_ENABLE: 'false' CONTAINER_LOGGER_EXEC_CHECK_INTERVAL_MS: '1000' DOCKER_REQUEST_TIMEOUT_MS: '30000' FLOAT: '12.34' @@ -52,14 +60,28 @@ tests: LOGGER_LEVEL: 'debug' LOG_OUTGOING_HTTP_REQUESTS: 'false' METRICS_PROMETHEUS_COLLECT_PROCESS_METRICS: 'false' - METRICS_PROMETHEUS_ENABLED: 'true' + METRICS_PROMETHEUS_ENABLED: 'false' 
METRICS_PROMETHEUS_ENABLE_LEGACY_METRICS: 'false' METRICS_PROMETHEUS_HOST: '0.0.0.0' METRICS_PROMETHEUS_PORT: '9100' METRICS_PROMETHEUS_SCRAPE_TIMEOUT: '15000' + METRICS_SCRAPE_TIMEOUT_MS: '0' + OTEL_EXPORTER_OTLP_COMPRESSION: 'gzip' + OTEL_EXPORTER_OTLP_ENDPOINT: 'http://localhost:4317' + OTEL_EXPORTER_OTLP_PROTOCOL: 'grpc' + OTEL_EXPORTER_PROMETHEUS_HOST: '0.0.0.0' + OTEL_EXPORTER_PROMETHEUS_PORT: '9464' + OTEL_LOGS_EXPORTER: 'none' + OTEL_METRICS_EXPORTER: 'otlp' + OTEL_METRIC_EXPORT_INTERVAL: '10000' + OTEL_METRIC_EXPORT_TIMEOUT: '5000' + OTEL_SEMCONV_STABILITY_OPT_IN: 'http' + OTEL_TRACES_EXPORTER: 'none' + OTEL_TRACES_SAMPLER: 'parentbased_always_on' + PYROSCOPE_SERVER_ADDRESS: '' TRUSTED_QEMU_IMAGES: 'my-registry/tonistiigi/binfmt' COMPOSE_IMAGE: 'quay.io/codefresh/compose:tagoverrideold' - CONTAINER_LOGGER_IMAGE: 'quay.io/codefresh/cf-container-logger:tagoverride' + CONTAINER_LOGGER_IMAGE: 'quay.io/codefresh/cf-container-logger:tagoverride@sha256:123' DEFAULT_QEMU_IMAGE: 'docker.io/tonistiigi/binfmt:tagoverride' DOCKER_BUILDER_IMAGE: 'quay.io/codefresh/cf-docker-builder:tagoverride' DOCKER_PULLER_IMAGE: 'quay.io/codefresh/cf-docker-puller:tagoverride' @@ -74,6 +96,9 @@ tests: GC_BUILDER_IMAGE: 'quay.io/codefresh/cf-gc-builder:tagoverride' COSIGN_IMAGE_SIGNER_IMAGE: 'quay.io/codefresh/cf-cosign-image-signer:tagoverride' RUNTIME_CHART_VERSION: 1.0.0 + CF_SERVICE_NAME: cf-classic-engine + CF_SERVICE_VERSION: tagoverride@sha256:123 + OTEL_RESOURCE_ATTRIBUTES: service.name=cf-classic-engine,service.version=tagoverride@sha256:123,service.namespace=cf-classic-runtime,cf.classic.runtime.name=my-context/codefresh,cf.classic.runtime.version=1.0.0 userEnvVars: - name: ALICE valueFrom: @@ -134,6 +159,7 @@ tests: ALICE: 'BOB' FLOAT: '12.34' INT_AS_STRING: '123' + OTEL_RESOURCE_ATTRIBUTES: service.name=cf-classic-dind,service.version=tagoverride,service.namespace=cf-classic-runtime,cf.classic.runtime.name=my-context/codefresh,cf.classic.runtime.version=1.0.0 cluster: 
namespace: codefresh serviceAccount: service-account-override diff --git a/charts/cf-runtime/tests/runtime/runtime_values.yaml b/charts/cf-runtime/tests/runtime/runtime_values.yaml index 6a7e5f21..6b83c523 100644 --- a/charts/cf-runtime/tests/runtime/runtime_values.yaml +++ b/charts/cf-runtime/tests/runtime/runtime_values.yaml @@ -56,7 +56,7 @@ runtime: image: tag: tagoverride pullPolicy: Always - digest: "" + digest: "sha256:123" command: - one - two @@ -76,7 +76,7 @@ runtime: digest: "" container-logger: tag: tagoverride - digest: "" + digest: "sha256:123" default-qemu: tag: tagoverride digest: "" diff --git a/charts/cf-runtime/values.yaml b/charts/cf-runtime/values.yaml index 456e3cca..f0305698 100644 --- a/charts/cf-runtime/values.yaml +++ b/charts/cf-runtime/values.yaml @@ -503,9 +503,9 @@ runtime: image: registry: quay.io repository: codefresh/engine - tag: 1.178.3 + tag: 1.179.1 pullPolicy: IfNotPresent - digest: sha256:c8e74362a3462a635cad70ac81877a7d3a0d4833cfaefb8d3b8b4b90e8c95159 + digest: sha256:37caef1e58f8d07ed76da753fb46eb59224e723495c1b9081d3ef7e0bc9449f9 # -- Set container command. command: - npm @@ -620,28 +620,91 @@ runtime: # DEFAULT_QEMU_IMAGE: tonistiigi/binfmt:qemu-v9.2.2@sha256:1b804311fe87047a4c96d38b4b3ef6f62fca8cd125265917a9e3dc3c996c39e6 # -- Set additional env vars. env: - # -- Interval to check the exec status in the container-logger - CONTAINER_LOGGER_EXEC_CHECK_INTERVAL_MS: 1000 - # -- Timeout while doing requests to the Docker daemon - DOCKER_REQUEST_TIMEOUT_MS: 30000 - # -- If "true", composition images will be pulled sequentially - FORCE_COMPOSE_SERIAL_PULL: false + # -- Telemetry configuration + # -- Level of logging for engine + CF_TELEMETRY_LOGS_LEVEL: 'debug' + # -- Enable OpenTelemetry signals (logs, metrics, traces) + CF_TELEMETRY_OTEL_ENABLE: 'true' + # -- Enable OTel HTTP instrumentation. 
+ # Make sure to sanitize the `url.full` and `url.query` span attributes on the collector before enabling this flag, as they may contain sensitive information. + CF_TELEMETRY_OTEL_ALLOW_HTTP_INSTRUMENTATION: 'false' + # -- Enable Prometheus server (used solely to emit process metrics, if CF_TELEMETRY_PROMETHEUS_ENABLE_PROCESS_METRICS=true) + # If enabled, make sure to disable legacy metrics by specifying METRICS_PROMETHEUS_ENABLED=false. + CF_TELEMETRY_PROMETHEUS_ENABLE: 'false' + # -- Enable collecting process metrics + CF_TELEMETRY_PROMETHEUS_ENABLE_PROCESS_METRICS: 'false' + # -- Host for Prometheus metrics server + CF_TELEMETRY_PROMETHEUS_HOST: '0.0.0.0' + # -- Port for Prometheus metrics server + CF_TELEMETRY_PROMETHEUS_PORT: '9100' + # -- Enable Pyroscope profiling. If enabled, the Pyroscope server address must be set in PYROSCOPE_SERVER_ADDRESS. + CF_TELEMETRY_PYROSCOPE_ENABLE: 'false' + # -- Pyroscope server address + PYROSCOPE_SERVER_ADDRESS: '' # -- Level of logging for engine LOGGER_LEVEL: debug - # -- Enable debug-level logging of outgoing HTTP/HTTPS requests + # -- Enable debug-level logging of outgoing HTTP/HTTPS requests. + # Use with caution, as it may log sensitive information. LOG_OUTGOING_HTTP_REQUESTS: false - # -- Enable emitting metrics from engine - METRICS_PROMETHEUS_ENABLED: true - # -- Enable legacy metrics + # -- Time (in milliseconds) to wait on exit for a final metrics scrape. No waiting will be done if set to 0. If OTEL_METRICS_EXPORTER=prometheus, it's recommended to set this to 4×scrape_interval. + METRICS_SCRAPE_TIMEOUT_MS: '0' + # -- Base endpoint URL for all OpenTelemetry signals. + # Ref: https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/ + OTEL_EXPORTER_OTLP_ENDPOINT: 'http://localhost:4317' + # -- Specifies the OTLP transport protocol to be used for all telemetry data. 
+ # Ref: https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/ + OTEL_EXPORTER_OTLP_PROTOCOL: 'grpc' + # -- Specifies the compression algorithm to be used for all telemetry data. + # Ref: https://opentelemetry.io/docs/specs/otel/protocol/exporter/ + OTEL_EXPORTER_OTLP_COMPRESSION: 'gzip' + # -- OTel Logs exporter to be used. + # Ref: https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ + OTEL_LOGS_EXPORTER: 'none' + # -- OTel traces exporter to be used. + # Ref: https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ + OTEL_TRACES_EXPORTER: 'none' + # -- OTel sampler to be used for traces. + # Ref: https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ + OTEL_TRACES_SAMPLER: 'parentbased_always_on' + # -- OTel metrics exporter to be used. Set to "prometheus" to export metrics in Prometheus format. If set to "prometheus", it's recommended to set METRICS_SCRAPE_TIMEOUT_MS=4×scrape_interval. + # Ref: https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ + OTEL_METRICS_EXPORTER: 'otlp' + # -- The time interval (in milliseconds) between the start of two export attempts for push metric exporters, such as "otlp". + # Ref: https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ + OTEL_METRIC_EXPORT_INTERVAL: '10000' + # -- Maximum allowed time (in milliseconds) to export data for push metric exporters, such as "otlp". + # Ref: https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ + OTEL_METRIC_EXPORT_TIMEOUT: '5000' + # -- Host used by the Prometheus OTel metrics exporter if OTEL_METRICS_EXPORTER=prometheus + OTEL_EXPORTER_PROMETHEUS_HOST: '0.0.0.0' + # -- Port used by the Prometheus OTel metrics exporter if OTEL_METRICS_EXPORTER=prometheus + OTEL_EXPORTER_PROMETHEUS_PORT: '9464' + # -- Emit the stable HTTP and networking OTel conventions if CF_TELEMETRY_OTEL_ALLOW_HTTP_INSTRUMENTATION=true. 
+ OTEL_SEMCONV_STABILITY_OPT_IN: 'http' + # + # -- Deprecated metrics configuration + # -- DEPRECATED: Use OpenTelemetry metrics instead. This option enables Prometheus metrics and will be removed in a future release. + # If enabled, make sure to disable the new metrics by specifying CF_TELEMETRY_PROMETHEUS_ENABLE=false. + METRICS_PROMETHEUS_ENABLED: false + # -- DEPRECATED: Use OpenTelemetry metrics instead. This option enables legacy metrics and will be removed in a future release. METRICS_PROMETHEUS_ENABLE_LEGACY_METRICS: false - # -- Enable collecting process metrics + # -- DEPRECATED: Use OpenTelemetry metrics instead. This option enables process metrics and will be removed in a future release. METRICS_PROMETHEUS_COLLECT_PROCESS_METRICS: false - # -- Host for Prometheus metrics server + # -- DEPRECATED: Use OpenTelemetry metrics instead. This option sets the host for the Prometheus metrics server and will be removed in a future release. METRICS_PROMETHEUS_HOST: '0.0.0.0' - # -- Port for Prometheus metrics server + # -- DEPRECATED: Use OpenTelemetry metrics instead. This option sets the port for the Prometheus metrics server and will be removed in a future release. METRICS_PROMETHEUS_PORT: 9100 - # -- The timeout till the engine waits for Prometheus to pull the latest metrics before engine shuts down (in milliseconds) + # -- DEPRECATED: Use OpenTelemetry metrics instead. This option sets the exit timeout for the Prometheus metrics server and will be removed in a future release. + # If set, the engine will wait this many milliseconds for the scrape before exiting. 
METRICS_PROMETHEUS_SCRAPE_TIMEOUT: '15000' + # + # -- Engine operation configuration + # -- Interval to check the exec status in the container-logger + CONTAINER_LOGGER_EXEC_CHECK_INTERVAL_MS: 1000 + # -- Timeout while doing requests to the Docker daemon + DOCKER_REQUEST_TIMEOUT_MS: 30000 + # -- If "true", composition images will be pulled sequentially + FORCE_COMPOSE_SERIAL_PULL: false # -- Trusted QEMU images used for docker builds - when left blank defaults to .runtime.engine.runtimeImages.DEFAULT_QEMU_IMAGE value TRUSTED_QEMU_IMAGES: 'tonistiigi/binfmt' # -- Set workflow limits. @@ -916,7 +979,7 @@ serviceMonitor: relabelings: - action: labelmap regex: __meta_kubernetes_pod_label_(.+) -# -- Add podMonitor (for engine pods) +# -- Add podMonitor # @default -- See below podMonitor: main: @@ -929,6 +992,8 @@ podMonitor: podMetricsEndpoints: - path: /metrics targetPort: 9100 + - path: /metrics + targetPort: 9464 runner: # -- Enable pod monitor for runner pod enabled: false