Skip to content

Commit 74cdf61

Browse files
build: upgrade engine (#603)
1 parent 8ef5452 commit 74cdf61

File tree

9 files changed

+305
-41
lines changed

9 files changed

+305
-41
lines changed

charts/cf-runtime/Chart.yaml

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
apiVersion: v2
22
description: A Helm chart for Codefresh Runner
33
name: cf-runtime
4-
version: 8.1.0
4+
version: 8.2.0
55
keywords:
66
- codefresh
77
- runner
@@ -17,8 +17,18 @@ annotations:
1717
artifacthub.io/containsSecurityUpdates: "false"
1818
# Supported kinds: `added`, `changed`, `deprecated`, `removed`, `fixed`, `security`:
1919
artifacthub.io/changes: |
20+
- kind: changed
21+
description: "Update \"engine\" to version 1.179.1."
2022
- kind: added
21-
description: "Added MAXIMUM_POST_STEPS_GRACE_PERIOD_MINUTES configuration for engine which controls maximum time for internal build chores before termination."
23+
description: "Add support for OpenTelemetry signals: metrics, logs, traces."
24+
- kind: added
25+
description: "Add support for Pyroscope profiles."
26+
- kind: changed
27+
description: "Redesign \"engine\" metrics to follow OpenTelemetry standards and provide more comprehensive insights into Classic Build execution. Please read the upgrade notes for more details."
28+
- kind: deprecated
29+
description: "Deprecate legacy Prometheus metrics in favor of new OpenTelemetry metrics in \"engine\". Please read upgrade notes for more details."
30+
- kind: changed
31+
description: "Improve observability of the build's \"Initializing Process\" step by providing more logs and a more detailed status of the step."
2232
dependencies:
2333
- name: cf-common
2434
repository: oci://quay.io/codefresh/charts

charts/cf-runtime/README.md

Lines changed: 58 additions & 12 deletions
Large diffs are not rendered by default.

charts/cf-runtime/README.md.gotmpl

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ Helm chart for deploying [Codefresh Runner](https://codefresh.io/docs/docs/insta
2020
- [To 7.x](#to-7-x)
2121
- [To 7.9.x](#to-7-9-x)
2222
- [To 8.x](#to-8-x)
23+
- [To 8.2.x](#to-8-2-x)
2324
- [Architecture](#architecture)
2425
- [Configuration](#configuration)
2526
- [EBS backend volume configuration in AWS](#ebs-backend-volume-configuration)
@@ -313,6 +314,29 @@ This means that any existing images in your pipelines that were created using th
313314

314315
To avoid operation disruption, you have to identify and convert such deprecated images to modern formats. Tutorial: [https://codefresh.io/docs/docs/kb/articles/upgrade-deprecated-docker-images/](https://codefresh.io/docs/docs/kb/articles/upgrade-deprecated-docker-images/)
315316

317+
### To 8.2.x
318+
319+
⚠️⚠️⚠️ **BREAKING CHANGE in metrics configuration** ⚠️⚠️⚠️
320+
321+
In this release, the `engine` component has migrated its metrics collection to OpenTelemetry, using the *push* model by default.
322+
323+
You can still switch to the *pull* model by setting the `OTEL_METRICS_EXPORTER=prometheus` environment variable for the `engine`. However, we recommend using the default configuration, as it is better suited for the short-lived nature of Classic Builds and provides more precise and complete metrics.
324+
325+
View [default chart values](https://artifacthub.io/packages/helm/codefresh-runner/cf-runtime?modal=values&path=runtime.engine.env) for more configuration options.
326+
327+
The `engine` metrics have also been redesigned to follow OpenTelemetry standards and to deliver more actionable insights. Full list of metrics: https://codefresh.io/docs/docs/installation/runner/classic-runtime-monitoring/
328+
329+
For a smooth transition, the previous Prometheus metrics are still available but are now disabled by default. **These legacy metrics will be removed in future releases.** If you need to temporarily retain the old metrics, add the following values to your chart configuration:
330+
331+
```yaml
332+
runtime:
333+
engine:
334+
env:
335+
CF_TELEMETRY_PROMETHEUS_ENABLE: "false" # Disable new Prometheus metrics to avoid port conflicts and data duplication
336+
CF_TELEMETRY_OTEL_ENABLE: "false" # Disable new OTel metrics to avoid data duplication
337+
METRICS_PROMETHEUS_ENABLED: "true" # Enable old Prometheus metrics
338+
```
339+
316340
## Architecture
317341

318342
[Codefresh Runner architecture](https://codefresh.io/docs/docs/installation/codefresh-runner/#codefresh-runner-architecture)

charts/cf-runtime/templates/runtime/runtime-env-spec-tmpl.yaml

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,16 @@
99
{{- if $runtimeImageRegistry }}
1010
{{- $_ := set $rootContext.Values.global "imageRegistry" $runtimeImageRegistry }}
1111
{{- end }}
12+
{{- $runtimeVersion := coalesce .Values.version .Chart.Version -}}
13+
{{- $runtimeName := include "runtime.runtime-environment-spec.runtime-name" . -}}
14+
{{- $engineVersion := coalesce $engineContext.image.tag "latest" -}}
15+
{{- if $engineContext.image.digest }}
16+
{{- $engineVersion = printf "%s@%s" $engineVersion $engineContext.image.digest -}}
17+
{{- end }}
18+
{{- $dindVersion := coalesce $dindContext.image.tag "latest" -}}
19+
{{- if $dindContext.image.digest }}
20+
{{- $dindVersion = printf "%s@%s" $dindVersion $dindContext.image.digest -}}
21+
{{- end }}
1222
metadata:
1323
name: {{ include "runtime.runtime-environment-spec.runtime-name" . }}
1424
agent: {{ .Values.runtime.agent }}
@@ -102,7 +112,10 @@ runtimeScheduler:
102112
{{- else }}
103113
COSIGN_IMAGE_SIGNER_IMAGE: {{ include (printf "%s.image.name" $cfCommonTplSemver ) (dict "image" (index $engineContext "runtimeImages" "cosign-image-signer") "context" $rootContext) | squote }}
104114
{{- end }}
105-
RUNTIME_CHART_VERSION: {{ coalesce .Values.version .Chart.Version }}
115+
RUNTIME_CHART_VERSION: {{ $runtimeVersion }}
116+
CF_SERVICE_NAME: {{ printf "cf-classic-engine" }}
117+
CF_SERVICE_VERSION: {{ $engineVersion }}
118+
OTEL_RESOURCE_ATTRIBUTES: {{ printf "service.name=cf-classic-engine,service.version=%s,service.namespace=cf-classic-runtime,cf.classic.runtime.name=%s,cf.classic.runtime.version=%s" $engineVersion $runtimeName $runtimeVersion }}
106119
{{- with $engineContext.userEnvVars }}
107120
userEnvVars: {{- toYaml . | nindent 4 }}
108121
{{- end }}
@@ -162,12 +175,13 @@ dockerDaemonScheduler:
162175
{{- with $dindContext.userAccess }}
163176
userAccess: {{ . }}
164177
{{- end }}
165-
{{- with $dindContext.env }}
166178
envVars:
179+
{{- with $dindContext.env }}
167180
{{- range $key, $val := . }}
168181
{{ $key }}: {{ $val | squote }}
169182
{{- end }}
170183
{{- end }}
184+
OTEL_RESOURCE_ATTRIBUTES: {{ printf "service.name=cf-classic-dind,service.version=%s,service.namespace=cf-classic-runtime,cf.classic.runtime.name=%s,cf.classic.runtime.version=%s" $dindVersion $runtimeName $runtimeVersion }}
171185
cluster:
172186
namespace: {{ .Release.Namespace }}
173187
serviceAccount: {{ $dindContext.serviceAccount }}

charts/cf-runtime/tests/private-registry/private_registry_test.yaml

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,39 @@ tests:
3131
- run
3232
- start
3333
envVars:
34+
CF_TELEMETRY_LOGS_LEVEL: 'debug'
35+
CF_TELEMETRY_OTEL_ALLOW_HTTP_INSTRUMENTATION: 'false'
36+
CF_TELEMETRY_OTEL_ENABLE: 'true'
37+
CF_TELEMETRY_PROMETHEUS_ENABLE: 'false'
38+
CF_TELEMETRY_PROMETHEUS_ENABLE_PROCESS_METRICS: 'false'
39+
CF_TELEMETRY_PROMETHEUS_HOST: '0.0.0.0'
40+
CF_TELEMETRY_PROMETHEUS_PORT: '9100'
41+
CF_TELEMETRY_PYROSCOPE_ENABLE: 'false'
3442
CONTAINER_LOGGER_EXEC_CHECK_INTERVAL_MS: '1000'
3543
DOCKER_REQUEST_TIMEOUT_MS: '30000'
3644
FORCE_COMPOSE_SERIAL_PULL: 'false'
3745
LOGGER_LEVEL: 'debug'
3846
LOG_OUTGOING_HTTP_REQUESTS: 'false'
3947
METRICS_PROMETHEUS_COLLECT_PROCESS_METRICS: 'false'
40-
METRICS_PROMETHEUS_ENABLED: 'true'
48+
METRICS_PROMETHEUS_ENABLED: 'false'
4149
METRICS_PROMETHEUS_ENABLE_LEGACY_METRICS: 'false'
4250
METRICS_PROMETHEUS_HOST: '0.0.0.0'
4351
METRICS_PROMETHEUS_PORT: '9100'
4452
METRICS_PROMETHEUS_SCRAPE_TIMEOUT: '15000'
53+
METRICS_SCRAPE_TIMEOUT_MS: '0'
54+
OTEL_EXPORTER_OTLP_COMPRESSION: 'gzip'
55+
OTEL_EXPORTER_OTLP_ENDPOINT: 'http://localhost:4317'
56+
OTEL_EXPORTER_OTLP_PROTOCOL: 'grpc'
57+
OTEL_EXPORTER_PROMETHEUS_HOST: '0.0.0.0'
58+
OTEL_EXPORTER_PROMETHEUS_PORT: '9464'
59+
OTEL_LOGS_EXPORTER: 'none'
60+
OTEL_METRICS_EXPORTER: 'otlp'
61+
OTEL_METRIC_EXPORT_INTERVAL: '10000'
62+
OTEL_METRIC_EXPORT_TIMEOUT: '5000'
63+
OTEL_SEMCONV_STABILITY_OPT_IN: 'http'
64+
OTEL_TRACES_EXPORTER: 'none'
65+
OTEL_TRACES_SAMPLER: 'parentbased_always_on'
66+
PYROSCOPE_SERVER_ADDRESS: ''
4567
TRUSTED_QEMU_IMAGES: 'tonistiigi/binfmt'
4668
COMPOSE_IMAGE: 'somedomain.io/codefresh/compose:tagoverride'
4769
CONTAINER_LOGGER_IMAGE: 'somedomain.io/codefresh/cf-container-logger:tagoverride'
@@ -59,6 +81,9 @@ tests:
5981
GC_BUILDER_IMAGE: 'somedomain.io/codefresh/cf-gc-builder:tagoverride'
6082
COSIGN_IMAGE_SIGNER_IMAGE: 'somedomain.io/codefresh/cf-cosign-image-signer:tagoverride'
6183
RUNTIME_CHART_VERSION: 1.0.0
84+
CF_SERVICE_NAME: cf-classic-engine
85+
CF_SERVICE_VERSION: tagoverride
86+
OTEL_RESOURCE_ATTRIBUTES: service.name=cf-classic-engine,service.version=tagoverride,service.namespace=cf-classic-runtime,cf.classic.runtime.name=my-context/codefresh,cf.classic.runtime.version=1.0.0
6287
workflowLimits:
6388
MAXIMUM_ALLOWED_TIME_BEFORE_PRE_STEPS_SUCCESS: 600
6489
MAXIMUM_ALLOWED_WORKFLOW_AGE_BEFORE_TERMINATION: 86400
@@ -89,6 +114,8 @@ tests:
89114
dindImage: 'somedomain.io/codefresh/dind:tagoverride'
90115
imagePullPolicy: IfNotPresent
91116
userAccess: true
117+
envVars:
118+
OTEL_RESOURCE_ATTRIBUTES: service.name=cf-classic-dind,service.version=tagoverride,service.namespace=cf-classic-runtime,cf.classic.runtime.name=my-context/codefresh,cf.classic.runtime.version=1.0.0
92119
cluster:
93120
namespace: codefresh
94121
serviceAccount: codefresh-engine

charts/cf-runtime/tests/runtime/runtime_onprem_test.yaml

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,14 @@ tests:
4141
- two
4242
- three
4343
envVars:
44+
CF_TELEMETRY_LOGS_LEVEL: 'debug'
45+
CF_TELEMETRY_OTEL_ALLOW_HTTP_INSTRUMENTATION: 'false'
46+
CF_TELEMETRY_OTEL_ENABLE: 'true'
47+
CF_TELEMETRY_PROMETHEUS_ENABLE: 'false'
48+
CF_TELEMETRY_PROMETHEUS_ENABLE_PROCESS_METRICS: 'false'
49+
CF_TELEMETRY_PROMETHEUS_HOST: '0.0.0.0'
50+
CF_TELEMETRY_PROMETHEUS_PORT: '9100'
51+
CF_TELEMETRY_PYROSCOPE_ENABLE: 'false'
4452
CONTAINER_LOGGER_EXEC_CHECK_INTERVAL_MS: '1000'
4553
DOCKER_REQUEST_TIMEOUT_MS: '30000'
4654
FLOAT_AS_STRING: '12.34'
@@ -50,11 +58,25 @@ tests:
5058
LOGGER_LEVEL: 'debug'
5159
LOG_OUTGOING_HTTP_REQUESTS: 'false'
5260
METRICS_PROMETHEUS_COLLECT_PROCESS_METRICS: 'false'
53-
METRICS_PROMETHEUS_ENABLED: 'true'
61+
METRICS_PROMETHEUS_ENABLED: 'false'
5462
METRICS_PROMETHEUS_ENABLE_LEGACY_METRICS: 'false'
5563
METRICS_PROMETHEUS_HOST: '0.0.0.0'
5664
METRICS_PROMETHEUS_PORT: '9100'
5765
METRICS_PROMETHEUS_SCRAPE_TIMEOUT: '15000'
66+
METRICS_SCRAPE_TIMEOUT_MS: '0'
67+
OTEL_EXPORTER_OTLP_COMPRESSION: 'gzip'
68+
OTEL_EXPORTER_OTLP_ENDPOINT: 'http://localhost:4317'
69+
OTEL_EXPORTER_OTLP_PROTOCOL: 'grpc'
70+
OTEL_EXPORTER_PROMETHEUS_HOST: '0.0.0.0'
71+
OTEL_EXPORTER_PROMETHEUS_PORT: '9464'
72+
OTEL_LOGS_EXPORTER: 'none'
73+
OTEL_METRICS_EXPORTER: 'otlp'
74+
OTEL_METRIC_EXPORT_INTERVAL: '10000'
75+
OTEL_METRIC_EXPORT_TIMEOUT: '5000'
76+
OTEL_SEMCONV_STABILITY_OPT_IN: 'http'
77+
OTEL_TRACES_EXPORTER: 'none'
78+
OTEL_TRACES_SAMPLER: 'parentbased_always_on'
79+
PYROSCOPE_SERVER_ADDRESS: ''
5880
TRUSTED_QEMU_IMAGES: 'tonistiigi/binfmt'
5981
COMPOSE_IMAGE: 'quay.io/codefresh/compose:tagoverride'
6082
CONTAINER_LOGGER_IMAGE: 'quay.io/codefresh/cf-container-logger:tagoverride'
@@ -72,6 +94,9 @@ tests:
7294
GC_BUILDER_IMAGE: 'quay.io/codefresh/cf-gc-builder:tagoverride'
7395
COSIGN_IMAGE_SIGNER_IMAGE: 'quay.io/codefresh/cf-cosign-image-signer:tagoverride'
7496
RUNTIME_CHART_VERSION: 1.0.0
97+
CF_SERVICE_NAME: cf-classic-engine
98+
CF_SERVICE_VERSION: tagoverride
99+
OTEL_RESOURCE_ATTRIBUTES: service.name=cf-classic-engine,service.version=tagoverride,service.namespace=cf-classic-runtime,cf.classic.runtime.name=system/my-runtime,cf.classic.runtime.version=1.0.0
75100
workflowLimits:
76101
MAXIMUM_ALLOWED_TIME_BEFORE_PRE_STEPS_SUCCESS: 600
77102
MAXIMUM_ALLOWED_WORKFLOW_AGE_BEFORE_TERMINATION: 86400
@@ -123,6 +148,7 @@ tests:
123148
ALICE: 'BOB'
124149
FLOAT_AS_STRING: '12.34'
125150
INT: '123'
151+
OTEL_RESOURCE_ATTRIBUTES: service.name=cf-classic-dind,service.version=tagoverride,service.namespace=cf-classic-runtime,cf.classic.runtime.name=system/my-runtime,cf.classic.runtime.version=1.0.0
126152
cluster:
127153
namespace: codefresh
128154
serviceAccount: service-account-override
@@ -228,6 +254,14 @@ tests:
228254
- two
229255
- three
230256
envVars:
257+
CF_TELEMETRY_LOGS_LEVEL: 'debug'
258+
CF_TELEMETRY_OTEL_ALLOW_HTTP_INSTRUMENTATION: 'false'
259+
CF_TELEMETRY_OTEL_ENABLE: 'true'
260+
CF_TELEMETRY_PROMETHEUS_ENABLE: 'false'
261+
CF_TELEMETRY_PROMETHEUS_ENABLE_PROCESS_METRICS: 'false'
262+
CF_TELEMETRY_PROMETHEUS_HOST: '0.0.0.0'
263+
CF_TELEMETRY_PROMETHEUS_PORT: '9100'
264+
CF_TELEMETRY_PYROSCOPE_ENABLE: 'false'
231265
CONTAINER_LOGGER_EXEC_CHECK_INTERVAL_MS: '1000'
232266
DOCKER_REQUEST_TIMEOUT_MS: '30000'
233267
FLOAT_AS_STRING: '12.34'
@@ -237,11 +271,25 @@ tests:
237271
LOGGER_LEVEL: 'debug'
238272
LOG_OUTGOING_HTTP_REQUESTS: 'false'
239273
METRICS_PROMETHEUS_COLLECT_PROCESS_METRICS: 'false'
240-
METRICS_PROMETHEUS_ENABLED: 'true'
274+
METRICS_PROMETHEUS_ENABLED: 'false'
241275
METRICS_PROMETHEUS_ENABLE_LEGACY_METRICS: 'false'
242276
METRICS_PROMETHEUS_HOST: '0.0.0.0'
243277
METRICS_PROMETHEUS_PORT: '9100'
244278
METRICS_PROMETHEUS_SCRAPE_TIMEOUT: '15000'
279+
METRICS_SCRAPE_TIMEOUT_MS: '0'
280+
OTEL_EXPORTER_OTLP_COMPRESSION: 'gzip'
281+
OTEL_EXPORTER_OTLP_ENDPOINT: 'http://localhost:4317'
282+
OTEL_EXPORTER_OTLP_PROTOCOL: 'grpc'
283+
OTEL_EXPORTER_PROMETHEUS_HOST: '0.0.0.0'
284+
OTEL_EXPORTER_PROMETHEUS_PORT: '9464'
285+
OTEL_LOGS_EXPORTER: 'none'
286+
OTEL_METRICS_EXPORTER: 'otlp'
287+
OTEL_METRIC_EXPORT_INTERVAL: '10000'
288+
OTEL_METRIC_EXPORT_TIMEOUT: '5000'
289+
OTEL_SEMCONV_STABILITY_OPT_IN: 'http'
290+
OTEL_TRACES_EXPORTER: 'none'
291+
OTEL_TRACES_SAMPLER: 'parentbased_always_on'
292+
PYROSCOPE_SERVER_ADDRESS: ''
245293
TRUSTED_QEMU_IMAGES: 'tonistiigi/binfmt'
246294
COMPOSE_IMAGE: 'quay.io/codefresh/compose:tagoverride'
247295
CONTAINER_LOGGER_IMAGE: 'quay.io/codefresh/cf-container-logger:tagoverride'
@@ -259,6 +307,9 @@ tests:
259307
GC_BUILDER_IMAGE: 'quay.io/codefresh/cf-gc-builder:tagoverride'
260308
COSIGN_IMAGE_SIGNER_IMAGE: 'quay.io/codefresh/cf-cosign-image-signer:tagoverride'
261309
RUNTIME_CHART_VERSION: 1.0.0
310+
CF_SERVICE_NAME: cf-classic-engine
311+
CF_SERVICE_VERSION: tagoverride
312+
OTEL_RESOURCE_ATTRIBUTES: service.name=cf-classic-engine,service.version=tagoverride,service.namespace=cf-classic-runtime,cf.classic.runtime.name=system/default-override,cf.classic.runtime.version=1.0.0
262313
workflowLimits:
263314
MAXIMUM_ALLOWED_TIME_BEFORE_PRE_STEPS_SUCCESS: 600
264315
MAXIMUM_ALLOWED_WORKFLOW_AGE_BEFORE_TERMINATION: 86400
@@ -310,6 +361,7 @@ tests:
310361
ALICE: 'BOB'
311362
FLOAT_AS_STRING: '12.34'
312363
INT: '123'
364+
OTEL_RESOURCE_ATTRIBUTES: service.name=cf-classic-dind,service.version=tagoverride,service.namespace=cf-classic-runtime,cf.classic.runtime.name=system/default-override,cf.classic.runtime.version=1.0.0
313365
cluster:
314366
namespace: codefresh
315367
serviceAccount: service-account-override

0 commit comments

Comments
 (0)