operator-framework
diff --git a/‎.github/workflows/e2e-test.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/e2e-test.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/content/en/blog/releases/v5-3-release.md‎
Lines changed: 1 addition & 1 deletion b/‎docs/content/en/blog/releases/v5-3-release.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/content/en/docs/documentation/operations/_index.md‎
Lines changed: 4 additions & 0 deletions b/‎docs/content/en/docs/documentation/operations/_index.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎docs/content/en/docs/documentation/operations/health-probes.md‎
Lines changed: 111 additions & 0 deletions b/‎docs/content/en/docs/documentation/operations/health-probes.md‎
Lines changed: 111 additions & 0 deletions
diff --git a/‎docs/content/en/docs/documentation/operations/helm-chart.md‎
Lines changed: 5 additions & 5 deletions b/‎docs/content/en/docs/documentation/operations/helm-chart.md‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎docs/content/en/docs/documentation/operations/metrics.md‎
Lines changed: 2 additions & 2 deletions b/‎docs/content/en/docs/documentation/operations/metrics.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎helm/generic-helm-chart/templates/deployment.yaml‎
Lines changed: 36 additions & 0 deletions b/‎helm/generic-helm-chart/templates/deployment.yaml‎
Lines changed: 36 additions & 0 deletions
diff --git a/‎helm/generic-helm-chart/tests/deployment_test.yaml‎
Lines changed: 54 additions & 0 deletions b/‎helm/generic-helm-chart/tests/deployment_test.yaml‎
Lines changed: 54 additions & 0 deletions
diff --git a/‎helm/generic-helm-chart/values.yaml‎
Lines changed: 32 additions & 0 deletions b/‎helm/generic-helm-chart/values.yaml‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎observability/install-observability.sh‎
Lines changed: 1 addition & 1 deletion b/‎observability/install-observability.sh‎
Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ jobs:
           - "sample-operators/tomcat-operator"
           - "sample-operators/webpage"
           - "sample-operators/leader-election"
-          - "sample-operators/metrics-processing"
+          - "sample-operators/operations"
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
 
@@ -97,7 +97,7 @@ A ready-to-use **Grafana dashboard** is included at
 [`observability/josdk-operator-metrics-dashboard.json`](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/observability/josdk-operator-metrics-dashboard.json).
 
 The
-[`metrics-processing` sample operator](https://github.com/java-operator-sdk/java-operator-sdk/tree/main/sample-operators/metrics-processing)
+[`operations` sample operator](https://github.com/java-operator-sdk/java-operator-sdk/tree/main/sample-operators/operations)
 provides a complete end-to-end setup with Prometheus, Grafana, and an OpenTelemetry Collector,
 installable via `observability/install-observability.sh`. This is a good starting point for
 verifying metrics in a real cluster.
 
@@ -4,3 +4,7 @@ weight: 80
 ---
 
 This section covers operations-related features for running and managing operators in production.
+
+See the
+[`operations` sample operator](https://github.com/java-operator-sdk/java-operator-sdk/tree/main/sample-operators/operations)
+for a complete working example that demonstrates health probes, metrics, and Helm-based deployment.
@@ -0,0 +1,111 @@
+---
+title: Health Probes
+weight: 85
+---
+
+Operators running in Kubernetes should expose health probe endpoints so that the kubelet can detect startup
+failures and runtime degradation. JOSDK provides the building blocks through its
+[`RuntimeInfo`](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/RuntimeInfo.java)
+API.
+
+## RuntimeInfo
+
+`RuntimeInfo` is available via `operator.getRuntimeInfo()` and exposes:
+
+| Method | Purpose |
+|---|---|
+| `isStarted()` | `true` once the operator and all its controllers have fully started |
+| `allEventSourcesAreHealthy()` | `true` when every registered event source (informers, polling sources, etc.) reports a healthy status |
+| `unhealthyEventSources()` | returns a map of controller name → unhealthy event sources, useful for diagnostics |
+| `unhealthyInformerWrappingEventSourceHealthIndicator()` | returns a map of controller name → unhealthy informer-wrapping event sources, each exposing per-informer details via `InformerHealthIndicator` (`hasSynced()`, `isWatching()`, `isRunning()`, `getTargetNamespace()`) |
+
+In most cases a single readiness probe backed by `allEventSourcesAreHealthy()` is sufficient: before the
+operator has fully started the informers will not have synced yet, so the check naturally covers the startup
+case as well. Once running, it detects runtime degradation such as a lost watch connection.
+
+### Fine-Grained Informer Diagnostics
+
+For advanced use cases — such as exposing per-informer health in a diagnostic endpoint or logging which
+specific namespace lost its watch — `unhealthyInformerWrappingEventSourceHealthIndicator()` gives access to
+individual `InformerHealthIndicator` instances. Each indicator exposes `hasSynced()`, `isWatching()`,
+`isRunning()`, and `getTargetNamespace()`. This is typically not needed for a standard health probe but can
+be valuable for operational dashboards or troubleshooting.
+
+## Setting Up a Probe Endpoint
+
+The example below uses [Jetty](https://eclipse.dev/jetty/) to expose a `/healthz` endpoint. Any HTTP
+server library works — the key is calling the `RuntimeInfo` methods to determine the response code.
+
+```java
+import org.eclipse.jetty.server.Server;
+import org.eclipse.jetty.server.handler.ContextHandler;
+
+Operator operator = new Operator();
+operator.register(new MyReconciler());
+
+// start the health server before the operator so probes can be queried during startup
+var health = new ContextHandler(new HealthHandler(operator), "/healthz");
+Server server = new Server(8080);
+server.setHandler(health);
+server.start();
+
+operator.start();
+```
+
+Where `HealthHandler` extends `org.eclipse.jetty.server.Handler.Abstract` and checks
+`operator.getRuntimeInfo().allEventSourcesAreHealthy()`.
+
+See the
+[`operations` sample operator](https://github.com/java-operator-sdk/java-operator-sdk/tree/main/sample-operators/operations)
+for a complete working example.
+
+## Kubernetes Deployment Configuration
+
+Once your operator exposes the probe endpoint, configure probes in your Deployment manifest. Both the
+startup and readiness probes can point to the same `/healthz` endpoint — the startup probe simply uses a
+higher `failureThreshold` to give the operator time to initialize:
+
+```yaml
+containers:
+- name: operator
+  ports:
+  - name: probes
+    containerPort: 8080
+  startupProbe:
+    httpGet:
+      path: /healthz
+      port: probes
+    initialDelaySeconds: 1
+    periodSeconds: 3
+    failureThreshold: 20
+  readinessProbe:
+    httpGet:
+      path: /healthz
+      port: probes
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    failureThreshold: 3
+```
+
+The startup probe gives the operator time to start (up to ~60 s with the settings above). Once the startup
+probe succeeds, the readiness probe takes over and will mark the pod as not-ready if any event source
+becomes unhealthy.
+
+## Helm Chart Support
+
+The [generic Helm chart](/docs/documentation/operations/helm-chart) supports health probes out of the box.
+Enable them in your `values.yaml`:
+
+```yaml
+probes:
+  port: 8080
+  startup:
+    enabled: true
+    path: /healthz
+  readiness:
+    enabled: true
+    path: /healthz
+```
+
+All probe timing parameters (`initialDelaySeconds`, `periodSeconds`, `timeoutSeconds`,
+`failureThreshold`) have sensible defaults and can be overridden.
@@ -11,7 +11,7 @@ patterns so you don't have to write a chart from scratch. The chart is maintaine
 Contributions are more than welcome.
 
 The chart is used in the
-[`metrics-processing` sample operator E2E test](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/sample-operators/metrics-processing/src/test/java/io/javaoperatorsdk/operator/sample/metrics/MetricsHandlingE2E.java)
+[`operations` sample operator E2E test](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/sample-operators/operations/src/test/java/io/javaoperatorsdk/operator/sample/operations/OperationsE2E.java)
 to deploy the operator to a cluster via Helm.
 
 ## What the Chart Provides
@@ -80,16 +80,16 @@ for all available options.
 
 ## Usage Example
 
-A working example of how to use the chart can be found in the metrics-processing sample operator's
-[`helm-values.yaml`](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/sample-operators/metrics-processing/src/test/resources/helm-values.yaml):
+A working example of how to use the chart can be found in the operations sample operator's
+[`helm-values.yaml`](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/sample-operators/operations/src/test/resources/helm-values.yaml):
 
 ```yaml
 image:
-  repository: metrics-processing-operator
+  repository: operations-operator
   pullPolicy: Never
   tag: "latest"
 
-nameOverride: "metrics-processing-operator"
+nameOverride: "operations-operator"
 
 resources: {}
 
 
@@ -103,9 +103,9 @@ observability sample (see below).
 #### Exploring metrics end-to-end
 
 The
-[`metrics-processing` sample operator](https://github.com/java-operator-sdk/java-operator-sdk/tree/main/sample-operators/metrics-processing)
+[`operations` sample operator](https://github.com/java-operator-sdk/java-operator-sdk/tree/main/sample-operators/operations)
 includes a full end-to-end test,
-[`MetricsHandlingE2E`](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/sample-operators/metrics-processing/src/test/java/io/javaoperatorsdk/operator/sample/metrics/MetricsHandlingE2E.java),
+[`OperationsE2E`](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/sample-operators/operations/src/test/java/io/javaoperatorsdk/operator/sample/operations/OperationsE2E.java),
 that:
 
 1. Installs a local observability stack (Prometheus, Grafana, OpenTelemetry Collector) via
 
@@ -54,6 +54,42 @@ spec:
           {{- toYaml .Values.securityContext | nindent 12 }}
         image: "{{ required "A valid .Values.image.repository is required" .Values.image.repository }}:{{ include "generic-operator.imageTag" . }}"
         imagePullPolicy: {{ .Values.image.pullPolicy }}
+        {{- /* Expose the named container port "probes" only when at least one probe is enabled; every probe below targets this port by name. */}}
+        {{- if or .Values.probes.startup.enabled .Values.probes.readiness.enabled .Values.probes.liveness.enabled }}
+        ports:
+        - name: probes
+          containerPort: {{ .Values.probes.port }}
+          protocol: TCP
+        {{- end }}
+        {{- /* Startup probe: rendered only when .Values.probes.startup.enabled is set. */}}
+        {{- if .Values.probes.startup.enabled }}
+        startupProbe:
+          httpGet:
+            path: {{ .Values.probes.startup.path }}
+            port: probes
+          initialDelaySeconds: {{ .Values.probes.startup.initialDelaySeconds }}
+          periodSeconds: {{ .Values.probes.startup.periodSeconds }}
+          timeoutSeconds: {{ .Values.probes.startup.timeoutSeconds }}
+          failureThreshold: {{ .Values.probes.startup.failureThreshold }}
+        {{- end }}
+        {{- /* Readiness probe: rendered only when .Values.probes.readiness.enabled is set. */}}
+        {{- if .Values.probes.readiness.enabled }}
+        readinessProbe:
+          httpGet:
+            path: {{ .Values.probes.readiness.path }}
+            port: probes
+          initialDelaySeconds: {{ .Values.probes.readiness.initialDelaySeconds }}
+          periodSeconds: {{ .Values.probes.readiness.periodSeconds }}
+          timeoutSeconds: {{ .Values.probes.readiness.timeoutSeconds }}
+          failureThreshold: {{ .Values.probes.readiness.failureThreshold }}
+        {{- end }}
+        {{- /* Liveness probe: rendered only when .Values.probes.liveness.enabled is set. */}}
+        {{- if .Values.probes.liveness.enabled }}
+        livenessProbe:
+          httpGet:
+            path: {{ .Values.probes.liveness.path }}
+            port: probes
+          initialDelaySeconds: {{ .Values.probes.liveness.initialDelaySeconds }}
+          periodSeconds: {{ .Values.probes.liveness.periodSeconds }}
+          timeoutSeconds: {{ .Values.probes.liveness.timeoutSeconds }}
+          failureThreshold: {{ .Values.probes.liveness.failureThreshold }}
+        {{- end }}
         env:
         - name: OPERATOR_NAMESPACE
           valueFrom:
 
@@ -288,3 +288,57 @@ tests:
       - equal:
           path: spec.template.spec.serviceAccountName
           value: my-operator
+
+  # All probes are disabled in the default values, so none of them may be rendered.
+  - it: should not include probes by default
+    documentSelector:
+      path: kind
+      value: Deployment
+    asserts:
+      - isNull:
+          path: spec.template.spec.containers[0].startupProbe
+      - isNull:
+          path: spec.template.spec.containers[0].readinessProbe
+      - isNull:
+          path: spec.template.spec.containers[0].livenessProbe
+
+  # Enabling a single probe must also expose the named "probes" container port.
+  - it: should add startup probe when enabled
+    documentSelector:
+      path: kind
+      value: Deployment
+    set:
+      probes.startup.enabled: true
+    asserts:
+      - equal:
+          path: spec.template.spec.containers[0].startupProbe.httpGet.path
+          value: /health/startup
+      - equal:
+          path: spec.template.spec.containers[0].startupProbe.httpGet.port
+          value: probes
+      - contains:
+          path: spec.template.spec.containers[0].ports
+          content:
+            name: probes
+            containerPort: 8080
+            protocol: TCP
+
+  - it: should add readiness probe when enabled
+    documentSelector:
+      path: kind
+      value: Deployment
+    set:
+      probes.readiness.enabled: true
+    asserts:
+      - equal:
+          path: spec.template.spec.containers[0].readinessProbe.httpGet.path
+          value: /health/ready
+      - equal:
+          path: spec.template.spec.containers[0].readinessProbe.httpGet.port
+          value: probes
+
+  # The liveness probe is rendered by the same template logic as the other two
+  # probes; cover its default path and the named port it relies on.
+  - it: should add liveness probe when enabled
+    documentSelector:
+      path: kind
+      value: Deployment
+    set:
+      probes.liveness.enabled: true
+    asserts:
+      - equal:
+          path: spec.template.spec.containers[0].livenessProbe.httpGet.path
+          value: /health/live
+      - equal:
+          path: spec.template.spec.containers[0].livenessProbe.httpGet.port
+          value: probes
+      - contains:
+          path: spec.template.spec.containers[0].ports
+          content:
+            name: probes
+            containerPort: 8080
+            protocol: TCP
+
+  # Enabling startup and readiness must not implicitly enable liveness.
+  - it: should add both probes when both enabled
+    documentSelector:
+      path: kind
+      value: Deployment
+    set:
+      probes.startup.enabled: true
+      probes.readiness.enabled: true
+    asserts:
+      - isNotNull:
+          path: spec.template.spec.containers[0].startupProbe
+      - isNotNull:
+          path: spec.template.spec.containers[0].readinessProbe
+      - isNull:
+          path: spec.template.spec.containers[0].livenessProbe
@@ -86,6 +86,9 @@ operatorConfig:
             </Console>
         </Appenders>
         <Loggers>
+            <Logger name="io.micrometer.registry.otlp.OtlpMeterRegistry" level="ERROR" additivity="false">
+                <AppenderRef ref="Console"/>
+            </Logger>
             <Root level="INFO">
                 <AppenderRef ref="Console"/>
             </Root>
@@ -128,3 +131,32 @@ extraVolumeMounts: []
 # RBAC configuration
 rbac:
   create: true
+
+# Health probes configuration.
+# Every probe is disabled by default; a probe is rendered into the Deployment
+# only when its `enabled` flag is set to true.
+probes:
+  # Container port exposed under the name `probes`; all enabled probes target it.
+  port: 8080
+  startup:
+    enabled: false
+    path: /health/startup
+    initialDelaySeconds: 1
+    periodSeconds: 10
+    timeoutSeconds: 5
+    failureThreshold: 20
+  readiness:
+    enabled: false
+    path: /health/ready
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 5
+    failureThreshold: 3
+  # A liveness probe can be configured as well. However, the framework itself
+  # does not define any runtime information about what such a probe should
+  # check, so this option exists only to cover your domain-specific use case.
+  liveness:
+    enabled: false
+    path: /health/live
+    initialDelaySeconds: 15
+    periodSeconds: 10
+    timeoutSeconds: 5
+    failureThreshold: 3
@@ -237,7 +237,7 @@ kubectl wait --for=condition=ready pod --all -n cert-manager --timeout=300s 2>/d
 
 # Wait for observability pods
 echo -e "${YELLOW}Checking observability pods...${NC}"
-kubectl wait --for=condition=ready pod --all -n observability --timeout=300s
+kubectl wait --for=condition=ready pod --all -n observability --timeout=480s
 
 echo -e "${GREEN}✓ All pods are ready${NC}"