SeleniumHQ
diff --git a/‎charts/selenium-grid/README.md‎
Lines changed: 37 additions & 16 deletions b/‎charts/selenium-grid/README.md‎
Lines changed: 37 additions & 16 deletions
diff --git a/‎charts/selenium-grid/configs/distributor/distributorProbe.sh‎
Lines changed: 39 additions & 0 deletions b/‎charts/selenium-grid/configs/distributor/distributorProbe.sh‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎charts/selenium-grid/configs/router/routerProbe.sh‎
Lines changed: 27 additions & 0 deletions b/‎charts/selenium-grid/configs/router/routerProbe.sh‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎charts/selenium-grid/templates/_nameHelpers.tpl‎
Lines changed: 14 additions & 0 deletions b/‎charts/selenium-grid/templates/_nameHelpers.tpl‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎charts/selenium-grid/templates/distributor-configmap.yaml‎
Lines changed: 31 additions & 0 deletions b/‎charts/selenium-grid/templates/distributor-configmap.yaml‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎charts/selenium-grid/templates/distributor-deployment.yaml‎
Lines changed: 63 additions & 0 deletions b/‎charts/selenium-grid/templates/distributor-deployment.yaml‎
Lines changed: 63 additions & 0 deletions
@@ -23,7 +23,10 @@ This chart enables the creation of a Selenium Grid Server in Kubernetes.
       * [Configuration `global.K8S_PUBLIC_IP`](#configuration-globalk8s_public_ip)
     * [Configuration of Nodes](#configuration-of-nodes)
       * [Container ports and Service ports](#container-ports-and-service-ports)
-      * [Probes](#probes)
+    * [Configuration of Probes](#configuration-of-probes)
+      * [Node Probes](#node-probes)
+      * [Distributor Probes](#distributor-probes)
+      * [Router Probes](#router-probes)
     * [Configuration extra scripts mount to container](#configuration-extra-scripts-mount-to-container)
     * [Configuration of video recorder and video uploader](#configuration-of-video-recorder-and-video-uploader)
       * [Video recorder](#video-recorder)
@@ -299,20 +302,20 @@ ingress-nginx:
 ### Configuration global
 For now, global configuration supported is:
 
-| Parameter                                      | Default                 | Description                              |
-|------------------------------------------------|-------------------------|------------------------------------------|
-| `global.K8S_PUBLIC_IP`                         | `""`                    | Public IP of the host running K8s        |
-| `global.seleniumGrid.imageRegistry`            | `selenium`              | Distribution registry to pull images     |
-| `global.seleniumGrid.imageTag`                 | `4.21.0-20240522`       | Image tag for all selenium components    |
-| `global.seleniumGrid.nodesImageTag`            | `4.21.0-20240522`       | Image tag for browser's nodes            |
-| `global.seleniumGrid.videoImageTag`            | `ffmpeg-6.1.1-20240522` | Image tag for browser's video recorder   |
-| `global.seleniumGrid.imagePullSecret`          | `""`                    | Pull secret to be used for all images    |
-| `global.seleniumGrid.imagePullSecret`          | `""`                    | Pull secret to be used for all images    |
-| `global.seleniumGrid.affinity`                 | `{}`                    | Affinity assigned globally               |
-| `global.seleniumGrid.logLevel`                 | `INFO`                  | Set log level for all components         |
-| `global.seleniumGrid.defaultNodeStartupProbe`  | `exec`                  | Default startup probe method in Nodes    |
-| `global.seleniumGrid.defaultNodeLivenessProbe` | `exec`                  | Default liveness probe method in Nodes   |
-| `global.seleniumGrid.stdoutProbeLog`           | `true`                  | Enable probe logs output in kubectl logs |
+| Parameter                                           | Default                 | Description                                 |
+|-----------------------------------------------------|-------------------------|---------------------------------------------|
+| `global.K8S_PUBLIC_IP`                              | `""`                    | Public IP of the host running K8s           |
+| `global.seleniumGrid.imageRegistry`                 | `selenium`              | Distribution registry to pull images        |
+| `global.seleniumGrid.imageTag`                      | `4.21.0-20240522`       | Image tag for all selenium components       |
+| `global.seleniumGrid.nodesImageTag`                 | `4.21.0-20240522`       | Image tag for browser's nodes               |
+| `global.seleniumGrid.videoImageTag`                 | `ffmpeg-6.1.1-20240522` | Image tag for browser's video recorder      |
+| `global.seleniumGrid.imagePullSecret`               | `""`                    | Pull secret to be used for all images       |
+| `global.seleniumGrid.affinity`                      | `{}`                    | Affinity assigned globally                  |
+| `global.seleniumGrid.logLevel`                      | `INFO`                  | Set log level for all components            |
+| `global.seleniumGrid.defaultNodeStartupProbe`       | `exec`                  | Default startup probe method in Nodes       |
+| `global.seleniumGrid.defaultNodeLivenessProbe`      | `exec`                  | Default liveness probe method in Nodes      |
+| `global.seleniumGrid.defaultComponentLivenessProbe` | `exec`                  | Default liveness probe method in Components |
+| `global.seleniumGrid.stdoutProbeLog`                | `true`                  | Enable probe logs output in kubectl logs    |
 
 #### Configuration `global.K8S_PUBLIC_IP`
 
@@ -379,7 +382,9 @@ edgeNode:
       protocol: TCP
 ```
 
-#### Probes
+### Configuration of Probes
+
+#### Node Probes
 
 By default, `startupProbe` is enabled and `readinessProbe` and `livenessProbe` are disabled. You can enable/disable them via `.startupProbe.enabled` `.readinessProbe.enabled` `.livenessProbe.enabled` in respective node type.
 
@@ -411,6 +416,22 @@ edgeNode:
     periodSeconds: 5
 ```
 
+#### Distributor Probes
+
+By default, `startupProbe`, `readinessProbe` and `livenessProbe` are enabled for this component in both full distributed and Hub-Nodes mode.
+
+The chart script `configs/distributor/distributorProbe.sh` is loaded into a ConfigMap, mounted into the container, and used by the `livenessProbe`. You can customize the script via `--set-file distributorConfigMap.extraScripts.distributorProbe\.sh=/path/to/your_script.sh` or set it via YAML values.
+
+There are reports of a rare, hard-to-reproduce scenario: `Grid UI is accessible but no nodes can be fetched or registered. Or something like there are few requests in session queue but could not be accepted. After restarting the Distributor, the issue is resolved`. To handle it proactively, the `livenessProbe` runs a condition check and restarts the Distributor automatically whenever it is detected as unhealthy. The script queries the GraphQL endpoint to get `sessionCount` and `sessionQueueSize`. If `sessionQueueSize` is greater than 0 while `sessionCount` is 0 for `failureThreshold` consecutive checks, the Distributor is restarted. You can adjust the threshold as well as the interval via the probe settings.
+
+#### Router Probes
+
+By default, `startupProbe`, `readinessProbe` and `livenessProbe` are enabled for this component in full distributed mode.
+
+The chart script `configs/router/routerProbe.sh` is loaded into a ConfigMap, mounted into the container, and used by the `livenessProbe`. You can customize the script via `--set-file routerConfigMap.extraScripts.routerProbe\.sh=/path/to/your_script.sh` or set it via YAML values.
+
+The script checks whether the GraphQL endpoint is reachable. If the `http_code` is not `200` for `failureThreshold` consecutive checks, the Router is restarted. You can adjust the threshold as well as the interval via the probe settings.
+
 ### Configuration extra scripts mount to container
 
 This is supported for containers of browser node, video recorder and video uploader. By default, in these containers, there are scripts, config files implemented. In case you want to customize or replace them with your own implementation. Instead of forking the chart, use volume mount. Now, from your external files, you can insert them into ConfigMap via Helm CLI `--set-file` or compose them in your own YAML values file and pass to Helm CLI `--values` when deploying chart. Any files name that you defined will be picked up into ConfigMap and mounted to the container.
 
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+max_time=3
+retry_time=3
+probe_name="Probe.${1:-"Liveness"}"
+ts_format=${SE_LOG_TIMESTAMP_FORMAT:-"+%T.%3N"}
+
+if [ -n "${ROUTER_USERNAME}" ] && [ -n "${ROUTER_PASSWORD}" ]; then
+  BASIC_AUTH="${ROUTER_USERNAME}:${ROUTER_PASSWORD}@"
+fi
+
+if [ -z "${SE_GRID_GRAPHQL_URL}" ] && [ -n "${SE_HUB_HOST:-${SE_ROUTER_HOST}}" ] && [ -n "${SE_HUB_PORT:-${SE_ROUTER_PORT}}" ]; then
+  SE_GRID_GRAPHQL_URL="${SE_SERVER_PROTOCOL}://${BASIC_AUTH}${SE_HUB_HOST:-${SE_ROUTER_HOST}}:${SE_HUB_PORT:-${SE_ROUTER_PORT}}${SE_SUB_PATH}/graphql"
+elif [ -z "${SE_GRID_GRAPHQL_URL}" ]; then
+  echo "$(date ${ts_format}) DEBUG [${probe_name}] - Could not construct GraphQL endpoint, it can be set directly via SE_GRID_GRAPHQL_URL. Bypass the probe checks for now."
+  exit 0
+fi
+
+GRAPHQL_PRE_CHECK=$(curl --noproxy "*" -m ${max_time} -k -X POST -H "Content-Type: application/json" --data '{"query":"{ grid { sessionCount } }"}' -s -o /dev/null -w "%{http_code}" ${SE_GRID_GRAPHQL_URL})
+
+if [ ${GRAPHQL_PRE_CHECK} -ne 200 ]; then
+  echo "$(date ${ts_format}) DEBUG [${probe_name}] - GraphQL endpoint ${SE_GRID_GRAPHQL_URL} is not reachable. Status code: ${GRAPHQL_PRE_CHECK}."
+  exit 1
+fi
+
+SESSION_QUEUE_SIZE=$(curl --noproxy "*" --retry ${retry_time} -m ${max_time} -k -X POST -H "Content-Type: application/json" --data '{"query":"{ grid { sessionQueueSize } }"}' -s ${SE_GRID_GRAPHQL_URL} | jq -r '.data.grid.sessionQueueSize')
+
+SESSION_COUNT=$(curl --noproxy "*" --retry ${retry_time} -m ${max_time} -k -X POST -H "Content-Type: application/json" --data '{"query": "{ grid { sessionCount } }"}' -s ${SE_GRID_GRAPHQL_URL} | jq -r '.data.grid.sessionCount')
+
+MAX_SESSION=$(curl --noproxy "*" --retry ${retry_time} -m ${max_time} -k -X POST -H "Content-Type: application/json" --data '{"query":"{ grid { maxSession } }"}' -s ${SE_GRID_GRAPHQL_URL} | jq -r '.data.grid.maxSession')
+
+if [ ${SESSION_QUEUE_SIZE} -gt 0 ] && [ ${SESSION_COUNT} -eq 0 ]; then
+  echo "$(date ${ts_format}) DEBUG [${probe_name}] - Session Queue Size: ${SESSION_QUEUE_SIZE}, Session Count: ${SESSION_COUNT}, Max Session: ${MAX_SESSION}"
+  echo "$(date ${ts_format}) DEBUG [${probe_name}] - It seems the Distributor is delayed in processing a new session in the queue. Probe checks failed."
+  exit 1
+else
+  echo "$(date ${ts_format}) DEBUG [${probe_name}] - Distributor is healthy."
+  exit 0
+fi
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+max_time=3
+retry_time=3
+probe_name="Probe.${1:-"Liveness"}"
+ts_format=${SE_LOG_TIMESTAMP_FORMAT:-"+%T.%3N"}
+
+if [ -n "${ROUTER_USERNAME}" ] && [ -n "${ROUTER_PASSWORD}" ]; then
+  BASIC_AUTH="${ROUTER_USERNAME}:${ROUTER_PASSWORD}@"
+fi
+
+if [ -z "${SE_GRID_GRAPHQL_URL}" ] && [ -n "${SE_HUB_HOST:-${SE_ROUTER_HOST}}" ] && [ -n "${SE_HUB_PORT:-${SE_ROUTER_PORT}}" ]; then
+  SE_GRID_GRAPHQL_URL="${SE_SERVER_PROTOCOL}://${BASIC_AUTH}${SE_HUB_HOST:-${SE_ROUTER_HOST}}:${SE_HUB_PORT:-${SE_ROUTER_PORT}}${SE_SUB_PATH}/graphql"
+elif [ -z "${SE_GRID_GRAPHQL_URL}" ]; then
+  echo "$(date ${ts_format}) DEBUG [${probe_name}] - Could not construct GraphQL endpoint, it can be set directly via SE_GRID_GRAPHQL_URL. Bypass the probe checks for now."
+  exit 0
+fi
+
+GRAPHQL_PRE_CHECK=$(curl --noproxy "*" -m ${max_time} -k -X POST -H "Content-Type: application/json" --data '{"query":"{ grid { sessionCount } }"}' -s -o /dev/null -w "%{http_code}" ${SE_GRID_GRAPHQL_URL})
+
+if [ ${GRAPHQL_PRE_CHECK} -ne 200 ]; then
+  echo "$(date ${ts_format}) DEBUG [${probe_name}] - GraphQL endpoint ${SE_GRID_GRAPHQL_URL} is not reachable. Status code: ${GRAPHQL_PRE_CHECK}."
+  exit 1
+else
+  echo "$(date ${ts_format}) DEBUG [${probe_name}] - GraphQL endpoint is healthy."
+  exit 0
+fi
@@ -154,6 +154,20 @@ Service Account fullname
 {{- tpl (default (include "seleniumGrid.component.name" (list "selenium-serviceaccount" $)) .Values.serviceAccount.nameOverride) $ | trunc 63 | trimSuffix "-" -}}
 {{- end -}}
 
+{{/*
+Distributor ConfigMap fullname: `distributorConfigMap.nameOverride` when set, otherwise the component name built from "selenium-distributor-config"; the result is rendered with `tpl`, cut to 63 characters, and a trailing "-" is removed.
+*/}}
+{{- define "seleniumGrid.distributor.configmap.fullname" -}}
+{{- tpl (default (include "seleniumGrid.component.name" (list "selenium-distributor-config" $)) .Values.distributorConfigMap.nameOverride) $ | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Router ConfigMap fullname: `routerConfigMap.nameOverride` when set, otherwise the component name built from "selenium-router-config"; the result is rendered with `tpl`, cut to 63 characters, and a trailing "-" is removed.
+*/}}
+{{- define "seleniumGrid.router.configmap.fullname" -}}
+{{- tpl (default (include "seleniumGrid.component.name" (list "selenium-router-config" $)) .Values.routerConfigMap.nameOverride) $ | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
 {{/*
 Recorder ConfigMap fullname
 */}}
 
@@ -0,0 +1,31 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "seleniumGrid.distributor.configmap.fullname" $ }}
+  namespace: {{ $.Release.Namespace }}
+{{- with $.Values.distributorConfigMap.annotations }}
+  annotations: {{- toYaml . | nindent 4 }}
+{{- end }}
+  labels:
+    {{- include "seleniumGrid.commonLabels" $ | nindent 4 }}
+    {{- with $.Values.customLabels }}
+      {{- toYaml . | nindent 4 }}
+    {{- end }}
+data:
+  SE_GRID_GRAPHQL_URL: '{{ include "seleniumGrid.graphqlURL" $ }}'
+{{- $rendered := list -}}{{- /* file names already written to data; the second loop skips them */ -}}
+{{- range $scriptPath, $_ := $.Files.Glob $.Values.distributorConfigMap.extraScriptsImportFrom }}
+  {{- $scriptName := base $scriptPath -}}
+  {{- $override := index $.Values.distributorConfigMap.extraScripts $scriptName -}}{{- /* user value wins over the chart file */ -}}
+  {{- if not (empty $override) }}
+{{- $scriptName | nindent 2 -}}: {{- toYaml $override | indent 4 }}
+  {{- else }}
+{{- $scriptName | nindent 2 -}}: {{- toYaml ($.Files.Get $scriptPath) | indent 4 }}
+  {{- end }}
+  {{- $rendered = append $rendered $scriptName -}}
+{{- end }}
+{{- range $scriptName, $body := $.Values.distributorConfigMap.extraScripts }}
+  {{- if not (has $scriptName $rendered) }}
+{{- $scriptName | nindent 2 -}}: {{- toYaml (default "" $body) | indent 4 }}
+  {{- end }}
+{{- end }}
@@ -24,6 +24,7 @@ spec:
         checksum/event-bus-configmap: {{ include (print $.Template.BasePath "/event-bus-configmap.yaml") . | sha256sum }}
         checksum/logging-configmap: {{ include (print $.Template.BasePath "/logging-configmap.yaml") . | sha256sum }}
         checksum/server-configmap: {{ include (print $.Template.BasePath "/server-configmap.yaml") . | sha256sum }}
+        checksum/distributor-configmap: {{ include (print $.Template.BasePath "/distributor-configmap.yaml") . | sha256sum }}
         checksum/secrets: {{ include (print $.Template.BasePath "/secrets.yaml") . | sha256sum }}
     {{- with .Values.components.distributor.annotations }}
         {{- toYaml . | nindent 8 }}
@@ -63,6 +64,8 @@ spec:
             {{- tpl (toYaml .) $ | nindent 12 }}
           {{- end }}
           envFrom:
+            - configMapRef:
+                name: {{ template "seleniumGrid.distributor.configmap.fullname" . }}
             - configMapRef:
                 name: {{ template "seleniumGrid.eventBus.configmap.fullname" . }}
             - configMapRef:
@@ -75,6 +78,11 @@ spec:
               {{- toYaml . | nindent 12 }}
             {{- end }}
           volumeMounts:
+          {{- range $fileName, $value := $.Values.distributorConfigMap.extraScripts }}
+            - name: {{ tpl (default (include "seleniumGrid.distributor.configmap.fullname" $) $.Values.distributorConfigMap.scriptVolumeMountName) $ | quote }}
+              mountPath: {{ $.Values.distributorConfigMap.extraScriptsDirectory }}/{{ $fileName }}
+              subPath: {{ $fileName }}
+          {{- end }}
           {{- if .Values.tls.enabled }}
             - name: {{ include "seleniumGrid.tls.fullname" .  | quote }}
               mountPath: {{ .Values.serverConfigMap.certVolumeMountPath | quote }}
@@ -83,6 +91,57 @@ spec:
           ports:
             - containerPort: {{ .Values.components.distributor.port }}
               protocol: TCP
+        {{- if .Values.components.distributor.startupProbe.enabled }}
+          {{- with .Values.components.distributor.startupProbe }}
+          startupProbe:
+          {{- if (ne (include "seleniumGrid.probe.fromUserDefine" (dict "values" . "root" $)) "{}") }}
+            {{- include "seleniumGrid.probe.fromUserDefine" (dict "values" . "root" $) | nindent 10 }}
+          {{- else }}
+            httpGet:
+              scheme: {{ default (include "seleniumGrid.probe.httpGet.schema" $) .schema }}
+              path: {{ .path }}
+              port: {{ default ($.Values.components.distributor.port) .port }}
+          {{- end }}
+          {{- if (ne (include "seleniumGrid.probe.settings" .) "{}") }}
+            {{- include "seleniumGrid.probe.settings" . | nindent 12 }}
+          {{- end }}
+          {{- end }}
+        {{- end }}
+        {{- if .Values.components.distributor.readinessProbe.enabled }}
+          {{- with .Values.components.distributor.readinessProbe }}
+          readinessProbe:
+          {{- if (ne (include "seleniumGrid.probe.fromUserDefine" (dict "values" . "root" $)) "{}") }}
+            {{- include "seleniumGrid.probe.fromUserDefine" (dict "values" . "root" $) | nindent 10 }}
+          {{- else }}
+            httpGet:
+              scheme: {{ default (include "seleniumGrid.probe.httpGet.schema" $) .schema }}
+              path: {{ .path }}
+              port: {{ default ($.Values.components.distributor.port) .port }}
+          {{- end }}
+          {{- if (ne (include "seleniumGrid.probe.settings" .) "{}") }}
+            {{- include "seleniumGrid.probe.settings" . | nindent 12 }}
+          {{- end }}
+          {{- end }}
+        {{- end }}
+        {{- if .Values.components.distributor.livenessProbe.enabled }}
+          {{- with .Values.components.distributor.livenessProbe }}
+          livenessProbe:
+          {{- if (ne (include "seleniumGrid.probe.fromUserDefine" (dict "values" . "root" $)) "{}") }}
+            {{- include "seleniumGrid.probe.fromUserDefine" (dict "values" . "root" $) | nindent 10 }}
+          {{- else if eq $.Values.global.seleniumGrid.defaultComponentLivenessProbe "exec" }}
+            exec:
+              command: ["bash", "-c", "{{ $.Values.distributorConfigMap.extraScriptsDirectory }}/distributorProbe.sh Liveness {{ include "seleniumGrid.probe.stdout" $ }}"]
+          {{- else }}
+            httpGet:
+              scheme: {{ default (include "seleniumGrid.probe.httpGet.schema" $) .schema }}
+              path: {{ .path }}
+              port: {{ default ($.Values.components.distributor.port) .port }}
+          {{- end }}
+          {{- if (ne (include "seleniumGrid.probe.settings" .) "{}") }}
+            {{- include "seleniumGrid.probe.settings" . | nindent 12 }}
+          {{- end }}
+          {{- end }}
+        {{- end }}
         {{- with .Values.components.distributor.resources }}
           resources: {{- toYaml . | nindent 12 }}
         {{- end }}
@@ -107,6 +166,10 @@ spec:
       priorityClassName: {{ . }}
     {{- end }}
       volumes:
+        - name: {{ tpl (default (include "seleniumGrid.distributor.configmap.fullname" $) $.Values.distributorConfigMap.scriptVolumeMountName) $ | quote }}
+          configMap:
+            name: {{ template "seleniumGrid.distributor.configmap.fullname" $ }}
+            defaultMode: {{ $.Values.distributorConfigMap.defaultMode }}
       {{- if .Values.tls.enabled }}
         - name: {{ include "seleniumGrid.tls.fullname" .  | quote }}
           secret: