-
-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Closed
Labels
I-autoscaling-k8s (Issue relates to autoscaling in Kubernetes, or the scaler in KEDA), R-awaiting-retest
Description
What happened?
I tried upgrading the grid (4.27.0) from Helm chart version 0.38.0 to 0.38.2 (trunk branch), and KEDA does not seem to be picking up pending sessions from the queue.
I am passing just one capability in my tests, browserName: 'chrome':
capabilities: [{
browserName: 'chrome',
'se:downloadsEnabled': true
}],
Autoscaling type is Job. I have tried both the default and accurate strategies. Is there some breaking change in 0.38.2?
Command used to start Selenium Grid with Docker (or Kubernetes)
Using helm 0.38.2
Note: I took the Helm chart from the trunk branch today; its latest commit is https://github.com/SeleniumHQ/docker-selenium/commit/d01680cba3feb3d050d9ff667aaa9816fca8e33a
global:
seleniumGrid:
# Image registry for all selenium components
imageRegistry: myrepo/selenium-grid
# Image tag for all selenium components
imageTag: "4.27.0-20241225"
# Image tag for browser's nodes
nodesImageTag: "4.27.0-20241225"
# Image pull secret for all selenium components
imagePullSecret: ""
# Log level for all components. Possible values are described here: https://www.selenium.dev/documentation/grid/configuration/cli_options/#logging
logLevel: INFO
# -- Whether to enable structured logging
structuredLogs: true
# kubectl image is used to execute kubectl commands in utility jobs
kubectlImage: myrepo/bitnami/kubectl:latest
isolateComponents: false
# Basic auth settings for Selenium Grid
basicAuth:
# Enable or disable basic auth
enabled: true
# -- Username for basic auth
username: $GRID_USERNAME
# -- Password for basic auth
password: $GRID_PASSWORD
# -- Embed the basic auth "u:p@" in a few URLs, e.g. SE_NODE_GRID_URL.
embeddedUrl: false
autoscaling:
enabled: true
scalingType: job
scaledOptions:
minReplicaCount: 0
maxReplicaCount: $MAX_REPLICAS_COUNT
pollingInterval: 20
# terminationGracePeriodSeconds: 5400 #default
# Options for KEDA ScaledJobs (only used when scalingType is set to "job"). See https://keda.sh/docs/latest/concepts/scaling-jobs/#scaledjob-spec
scaledJobOptions:
scalingStrategy:
# Change this from "default" to "accurate" or "eager" when the calculation problem is fixed
# -- Scaling strategy for KEDA ScaledJob
strategy: default
customLabels: {"app-id": "selgrid", "app-tier": "application", "app-name": "selgrid"}
tls:
ingress:
enabled: true
ingress:
# Name of ingress class to select which controller will implement ingress resource
# Custom annotations for ingress resource
annotations:
nginx.ingress.kubernetes.io/proxy-connect-timeout: "900"
nginx.ingress.kubernetes.io/proxy-read-timeout: "900"
nginx.ingress.kubernetes.io/proxy-send-timeout: "900"
nginx.ingress.kubernetes.io/proxy-body-size: 100m
# Default host for the ingress resource
hostname: $INGRESS_DOMAIN
tls:
- secretName: sel-grid-tls-secret-dynamic
hosts:
- $INGRESS_DOMAIN
router:
imagePullPolicy: Always
distributor:
imagePullPolicy: Always
eventBus:
imagePullPolicy: Always
sessionMap:
imagePullPolicy: Always
sessionQueue:
imagePullPolicy: Always
hub:
# Custom sub path for the hub deployment
# subPath: /selenium
imagePullPolicy: Always
# Resources for container
resources:
requests:
memory: "200Mi"
cpu: "100m"
limits:
memory: "9Gi"
cpu: "4"
extraEnvironmentVariables:
- name: SE_JAVA_OPTS
value: "-Xmx8192m"
chromeNode:
# Number of chrome nodes. Only used when autoscaling is disabled.
replicas: $FIXED_CHROME_REPLICAS
imagePullPolicy: Always
# /dev/shm volume
dshmVolumeSizeLimit: "2Gi"
# Resources for chrome-node container
resources:
requests:
memory: "100Mi"
cpu: "100m"
limits:
memory: "2Gi"
cpu: "2"
extraEnvironmentVariables:
- name: "SE_NODE_ENABLE_MANAGED_DOWNLOADS"
value: "true"
# - name: "SE_VNC_NO_PASSWORD"
# value: "1"
# - name: "SE_VNC_VIEW_ONLY"
# value: "1"
terminationGracePeriodSeconds: 5400
edgeNode:
replicas: $FIXED_EDGE_REPLICAS
imagePullPolicy: Always
# /dev/shm volume
dshmVolumeSizeLimit: "2Gi"
# Resources for edge-node container
resources:
requests:
memory: "100Mi"
cpu: "100m"
limits:
memory: "2Gi"
cpu: "2"
extraEnvironmentVariables:
- name: "SE_NODE_ENABLE_MANAGED_DOWNLOADS"
value: "true"
# - name: "SE_VNC_NO_PASSWORD"
# value: "1"
# - name: "SE_VNC_VIEW_ONLY"
# value: "1"
terminationGracePeriodSeconds: 5400
firefoxNode:
enabled: false
imagePullPolicy: Always
# /dev/shm volume
dshmVolumeSizeLimit: "2Gi"
# Resources for firefox-node container
resources:
requests:
memory: "1Gi"
cpu: "1"
limits:
memory: "2Gi"
cpu: "2"
extraEnvironmentVariables:
- name: "SE_NODE_ENABLE_MANAGED_DOWNLOADS"
value: "true"
# - name: "SE_VNC_NO_PASSWORD"
# value: "1"
# - name: "SE_VNC_VIEW_ONLY"
# value: "1"
autoscaling:
scaledOptions:
minReplicaCount: 0
maxReplicaCount: 3
terminationGracePeriodSeconds: 5400
keda:
image:
keda:
registry: myrepo
# -- Image name of KEDA operator
repository: myrepo/kedacore/keda
# -- Image tag of KEDA operator. Optional, given app version of Helm chart is used by default
tag: $KEDA_VERSION
metricsApiServer:
registry: myrepo
# -- Image name of KEDA Metrics API Server
repository: myrepo/kedacore/keda-metrics-apiserver
# -- Image tag of KEDA Metrics API Server. Optional, given app version of Helm chart is used by default
tag: $KEDA_VERSION
webhooks:
registry: myrepo
# -- Image name of KEDA admission-webhooks
repository: myrepo/kedacore/keda-admission-webhooks
# -- Image tag of KEDA admission-webhooks. Optional, given app version of Helm chart is used by default
tag: $KEDA_VERSION
# -- Image pullPolicy for all KEDA components
podLabels:
# -- Pod labels for KEDA operator
keda: {"app-id": "selgrid", "app-tier": "application", "app-name": "selgrid"}
# -- Pod labels for KEDA Metrics Adapter
metricsAdapter: {"app-id": "selgrid", "app-tier": "application", "app-name": "selgrid"}
# -- Pod labels for KEDA Admission webhooks
webhooks: {"app-id": "selgrid", "app-tier": "application", "app-name": "selgrid"}
Relevant log output
2024-12-27T10:06:00Z INFO setup maxprocs: Updating GOMAXPROCS=1: determined from CPU quota
2024-12-27T10:06:00Z INFO setup Starting manager
2024-12-27T10:06:00Z INFO setup KEDA Version: 2.16.1
2024-12-27T10:06:00Z INFO setup Git Commit: ce14b239e0300f388b0425aef68154d8070cd66f
2024-12-27T10:06:00Z INFO setup Go Version: go1.23.4
2024-12-27T10:06:00Z INFO setup Go OS/Arch: linux/amd64
2024-12-27T10:06:00Z INFO setup Running on Kubernetes 1.28+ {"version": "v1.28.15-eks-7f9249a"}
2024-12-27T10:06:00Z INFO setup WARNING: KEDA 2.16.1 hasn't been tested on Kubernetes v1.28.15-eks-7f9249a
2024-12-27T10:06:00Z INFO setup You can check recommended versions on https://keda.sh
2024-12-27T10:06:00Z INFO starting server {"name": "health probe", "addr": "[::]:8081"}
I1227 10:06:00.649846 1 leaderelection.go:254] attempting to acquire leader lease mer-merselgrid-dev-mer-sel-grid/operator.keda.sh...
I1227 10:06:17.014943 1 leaderelection.go:268] successfully acquired lease mer-merselgrid-dev-mer-sel-grid/operator.keda.sh
2024-12-27T10:06:17Z INFO Starting EventSource {"controller": "scaledobject", "controllerGroup": "keda.sh", "controllerKind": "ScaledObject", "source": "kind source: *v1alpha1.ScaledObject"}
2024-12-27T10:06:17Z INFO Starting EventSource {"controller": "scaledobject", "controllerGroup": "keda.sh", "controllerKind": "ScaledObject", "source": "kind source: *v2.HorizontalPodAutoscaler"}
2024-12-27T10:06:17Z INFO Starting Controller {"controller": "scaledobject", "controllerGroup": "keda.sh", "controllerKind": "ScaledObject"}
2024-12-27T10:06:17Z INFO Starting EventSource {"controller": "triggerauthentication", "controllerGroup": "keda.sh", "controllerKind": "TriggerAuthentication", "source": "kind source: *v1alpha1.TriggerAuthentication"}
2024-12-27T10:06:17Z INFO Starting Controller {"controller": "triggerauthentication", "controllerGroup": "keda.sh", "controllerKind": "TriggerAuthentication"}
2024-12-27T10:06:17Z INFO Starting EventSource {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob", "source": "kind source: *v1alpha1.ScaledJob"}
2024-12-27T10:06:17Z INFO Starting Controller {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob"}
2024-12-27T10:06:17Z INFO Starting EventSource {"controller": "cloudeventsource", "controllerGroup": "eventing.keda.sh", "controllerKind": "CloudEventSource", "source": "kind source: *v1alpha1.CloudEventSource"}
2024-12-27T10:06:17Z INFO Starting Controller {"controller": "cloudeventsource", "controllerGroup": "eventing.keda.sh", "controllerKind": "CloudEventSource"}
2024-12-27T10:06:17Z INFO Starting EventSource {"controller": "clustertriggerauthentication", "controllerGroup": "keda.sh", "controllerKind": "ClusterTriggerAuthentication", "source": "kind source: *v1alpha1.ClusterTriggerAuthentication"}
2024-12-27T10:06:17Z INFO Starting Controller {"controller": "clustertriggerauthentication", "controllerGroup": "keda.sh", "controllerKind": "ClusterTriggerAuthentication"}
2024-12-27T10:06:17Z INFO Starting EventSource {"controller": "clustercloudeventsource", "controllerGroup": "eventing.keda.sh", "controllerKind": "ClusterCloudEventSource", "source": "kind source: *v1alpha1.ClusterCloudEventSource"}
2024-12-27T10:06:17Z INFO Starting Controller {"controller": "clustercloudeventsource", "controllerGroup": "eventing.keda.sh", "controllerKind": "ClusterCloudEventSource"}
2024-12-27T10:06:17Z INFO Starting EventSource {"controller": "cert-rotator", "source": "kind source: *v1.Secret"}
2024-12-27T10:06:17Z INFO Starting EventSource {"controller": "cert-rotator", "source": "kind source: *unstructured.Unstructured"}
2024-12-27T10:06:17Z INFO Starting EventSource {"controller": "cert-rotator", "source": "kind source: *unstructured.Unstructured"}
2024-12-27T10:06:17Z INFO Starting Controller {"controller": "cert-rotator"}
2024-12-27T10:06:17Z INFO cert-rotation starting cert rotator controller
2024-12-27T10:06:17Z INFO cert-rotation no cert refresh needed
2024-12-27T10:06:17Z INFO cert-rotation certs are ready in /certs
2024-12-27T10:06:17Z INFO Starting workers {"controller": "cert-rotator", "worker count": 1}
2024-12-27T10:06:17Z INFO cert-rotation no cert refresh needed
2024-12-27T10:06:17Z ERROR cert-rotation Webhook not found. Unable to update certificate. {"name": "keda-admission", "gvk": "admissionregistration.k8s.io/v1, Kind=ValidatingWebhookConfiguration", "error": "ValidatingWebhookConfiguration.admissionregistration.k8s.io \"keda-admission\" not found"}
github.com/open-policy-agent/cert-controller/pkg/rotator.(*ReconcileWH).ensureCerts
/workspace/vendor/github.com/open-policy-agent/cert-controller/pkg/rotator/rotator.go:822
github.com/open-policy-agent/cert-controller/pkg/rotator.(*ReconcileWH).Reconcile
/workspace/vendor/github.com/open-policy-agent/cert-controller/pkg/rotator/rotator.go:791
sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile
/workspace/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:116
sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler
/workspace/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:303
sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem
/workspace/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:263
sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2
/workspace/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:224
2024-12-27T10:06:17Z INFO cert-rotation Ensuring CA cert {"name": "v1beta1.external.metrics.k8s.io", "gvk": "apiregistration.k8s.io/v1, Kind=APIService", "name": "v1beta1.external.metrics.k8s.io", "gvk": "apiregistration.k8s.io/v1, Kind=APIService"}
2024-12-27T10:06:17Z INFO Starting workers {"controller": "triggerauthentication", "controllerGroup": "keda.sh", "controllerKind": "TriggerAuthentication", "worker count": 1}
2024-12-27T10:06:17Z INFO Starting workers {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob", "worker count": 1}
2024-12-27T10:06:17Z INFO Starting workers {"controller": "scaledobject", "controllerGroup": "keda.sh", "controllerKind": "ScaledObject", "worker count": 5}
2024-12-27T10:06:17Z INFO Reconciling ScaledJob {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob", "ScaledJob": {"name":"org-selgrid-selenium-node-chrome","namespace":"mer-merselgrid-dev-mer-sel-grid"}, "namespace": "mer-merselgrid-dev-mer-sel-grid", "name": "org-selgrid-selenium-node-chrome", "reconcileID": "c7dd5fba-37ed-495f-8796-8286dad16274"}
2024-12-27T10:06:17Z INFO Starting workers {"controller": "clustercloudeventsource", "controllerGroup": "eventing.keda.sh", "controllerKind": "ClusterCloudEventSource", "worker count": 1}
2024-12-27T10:06:17Z INFO Starting workers {"controller": "cloudeventsource", "controllerGroup": "eventing.keda.sh", "controllerKind": "CloudEventSource", "worker count": 1}
2024-12-27T10:06:17Z INFO Starting workers {"controller": "clustertriggerauthentication", "controllerGroup": "keda.sh", "controllerKind": "ClusterTriggerAuthentication", "worker count": 1}
2024-12-27T10:06:17Z INFO KubeAPIWarningLogger unknown field "status.authenticationsTypes"
2024-12-27T10:06:17Z INFO KubeAPIWarningLogger unknown field "status.triggersTypes"
2024-12-27T10:06:17Z INFO RolloutStrategy: immediate, No jobs owned by the previous version of the scaledJob {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob", "ScaledJob": {"name":"org-selgrid-selenium-node-chrome","namespace":"mer-merselgrid-dev-mer-sel-grid"}, "namespace": "mer-merselgrid-dev-mer-sel-grid", "name": "org-selgrid-selenium-node-chrome", "reconcileID": "c7dd5fba-37ed-495f-8796-8286dad16274"}
2024-12-27T10:06:17Z INFO Initializing Scaling logic according to ScaledJob Specification {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob", "ScaledJob": {"name":"org-selgrid-selenium-node-chrome","namespace":"mer-merselgrid-dev-mer-sel-grid"}, "namespace": "mer-merselgrid-dev-mer-sel-grid", "name": "org-selgrid-selenium-node-chrome", "reconcileID": "c7dd5fba-37ed-495f-8796-8286dad16274"}
2024-12-27T10:06:17Z INFO scaleexecutor Scaling Jobs {"scaledJob.Name": "org-selgrid-selenium-node-chrome", "scaledJob.Namespace": "mer-merselgrid-dev-mer-sel-grid", "Number of running Jobs": 0}
2024-12-27T10:06:17Z INFO scaleexecutor Scaling Jobs {"scaledJob.Name": "org-selgrid-selenium-node-chrome", "scaledJob.Namespace": "mer-merselgrid-dev-mer-sel-grid", "Number of pending Jobs": 0}
2024-12-27T10:06:17Z INFO Reconciling ScaledJob {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob", "ScaledJob": {"name":"org-selgrid-selenium-node-edge","namespace":"mer-merselgrid-dev-mer-sel-grid"}, "namespace": "mer-merselgrid-dev-mer-sel-grid", "name": "org-selgrid-selenium-node-edge", "reconcileID": "0c1ff265-5fa8-41f4-a218-776a7bab2dd8"}
2024-12-27T10:06:17Z INFO RolloutStrategy: immediate, No jobs owned by the previous version of the scaledJob {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob", "ScaledJob": {"name":"org-selgrid-selenium-node-edge","namespace":"mer-merselgrid-dev-mer-sel-grid"}, "namespace": "mer-merselgrid-dev-mer-sel-grid", "name": "org-selgrid-selenium-node-edge", "reconcileID": "0c1ff265-5fa8-41f4-a218-776a7bab2dd8"}
2024-12-27T10:06:17Z INFO Initializing Scaling logic according to ScaledJob Specification {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob", "ScaledJob": {"name":"org-selgrid-selenium-node-edge","namespace":"mer-merselgrid-dev-mer-sel-grid"}, "namespace": "mer-merselgrid-dev-mer-sel-grid", "name": "org-selgrid-selenium-node-edge", "reconcileID": "0c1ff265-5fa8-41f4-a218-776a7bab2dd8"}
2024-12-27T10:06:17Z INFO scaleexecutor Scaling Jobs {"scaledJob.Name": "org-selgrid-selenium-node-edge", "scaledJob.Namespace": "mer-merselgrid-dev-mer-sel-grid", "Number of running Jobs": 0}
2024-12-27T10:06:17Z INFO scaleexecutor Scaling Jobs {"scaledJob.Name": "org-selgrid-selenium-node-edge", "scaledJob.Namespace": "mer-merselgrid-dev-mer-sel-grid", "Number of pending Jobs": 0}
Operating System
v1.28.15-eks-7f9249a
Docker Selenium version (image tag)
4.27.0-20241225
Selenium Grid chart version (chart version)
0.38.2
Metadata
Metadata
Assignees
Labels
I-autoscaling-k8s (Issue relates to autoscaling in Kubernetes, or the scaler in KEDA), R-awaiting-retest