
Commit 04ac6f9

Add test for autoscaling count

[skip ci] Signed-off-by: Viet Nguyen Duc <[email protected]>
1 parent 31fba3c

File tree: 12 files changed (+318 −57 lines)

.github/workflows/k8s-scaling-test.yml

Lines changed: 40 additions & 51 deletions
@@ -16,12 +16,43 @@ permissions:
 jobs:
   build-and-test:
     name: Test K8s
-    runs-on: blacksmith-16vcpu-ubuntu-2204
+    runs-on: ubuntu-latest
     strategy:
       fail-fast: false
       matrix:
         include:
           - k8s-version: 'v1.31.2'
+            test-strategy: chart_test_autoscaling_job_count_chaos
+            cluster: 'minikube'
+            helm-version: 'v3.16.3'
+            docker-version: '27.3.1'
+            python-version: '3.13'
+          - k8s-version: 'v1.31.2'
+            test-strategy: chart_test_autoscaling_job_count_max_sessions
+            cluster: 'minikube'
+            helm-version: 'v3.16.3'
+            docker-version: '27.3.1'
+            python-version: '3.13'
+          - k8s-version: 'v1.31.2'
+            test-strategy: chart_test_autoscaling_job_count
+            cluster: 'minikube'
+            helm-version: 'v3.16.3'
+            docker-version: '27.3.1'
+            python-version: '3.13'
+          - k8s-version: 'v1.31.2'
+            test-strategy: chart_test_autoscaling_deployment_count_chaos
+            cluster: 'minikube'
+            helm-version: 'v3.16.3'
+            docker-version: '27.3.1'
+            python-version: '3.13'
+          - k8s-version: 'v1.31.2'
+            test-strategy: chart_test_autoscaling_deployment_count_max_sessions
+            cluster: 'minikube'
+            helm-version: 'v3.16.3'
+            docker-version: '27.3.1'
+            python-version: '3.13'
+          - k8s-version: 'v1.31.2'
+            test-strategy: chart_test_autoscaling_deployment_count
             cluster: 'minikube'
             helm-version: 'v3.16.3'
             docker-version: '27.3.1'

@@ -79,11 +110,6 @@ jobs:
         echo "AUTHORS=${AUTHORS}" >> $GITHUB_ENV
       env:
         AUTHORS: ${{ vars.AUTHORS || 'SeleniumHQ' }}
-    - name: Build Helm charts
-      run: |
-        BUILD_DATE=${BUILD_DATE} make chart_build
-        echo "CHART_PACKAGE_PATH=$(cat /tmp/selenium_chart_version)" >> $GITHUB_ENV
-        echo "CHART_FILE_NAME=$(basename $(cat /tmp/selenium_chart_version))" >> $GITHUB_ENV
     - name: Build Docker images
       uses: nick-invision/retry@master
       with:

@@ -97,61 +123,24 @@ jobs:
         timeout_minutes: 10
         max_attempts: 3
         command: CLUSTER=${CLUSTER} SERVICE_MESH=${SERVICE_MESH} KUBERNETES_VERSION=${KUBERNETES_VERSION} NAME=${IMAGE_REGISTRY} VERSION=${BRANCH} BUILD_DATE=${BUILD_DATE} make chart_cluster_setup
+    - name: Build Helm charts
+      run: |
+        BUILD_DATE=${BUILD_DATE} make chart_build
+        echo "CHART_PACKAGE_PATH=$(cat /tmp/selenium_chart_version)" >> $GITHUB_ENV
+        echo "CHART_FILE_NAME=$(basename $(cat /tmp/selenium_chart_version))" >> $GITHUB_ENV
     - name: Test Selenium Grid on Kubernetes with Autoscaling
       uses: nick-invision/retry@master
       with:
         timeout_minutes: 30
         max_attempts: 3
         command: |
-          NAME=${IMAGE_REGISTRY} VERSION=${BRANCH} BUILD_DATE=${BUILD_DATE} TEST_UPGRADE_CHART=false make chart_test_autoscaling_job_count_chaos
-    - name: Upload results
-      if: always()
-      uses: actions/upload-artifact@main
-      with:
-        name: chart_test_autoscaling_job_count_chaos
-        path: ./tests/tests/*.md
-        if-no-files-found: ignore
-    - name: Test Selenium Grid on Kubernetes with Autoscaling
-      uses: nick-invision/retry@master
-      with:
-        timeout_minutes: 30
-        max_attempts: 3
-        command: |
-          NAME=${IMAGE_REGISTRY} VERSION=${BRANCH} BUILD_DATE=${BUILD_DATE} TEST_UPGRADE_CHART=false make chart_test_autoscaling_job_count_max_sessions
-    - name: Upload results
-      if: always()
-      uses: actions/upload-artifact@main
-      with:
-        name: chart_test_autoscaling_job_count_max_sessions
-        path: ./tests/tests/*.md
-        if-no-files-found: ignore
-    - name: Test Selenium Grid on Kubernetes with Autoscaling
-      uses: nick-invision/retry@master
-      with:
-        timeout_minutes: 30
-        max_attempts: 3
-        command: |
-          NAME=${IMAGE_REGISTRY} VERSION=${BRANCH} BUILD_DATE=${BUILD_DATE} TEST_UPGRADE_CHART=false make chart_test_autoscaling_job_count_strategy_accurate
-    - name: Upload results
-      if: always()
-      uses: actions/upload-artifact@main
-      with:
-        name: chart_test_autoscaling_job_count_strategy_accurate
-        path: ./tests/tests/*.md
-        if-no-files-found: ignore
-    - name: Test Selenium Grid on Kubernetes with Autoscaling
-      uses: nick-invision/retry@master
-      with:
-        timeout_minutes: 30
-        max_attempts: 3
-        command: |
-          NAME=${IMAGE_REGISTRY} VERSION=${BRANCH} BUILD_DATE=${BUILD_DATE} TEST_UPGRADE_CHART=false make chart_test_autoscaling_job_count
+          NAME=${IMAGE_REGISTRY} VERSION=${BRANCH} BUILD_DATE=${BUILD_DATE} TEST_UPGRADE_CHART=false make ${{ matrix.test-strategy }}
     - name: Upload results
       if: always()
       uses: actions/upload-artifact@main
       with:
-        name: chart_test_autoscaling_job_count
-        path: ./tests/tests/*.md
+        name: ${{ matrix.test-strategy }}.md
+        path: ./tests/tests/scale_up_results.md
         if-no-files-found: ignore
     - name: Cleanup Kubernetes cluster
       if: always()
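The net effect of this workflow change is to replace six copy-pasted test-and-upload step pairs with a single matrix-driven pair: each matrix entry supplies a test-strategy value that is passed straight to make, and the "Build Helm charts" step now runs after cluster setup instead of before the Docker image build. As a rough local equivalent of what the matrix expands to (a sketch only, not part of the commit; it assumes a repository checkout with NAME, VERSION and BUILD_DATE exported the way the workflow does before calling make):

# Hypothetical local driver that mirrors the CI matrix; not part of the commit.
# Assumes it runs from the repository root with NAME, VERSION and BUILD_DATE
# already exported, as the workflow does before invoking make.
import os
import subprocess

STRATEGIES = [
    "chart_test_autoscaling_job_count_chaos",
    "chart_test_autoscaling_job_count_max_sessions",
    "chart_test_autoscaling_job_count",
    "chart_test_autoscaling_deployment_count_chaos",
    "chart_test_autoscaling_deployment_count_max_sessions",
    "chart_test_autoscaling_deployment_count",
]

for strategy in STRATEGIES:
    # Each matrix entry reduces to: TEST_UPGRADE_CHART=false make <test-strategy>
    subprocess.run(["make", strategy],
                   env={**os.environ, "TEST_UPGRADE_CHART": "false"},
                   check=True)

In CI the six strategies run as parallel matrix jobs rather than sequentially as in this loop.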

Makefile

Lines changed: 31 additions & 1 deletion
@@ -29,7 +29,7 @@ SBOM_OUTPUT := $(or $(SBOM_OUTPUT),$(SBOM_OUTPUT),package_versions.txt)
 KEDA_TAG_PREV_VERSION := $(or $(KEDA_TAG_PREV_VERSION),$(KEDA_TAG_PREV_VERSION),2.16.0-selenium-grid)
 KEDA_TAG_VERSION := $(or $(KEDA_TAG_VERSION),$(KEDA_TAG_VERSION),2.16.0-selenium-grid)
 KEDA_BASED_NAME := $(or $(KEDA_BASED_NAME),$(KEDA_BASED_NAME),ndviet)
-KEDA_BASED_TAG := $(or $(KEDA_BASED_TAG),$(KEDA_BASED_TAG),2.16.0-selenium-grid-20241127)
+KEDA_BASED_TAG := $(or $(KEDA_BASED_TAG),$(KEDA_BASED_TAG),2.16.0-selenium-grid-20241128)

 all: hub \
     distributor \

@@ -961,6 +961,36 @@ chart_test_autoscaling_playwright_connect_grid:
     TEMPLATE_OUTPUT_FILENAME="k8s_playwright_connect_grid_basicAuth_secureIngress_ingressPublicIP_autoScaling_patchKEDA.yaml" \
     ./tests/charts/make/chart_test.sh JobAutoscaling

+chart_test_autoscaling_job_count_chaos:
+    MATRIX_TESTS=AutoScalingTestsScaleChaos \
+    make chart_test_autoscaling_job_count
+
+chart_test_autoscaling_job_count_max_sessions:
+    MAX_SESSIONS_FIREFOX=2 MAX_SESSIONS_EDGE=2 MAX_SESSIONS_CHROME=2 \
+    make chart_test_autoscaling_job_count
+
+chart_test_autoscaling_job_count:
+    MATRIX_TESTS=$(or $(MATRIX_TESTS), "AutoscalingTestsScaleUp") SCALING_STRATEGY=$(or $(SCALING_STRATEGY), "default") \
+    PLATFORMS=$(PLATFORMS) RELEASE_NAME=selenium TEST_PATCHED_KEDA=true SELENIUM_GRID_PROTOCOL=http SELENIUM_GRID_HOST=localhost SELENIUM_GRID_PORT=80 \
+    SELENIUM_GRID_MONITORING=false CLEAR_POD_HISTORY=true SET_MAX_REPLICAS=100 ENABLE_VIDEO_RECORDER=false \
+    VERSION=$(TAG_VERSION) VIDEO_TAG=$(FFMPEG_TAG_VERSION)-$(BUILD_DATE) KEDA_BASED_NAME=$(KEDA_BASED_NAME) KEDA_BASED_TAG=$(KEDA_BASED_TAG) NAMESPACE=$(NAMESPACE) BINDING_VERSION=$(BINDING_VERSION) BASE_VERSION=$(BASE_VERSION) \
+    ./tests/charts/make/chart_test.sh JobAutoscaling
+
+chart_test_autoscaling_deployment_count_chaos:
+    MATRIX_TESTS=AutoScalingTestsScaleChaos \
+    make chart_test_autoscaling_deployment_count
+
+chart_test_autoscaling_deployment_count_max_sessions:
+    MAX_SESSIONS_FIREFOX=2 MAX_SESSIONS_EDGE=2 MAX_SESSIONS_CHROME=2 \
+    make chart_test_autoscaling_deployment_count
+
+chart_test_autoscaling_deployment_count:
+    MATRIX_TESTS=$(or $(MATRIX_TESTS), "AutoscalingTestsScaleUp") \
+    PLATFORMS=$(PLATFORMS) RELEASE_NAME=selenium TEST_PATCHED_KEDA=true SELENIUM_GRID_PROTOCOL=http SELENIUM_GRID_HOST=localhost SELENIUM_GRID_PORT=80 \
+    SELENIUM_GRID_MONITORING=false CLEAR_POD_HISTORY=true SET_MAX_REPLICAS=100 ENABLE_VIDEO_RECORDER=false \
+    VERSION=$(TAG_VERSION) VIDEO_TAG=$(FFMPEG_TAG_VERSION)-$(BUILD_DATE) KEDA_BASED_NAME=$(KEDA_BASED_NAME) KEDA_BASED_TAG=$(KEDA_BASED_TAG) NAMESPACE=$(NAMESPACE) BINDING_VERSION=$(BINDING_VERSION) BASE_VERSION=$(BASE_VERSION) \
+    ./tests/charts/make/chart_test.sh DeploymentAutoscaling
+
 chart_test_delete:
     helm del test -n selenium || true
     helm del selenium -n selenium || true

tests/AutoscalingTests/__init__.py

Whitespace-only changes.

tests/AutoscalingTests/common.py

Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@ (new file)
import unittest
import random
import time
import subprocess
import signal
import concurrent.futures
import csv
import os
from selenium import webdriver
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.edge.options import Options as EdgeOptions
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.remote.client_config import ClientConfig
from csv2md.table import Table

BROWSER = {
    "chrome": ChromeOptions(),
    "firefox": FirefoxOptions(),
    "edge": EdgeOptions(),
}

CLIENT_CONFIG = ClientConfig(
    remote_server_addr=f"http://localhost/selenium/wd/hub",
    keep_alive=True,
    timeout=3600,
)

FIELD_NAMES = ["Iteration", "New request sessions", "Requests accepted time", "Sessions failed", "New scaled pods", "Total sessions", "Total pods", "Gaps"]

def get_pod_count():
    result = subprocess.run(["kubectl", "get", "pods", "-A", "--no-headers"], capture_output=True, text=True)
    return len([line for line in result.stdout.splitlines() if "selenium-node-" in line and "Running" in line])

def create_session(browser_name):
    return webdriver.Remote(command_executor=CLIENT_CONFIG.remote_server_addr, options=BROWSER[browser_name], client_config=CLIENT_CONFIG)

def wait_for_count_matches(sessions, timeout=10, interval=5):
    elapsed = 0
    while elapsed < timeout:
        pod_count = get_pod_count()
        if pod_count == len(sessions):
            break
        print(f"VALIDATING: Waiting for pods to match sessions... ({elapsed}/{timeout} seconds elapsed)")
        time.sleep(interval)
        elapsed += interval
    if pod_count != len(sessions):
        print(f"WARN: Mismatch between pod count and session count after {timeout} seconds. Gaps: {pod_count - len(sessions)}")
    else:
        print(f"PASS: Pod count matches session count after {elapsed} seconds.")

def close_all_sessions(sessions):
    for session in sessions:
        session.quit()
    sessions.clear()
    return sessions

def create_sessions_in_parallel(new_request_sessions):
    failed_jobs = 0
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(create_session, random.choice(list(BROWSER.keys()))) for _ in range(new_request_sessions)]
        sessions = []
        for future in concurrent.futures.as_completed(futures):
            try:
                sessions.append(future.result())
            except Exception as e:
                print(f"ERROR: Failed to create session: {e}")
                failed_jobs += 1
    print(f"Total failed jobs: {failed_jobs}")
    return sessions

def randomly_quit_sessions(sessions, sublist_size):
    if sessions:
        sessions_to_quit = random.sample(sessions, min(sublist_size, len(sessions)))
        for session in sessions_to_quit:
            session.quit()
            sessions.remove(session)
        print(f"QUIT: {len(sessions_to_quit)} sessions have been randomly quit.")
    return sessions

def export_results_to_csv(output_file, field_names, results):
    with open(output_file, mode="w") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=field_names)
        writer.writeheader()
        writer.writerows(results)

def export_results_csv_to_md(csv_file, md_file):
    with open(csv_file) as f:
        table = Table.parse_csv(f)
    with open(md_file, mode="w") as f:
        f.write(table.markdown())
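Taken together, these helpers form a small harness: open a batch of remote sessions in parallel against the Grid, compare the number of live sessions with the number of Running selenium-node pods, then tear everything down and export the per-iteration metrics. A minimal usage sketch follows (not part of the commit; it assumes the repository root is on PYTHONPATH, a Grid reachable at the http://localhost/selenium/wd/hub address baked into CLIENT_CONFIG, and a kubectl context pointing at the autoscaling cluster; output paths are illustrative):

# Minimal sketch of driving the helpers in tests/AutoscalingTests/common.py; not part of the commit.
from tests.AutoscalingTests.common import (
    FIELD_NAMES, create_sessions_in_parallel, wait_for_count_matches,
    close_all_sessions, export_results_to_csv, export_results_csv_to_md,
)

sessions = create_sessions_in_parallel(5)      # request 5 sessions across chrome/firefox/edge
wait_for_count_matches(sessions, timeout=60)   # poll until Running selenium-node pods == open sessions

row = dict.fromkeys(FIELD_NAMES, 0)            # one dummy metrics row, for illustration only
row["Iteration"], row["Total sessions"] = 1, len(sessions)

close_all_sessions(sessions)
export_results_to_csv("/tmp/scale_up_results.csv", FIELD_NAMES, [row])
export_results_csv_to_md("/tmp/scale_up_results.csv", "/tmp/scale_up_results.md")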
Lines changed: 58 additions & 0 deletions
@@ -0,0 +1,58 @@ (new file)
import unittest
import random
import time
import signal
import csv
from csv2md.table import Table
from .common import *

SESSIONS = []
RESULTS = []

def signal_handler(signum, frame):
    print("Signal received, quitting all sessions...")
    close_all_sessions(SESSIONS)

signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)

class SeleniumAutoscalingTests(unittest.TestCase):
    def test_run_tests(self):
        try:
            for iteration in range(20):
                new_request_sessions = random.randint(3, 6)
                start_time = time.time()
                start_pods = get_pod_count()
                new_sessions = create_sessions_in_parallel(new_request_sessions)
                failed_sessions = new_request_sessions - len(new_sessions)
                end_time = time.time()
                stop_pods = get_pod_count()
                SESSIONS.extend(new_sessions)
                elapsed_time = end_time - start_time
                new_scaled_pods = stop_pods - start_pods
                total_sessions = len(SESSIONS)
                total_pods = get_pod_count()
                RESULTS.append({
                    FIELD_NAMES[0]: iteration + 1,
                    FIELD_NAMES[1]: new_request_sessions,
                    FIELD_NAMES[2]: f"{elapsed_time:.2f} s",
                    FIELD_NAMES[3]: failed_sessions,
                    FIELD_NAMES[4]: new_scaled_pods,
                    FIELD_NAMES[5]: total_sessions,
                    FIELD_NAMES[6]: total_pods,
                    FIELD_NAMES[7]: total_pods - total_sessions,
                })
                print(f"ADDING: Created {new_request_sessions} new sessions in {elapsed_time:.2f} seconds.")
                print(f"INFO: Total sessions: {total_sessions}")
                print(f"INFO: Total pods: {total_pods}")
                randomly_quit_sessions(SESSIONS, random.randint(3, 12))
                time.sleep(15)
        finally:
            print(f"FINISH: Closing {len(SESSIONS)} sessions.")
            close_all_sessions(SESSIONS)
            output_file = f"tests/scale_up_results"
            export_results_to_csv(f"{output_file}.csv", FIELD_NAMES, RESULTS)
            export_results_csv_to_md(f"{output_file}.csv", f"{output_file}.md")

if __name__ == "__main__":
    unittest.main()
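This test is a single unittest case that loops twenty times, requests a random batch of sessions, records how quickly the Grid accepted them and how many node pods were scaled up, then randomly quits a handful of sessions before the next iteration. Outside the chart_test.sh wrapper it could also be driven directly; the sketch below is illustrative only (the new module's file name is not shown in this view, so it relies on discovery rather than an explicit import, and it assumes a live Grid and cluster reachable from the repository root):

# Hypothetical standalone runner, for illustration only; in CI the suite is
# launched via ./tests/charts/make/chart_test.sh with MATRIX_TESTS selecting
# the scenario. Assumes a live Grid and cluster, run from the repository root.
import unittest

suite = unittest.defaultTestLoader.discover("tests/AutoscalingTests", pattern="*.py")
result = unittest.TextTestRunner(verbosity=2).run(suite)
raise SystemExit(0 if result.wasSuccessful() else 1)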
Lines changed: 59 additions & 0 deletions
@@ -0,0 +1,59 @@ (new file)
import unittest
import random
import time
import signal
import csv
from csv2md.table import Table
from .common import *

SESSIONS = []
RESULTS = []

def signal_handler(signum, frame):
    print("Signal received, quitting all sessions...")
    close_all_sessions(SESSIONS)

signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)

class SeleniumAutoscalingTests(unittest.TestCase):
    def test_run_tests(self):
        try:
            for iteration in range(20):
                new_request_sessions = random.randint(1, 3)
                start_time = time.time()
                start_pods = get_pod_count()
                new_sessions = create_sessions_in_parallel(new_request_sessions)
                failed_sessions = new_request_sessions - len(new_sessions)
                end_time = time.time()
                stop_pods = get_pod_count()
                SESSIONS.extend(new_sessions)
                elapsed_time = end_time - start_time
                new_scaled_pods = stop_pods - start_pods
                total_sessions = len(SESSIONS)
                total_pods = get_pod_count()
                RESULTS.append({
                    FIELD_NAMES[0]: iteration + 1,
                    FIELD_NAMES[1]: new_request_sessions,
                    FIELD_NAMES[2]: f"{elapsed_time:.2f} s",
                    FIELD_NAMES[3]: failed_sessions,
                    FIELD_NAMES[4]: new_scaled_pods,
                    FIELD_NAMES[5]: total_sessions,
                    FIELD_NAMES[6]: total_pods,
                    FIELD_NAMES[7]: total_pods - total_sessions,
                })
                print(f"ADDING: Created {new_request_sessions} new sessions in {elapsed_time:.2f} seconds.")
                print(f"INFO: Total sessions: {total_sessions}")
                print(f"INFO: Total pods: {total_pods}")
                if iteration % 5 == 0:
                    randomly_quit_sessions(SESSIONS, 20)
                time.sleep(15)
        finally:
            print(f"FINISH: Closing {len(SESSIONS)} sessions.")
            close_all_sessions(SESSIONS)
            output_file = f"tests/scale_up_results"
            export_results_to_csv(f"{output_file}.csv", FIELD_NAMES, RESULTS)
            export_results_csv_to_md(f"{output_file}.csv", f"{output_file}.md")

if __name__ == "__main__":
    unittest.main()
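Both new test modules record the same per-iteration metrics and export them to tests/scale_up_results.csv plus a markdown copy, which is the scale_up_results.md artifact the reworked workflow uploads. As an illustration of consuming that artifact after a run (not part of the commit; column names come from FIELD_NAMES in common.py and the path assumes the directory the tests write into):

# Illustrative post-processing of the exported results; not part of the commit.
# Column names match FIELD_NAMES in tests/AutoscalingTests/common.py.
import csv

with open("tests/scale_up_results.csv") as f:
    rows = list(csv.DictReader(f))

total_failed = sum(int(r["Sessions failed"]) for r in rows)
largest_gap = max(int(r["Gaps"]) for r in rows)
print(f"Iterations: {len(rows)}, failed sessions: {total_failed}, largest pod/session gap: {largest_gap}")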
