Skip to content

Commit 30d36cc

Browse files
committed
add 5000 node dra test
1 parent 86eb10d commit 30d36cc

File tree

1 file changed

+96
-1
lines changed

1 file changed

+96
-1
lines changed

config/jobs/kubernetes/sig-scalability/DRA/sig-scalability-periodic-dra.yaml

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ periodics:
300300
- --test-cmd-args=--experimental-prometheus-disk-snapshot-name=$(JOB_NAME)-$(BUILD_ID)
301301
- --test-cmd-args=--experimental-prometheus-snapshot-to-report-dir=true
302302
- --test-cmd-args=--testconfig=testing/dra/config.yaml
303-
- --test-cmd-args=--report-dir=${ARTIFACTS}
303+
- --test-cmd-args=--report-dir=$(ARTIFACTS)
304304
- --test-cmd-name=ClusterLoaderV2
305305
- --use-logexporter
306306
resources:
@@ -323,3 +323,98 @@ periodics:
323323
value: "true"
324324
- name: NODE_PRELOAD_IMAGES
325325
value: "gcr.io/k8s-staging-perf-tests/sleep:v0.0.3"
326+
327+
- name: ci-kubernetes-e2e-gce-5000-node-dra-with-workload
328+
cluster: k8s-infra-prow-build
329+
tags:
330+
- "perfDashPrefix: gce-dra-5000Nodes-with-workload"
331+
- "perfDashBuildsCount: 270"
332+
- "perfDashJobType: performance"
333+
# TODO(alaypatel07): increase this interval once stable
334+
interval: 24h
335+
labels:
336+
preset-service-account: "true"
337+
preset-k8s-ssh: "true"
338+
preset-e2e-scalability-common: "true"
339+
preset-e2e-scalability-periodics: "true"
340+
preset-e2e-scalability-periodics-master: "true"
341+
annotations:
342+
testgrid-dashboards: sig-scalability-dra
343+
testgrid-tab-name: gce-dra-with-workload-master-scalability-5000
344+
testgrid-alert-email: [email protected], [email protected]
345+
testgrid-num-failures-to-alert: '2'
346+
description: "Uses kubetest to run k8s.io/perf-tests/run-e2e.sh against a 5000-node cluster with DRA enabled"
347+
decorate: true
348+
decoration_config:
349+
timeout: 8h
350+
extra_refs:
351+
- org: kubernetes
352+
repo: kubernetes
353+
base_ref: master
354+
path_alias: k8s.io/kubernetes
355+
- org: kubernetes
356+
repo: perf-tests
357+
base_ref: master
358+
path_alias: k8s.io/perf-tests
359+
spec:
360+
containers:
361+
- image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20250925-95b5a2c7a5-master
362+
command:
363+
- runner.sh
364+
- /workspace/scenarios/kubernetes_e2e.py
365+
args:
366+
- --cluster=gce-scale-cluster
367+
- --env=HEAPSTER_MACHINE_TYPE=e2-standard-32
368+
# TODO(mborsz): Adjust or remove this change once we understand the CoreDNS
369+
# memory usage regression.
370+
- --env=KUBE_DNS_MEMORY_LIMIT=300Mi
371+
- --extract=ci/fast/latest-fast
372+
- --gcp-nodes=5000
373+
- --gcp-project-type=scalability-scale-project
374+
- --gcp-zone=us-east1-b
375+
- --provider=gce
376+
- --metadata-sources=cl2-metadata.json
377+
- --env=KUBE_FEATURE_GATES=DynamicResourceAllocation=true
378+
- --runtime-config=api/all=true
379+
- --test=false
380+
- --test-cmd=$GOPATH/src/k8s.io/perf-tests/run-e2e.sh
381+
- --test-cmd-args=cluster-loader2
382+
- --test-cmd-args=--nodes=5000
383+
- --test-cmd-args=--provider=gce
384+
- --test-cmd-args=--enable-prometheus-server=true
385+
- --test-cmd-args=--prometheus-scrape-node-exporter
386+
- --test-cmd-args=--experimental-gcp-snapshot-prometheus-disk=true
387+
- --test-cmd-args=--experimental-prometheus-disk-snapshot-name=$(JOB_NAME)-$(BUILD_ID)
388+
- --test-cmd-args=--experimental-prometheus-snapshot-to-report-dir=true
389+
- --test-cmd-args=--testconfig=testing/dra/config.yaml
390+
- --test-cmd-args=--report-dir=$(ARTIFACTS)
391+
- --test-cmd-args=--testoverrides=./testing/overrides/5000_nodes.yaml
392+
- --test-cmd-name=ClusterLoaderV2
393+
- --timeout=420m
394+
- --use-logexporter
395+
- --logexporter-gcs-path=gs://k8s-infra-scalability-tests-logs/$(JOB_NAME)/$(BUILD_ID)
396+
resources:
397+
requests:
398+
cpu: 6
399+
memory: "16Gi"
400+
limits:
401+
cpu: 6
402+
memory: "16Gi"
403+
env:
404+
- name: CL2_MODE
405+
value: "Indexed"
406+
- name: CL2_NODES_PER_NAMESPACE
407+
value: "2500"
408+
- name: CL2_JOB_RUNNING_TIME
409+
value: "3s"
410+
- name: CL2_LONG_JOB_RUNNING_TIME
411+
value: "240m"
412+
# TODO: the 100-node GCE test runs with 10 and 5; these can be increased if needed
413+
# - name: CL2_LOAD_TEST_THROUGHPUT
414+
# value: "50"
415+
# - name: CL2_STEADY_STATE_QPS
416+
# value: "25"
417+
- name: PROMETHEUS_SCRAPE_KUBELETS
418+
value: "true"
419+
- name: NODE_PRELOAD_IMAGES
420+
value: "gcr.io/k8s-staging-perf-tests/sleep:v0.0.3"

0 commit comments

Comments
 (0)