Skip to content

Commit 8baeb25

Browse files
committed
Try 3 shards e2e
1 parent 6aaf154 commit 8baeb25

File tree

8 files changed

+32
-9
lines changed

8 files changed

+32
-9
lines changed

.github/workflows/on-pr.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ jobs:
275275
276276
- name: Run parallel e2e tests
277277
run: |
278-
ginkgo -r --keep-going --randomize-all --randomize-suites --procs=2 --trace -vv --label-filter '!autoscale && !scale' ./test/e2e/suites
278+
ginkgo -r --keep-going --randomize-all --randomize-suites --procs=3 --trace -vv --label-filter '!autoscale && !scale' ./test/e2e/suites
279279
280280
- name: Uninstall KAI-scheduler
281281
run: |

hack/parallel_e2e_config/set_parallel_testing_shards.sh

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,17 @@ if [ -z "$NODES_TO_LABEL" ]; then
2727
exit 1
2828
fi
2929

30-
echo "Labeling nodes: $NODES_TO_LABEL for scheduling shard test-pool-2"
31-
kubectl label nodes $NODES_TO_LABEL kai.scheduler/node-pool=test-pool-2 --overwrite
30+
POOL_2_NODES=$(echo "$ALL_WORKER_NODES" | head -n 1 | tr '\n' ' ')
31+
POOL_3_NODES=$(echo "$ALL_WORKER_NODES" | tail -n +2 | head -n 1 | tr '\n' ' ')
32+
33+
echo "Labeling nodes: $POOL_2_NODES for scheduling shard test-pool-2"
34+
kubectl label nodes $POOL_2_NODES kai.scheduler/node-pool=test-pool-2 --overwrite
3235

3336
echo "Create the scheduling shard test-shard-2.yaml"
34-
kubectl apply -f ${REPO_ROOT}/hack/parallel_e2e_config/test-shard-2.yaml
37+
kubectl apply -f ${REPO_ROOT}/hack/parallel_e2e_config/test-shard-2.yaml
38+
39+
echo "Labeling nodes: $POOL_3_NODES for scheduling shard test-pool-3"
40+
kubectl label nodes $POOL_3_NODES kai.scheduler/node-pool=test-pool-3 --overwrite
41+
42+
echo "Create the scheduling shard test-shard-3.yaml"
43+
kubectl apply -f ${REPO_ROOT}/hack/parallel_e2e_config/test-shard-3.yaml

hack/parallel_e2e_config/test-shard-3.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Copyright 2025 NVIDIA CORPORATION
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
apiVersion: kai.scheduler/v1
5+
kind: SchedulingShard
6+
metadata:
7+
name: test-shard-3
8+
spec:
9+
args:
10+
restrict-node-scheduling: "false"
11+
partitionLabelValue: test-pool-3
12+
placementStrategy:
13+
cpu: binpack
14+
gpu: binpack

test/e2e/suites/allocate/node_order/affinity_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ var _ = Describe("Affinity", Ordered, func() {
104104
Expect(testedPod.Spec.NodeName).To(Equal(pod.Spec.NodeName))
105105
})
106106

107-
It("schedules the new pod NOT with matching labels pod anti-affinity", func(ctx context.Context) {
107+
It("schedules the new pod NOT with matching labels pod anti-affinity", Serial, func(ctx context.Context) {
108108
testedPod := rd.CreatePodObject(testCtx.Queues[0], v1.ResourceRequirements{})
109109
testedPod.Name = "pod-with-anti-affinity-" + pod.Name
110110
testedPod.Spec.Affinity = &v1.Affinity{

test/e2e/suites/allocate/node_order/placement_strategy_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ const (
3535
gpuIndexEnvVarName = "NVIDIA_VISIBLE_DEVICES"
3636
)
3737

38-
var _ = Describe("Placement strategy", Label(labels.Operated), Ordered, func() {
38+
var _ = Describe("Placement strategy", Label(labels.Operated), Serial, Ordered, func() {
3939
var (
4040
testCtx *testcontext.TestContext
4141
)

test/e2e/suites/allocate/topology/topology_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ import (
2727
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2828
)
2929

30-
var _ = Describe("Topology", Ordered, func() {
30+
var _ = Describe("Topology", Serial, Ordered, func() {
3131
var (
3232
testCtx *testcontext.TestContext
3333
gpuNodesNames []string

test/e2e/suites/preempt/preempt_distributed_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ import (
2727
"github.com/NVIDIA/KAI-scheduler/test/e2e/modules/wait"
2828
)
2929

30-
var _ = Describe("preempt Distributed Jobs", Ordered, func() {
30+
var _ = Describe("preempt Distributed Jobs", Serial, Ordered, func() {
3131
Context("Over more than one nodes", func() {
3232
var (
3333
testCtx *testcontext.TestContext

test/e2e/suites/reclaim/reclaim_distributed_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ import (
2626
"github.com/NVIDIA/KAI-scheduler/test/e2e/modules/wait"
2727
)
2828

29-
var _ = Describe("Reclaim Distributed Jobs", Ordered, func() {
29+
var _ = Describe("Reclaim Distributed Jobs", Serial, Ordered, func() {
3030
Context("Over more than one nodes", func() {
3131
var (
3232
testCtx *testcontext.TestContext

0 commit comments

Comments
 (0)