Skip to content

Commit 405264f

Browse files
authored
fix: unit test issue (#380)
* fix: preemption unit test * fix: add write lock for GPU capacity map * fix: increase timeout for GitHub Actions run
1 parent af17696 commit 405264f

File tree

6 files changed

+31
-23
lines changed

6 files changed

+31
-23
lines changed

.vscode/launch.json

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,19 @@
9797
},
9898
"program": "${workspaceFolder}/internal/webhook/v1",
9999
"console": "integratedTerminal"
100+
},
101+
{
102+
"name": "Run Unit Tests - Preemption",
103+
"type": "go",
104+
"request": "launch",
105+
"mode": "test",
106+
"env": {
107+
"DEBUG_MODE": "true",
108+
"GO_TESTING": "true",
109+
"ENVTEST_K8S_VERSION": "1.34.0"
110+
},
111+
"program": "${workspaceFolder}/test/sched",
112+
"console": "integratedTerminal"
100113
}
101114
]
102115
}

internal/controller/suite_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ func TestControllers(t *testing.T) {
7979
SetDefaultConsistentlyDuration(5 * time.Second)
8080
SetDefaultConsistentlyPollingInterval(250 * time.Millisecond)
8181
suiteConfig, reporterConfig := GinkgoConfiguration()
82-
suiteConfig.Timeout = 5 * time.Minute
82+
suiteConfig.Timeout = 12 * time.Minute
8383
RunSpecs(t, "Controller Suite", suiteConfig, reporterConfig)
8484
}
8585

internal/gpuallocator/gpuallocator.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ import (
4040
const MaxGPUCounterPerAllocation = 128
4141
const CleanUpCheckInterval = 3 * time.Minute
4242

43+
var mu sync.Mutex
4344
var GPUCapacityMap = map[string]tfv1.Resource{}
4445

4546
type Strategy interface {
@@ -921,9 +922,11 @@ func (s *GpuAllocator) handleGPUCreate(ctx context.Context, gpu *tfv1.GPU) {
921922

922923
if s.gpuStore[key] != nil {
923924
if gpu.Status.GPUModel != "" {
925+
mu.Lock()
924926
if _, exists := GPUCapacityMap[gpu.Status.GPUModel]; !exists {
925927
GPUCapacityMap[gpu.Status.GPUModel] = *gpu.Status.Capacity
926928
}
929+
mu.Unlock()
927930
}
928931
syncGPUMetadataAndStatusFromCluster(s.gpuStore[key], gpu)
929932
log.V(6).Info("GPU already exists in store", "name", key.Name)
@@ -1019,7 +1022,9 @@ func (s *GpuAllocator) addOrUpdateGPUMaps(gpuInMem *tfv1.GPU) {
10191022
}
10201023

10211024
if gpuInMem.Status.GPUModel != "" {
1025+
mu.Lock()
10221026
GPUCapacityMap[gpuInMem.Status.GPUModel] = *gpuInMem.Status.Capacity
1027+
mu.Unlock()
10231028
}
10241029
}
10251030

internal/scheduler/expander/handler_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ func (suite *NodeExpanderTestSuite) TearDownSuite() {
8383

8484
func TestNodeExpander(t *testing.T) {
8585
suiteConfig, reporterConfig := GinkgoConfiguration()
86-
suiteConfig.Timeout = 2 * time.Minute
86+
suiteConfig.Timeout = 3 * time.Minute
8787
RegisterFailHandler(Fail)
8888
if os.Getenv("DEBUG_MODE") == constants.TrueStringValue {
8989
SetDefaultEventuallyTimeout(10 * time.Minute)

scripts/benchmark.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# Code level benchmark
22
# go test -bench=BenchmarkPodWebhookQPS -benchmem -v ./internal/webhook/v1/
33

4-
# go test -bench=BenchmarkGPUFitPlugin -benchmem ./test/sched/ --benchtime=3s
5-
# go test -bench=BenchmarkScheduler -benchmem ./test/sched/ --benchtime=5s
4+
# go test -bench=BenchmarkGPUFitPlugin -benchmem ./test/sched/ --benchtime=3s -tags=nobench
5+
# go test -bench=BenchmarkScheduler -benchmem ./test/sched/ --benchtime=5s -tags=nobench
66

77
# Real world benchmark for Mutating Webhook
88
cat > /tmp/webhook-body.json << 'EOF'

test/sched/preemption_test.go

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1+
//go:build !nobench
2+
13
package sched
24

35
import (
46
"context"
57
"fmt"
6-
"os"
78
"strings"
89
"testing"
910
"time"
@@ -25,17 +26,6 @@ import (
2526
"sigs.k8s.io/controller-runtime/pkg/envtest"
2627
)
2728

28-
// isRunningInGitHubActions checks if the tests are running in GitHub Actions environment
29-
func isRunningInGitHubActions() bool {
30-
if os.Getenv("GITHUB_ACTIONS") == "true" {
31-
return true
32-
}
33-
if os.Getenv("CI") == "true" {
34-
return true
35-
}
36-
return false
37-
}
38-
3929
// PreemptionTestSuite holds common test setup for preemption tests
4030
type PreemptionTestSuite struct {
4131
ctx context.Context
@@ -95,6 +85,9 @@ func (pts *PreemptionTestSuite) SetupSuite() {
9585
Expect(err).To(Succeed())
9686
pts.scheduler = scheduler
9787
scheduler.SchedulingQueue.Run(klog.FromContext(ctx))
88+
if scheduler.APIDispatcher != nil {
89+
scheduler.APIDispatcher.Run(klog.FromContext(ctx))
90+
}
9891

9992
// Start scheduler components
10093
cc.EventBroadcaster.StartRecordingToSink(ctx.Done())
@@ -105,6 +98,7 @@ func (pts *PreemptionTestSuite) SetupSuite() {
10598

10699
// TearDownSuite cleans up the test environment
107100
func (pts *PreemptionTestSuite) TearDownSuite() {
101+
time.Sleep(300 * time.Millisecond)
108102
if pts.cancel != nil {
109103
pts.cancel()
110104
}
@@ -122,7 +116,7 @@ func (pts *PreemptionTestSuite) TearDownSuite() {
122116
// TestPreemption tests comprehensive preemption scenarios
123117
func TestPreemption(t *testing.T) {
124118
suiteConfig, reporterConfig := GinkgoConfiguration()
125-
suiteConfig.Timeout = 1 * time.Minute
119+
suiteConfig.Timeout = 2 * time.Minute
126120
RegisterFailHandler(Fail)
127121
RunSpecs(t, "Preemption Test Suite", suiteConfig, reporterConfig)
128122
}
@@ -140,16 +134,10 @@ var _ = Describe("GPU Resource Preemption", func() {
140134
})
141135

142136
It("should preempt lower priority pods for higher priority ones", func() {
143-
if isRunningInGitHubActions() {
144-
Skip("Skipping preemption test in GitHub Actions environment")
145-
}
146137
testGPUResourcePreemption(suite)
147138
})
148139

149140
It("should respect eviction protection periods", func() {
150-
if isRunningInGitHubActions() {
151-
Skip("Skipping eviction protection test in GitHub Actions environment")
152-
}
153141
testGPUResourceEvictProtection(suite)
154142
})
155143
})
@@ -193,6 +181,8 @@ func testGPUResourcePreemption(suite *PreemptionTestSuite) {
193181
defer func() {
194182
_ = suite.k8sClient.Delete(suite.ctx, criticalPriorityPod)
195183
}()
184+
time.Sleep(10 * time.Millisecond)
185+
suite.scheduler.SchedulingQueue.Add(klog.FromContext(suite.ctx), criticalPriorityPod)
196186
suite.scheduler.ScheduleOne(suite.ctx)
197187

198188
// Preemption should be triggered and victims deleted, wait informer sync

0 commit comments

Comments
 (0)