Skip to content

Commit b0d42f5

Browse files
authored
Skip the pod search when the node has only one GPU device (#46)
1 parent 5b68fe2 commit b0d42f5

File tree

2 files changed

+32
-3
lines changed

2 files changed

+32
-3
lines changed

cmd/nvidia/main.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,20 @@ package main
33
import (
44
"flag"
55
"fmt"
6+
"io/ioutil"
7+
"time"
8+
69
"github.com/AliyunContainerService/gpushare-device-plugin/pkg/gpu/nvidia"
710
"github.com/AliyunContainerService/gpushare-device-plugin/pkg/kubelet/client"
811
log "github.com/golang/glog"
9-
"io/ioutil"
1012
"k8s.io/client-go/rest"
11-
"time"
1213
)
1314

1415
var (
1516
mps = flag.Bool("mps", false, "Enable or Disable MPS")
1617
healthCheck = flag.Bool("health-check", false, "Enable or disable Health check")
1718
memoryUnit = flag.String("memory-unit", "GiB", "Set memoryUnit of the GPU Memroy, support 'GiB' and 'MiB'")
18-
queryFromKubelet = flag.Bool("query-kubelet", true, "Query pending pods from kubelet instead of kube-apiserver")
19+
queryFromKubelet = flag.Bool("query-kubelet", false, "Query pending pods from kubelet instead of kube-apiserver")
1920
kubeletAddress = flag.String("kubelet-address", "0.0.0.0", "Kubelet IP Address")
2021
kubeletPort = flag.Uint("kubelet-port", 10250, "Kubelet listened Port")
2122
clientCert = flag.String("client-cert", "", "Kubelet TLS client certificate")

pkg/gpu/nvidia/allocate.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,34 @@ func (m *NvidiaDevicePlugin) Allocate(ctx context.Context,
148148
}
149149
}
150150

151+
} else if len(m.devNameMap) == 1 {
152+
var devName string
153+
var devIndex uint
154+
for d, index := range m.devNameMap {
155+
devName = d
156+
devIndex = index
157+
break
158+
}
159+
log.Infof("this node has only one gpu device,skip to search pod and directly specify the device %v(%v) for container", devIndex, devName)
160+
for _, req := range reqs.ContainerRequests {
161+
reqGPU := uint(len(req.DevicesIDs))
162+
response := pluginapi.ContainerAllocateResponse{
163+
Envs: map[string]string{
164+
envNVGPU: devName,
165+
EnvResourceIndex: fmt.Sprintf("%d", devIndex),
166+
EnvResourceByPod: fmt.Sprintf("%d", podReqGPU),
167+
EnvResourceByContainer: fmt.Sprintf("%d", reqGPU),
168+
EnvResourceByDev: fmt.Sprintf("%d", getGPUMemory()),
169+
},
170+
}
171+
if m.disableCGPUIsolation {
172+
response.Envs["CGPU_DISABLE"] = "true"
173+
}
174+
responses.ContainerResponses = append(responses.ContainerResponses, &response)
175+
}
176+
log.Infof("get allocated GPUs info %v", responses)
177+
return &responses, nil
178+
151179
} else {
152180
log.Warningf("invalid allocation requst: request GPU memory %d can't be satisfied.",
153181
podReqGPU)

0 commit comments

Comments
 (0)