Skip to content

Commit 9fc355a

Browse files
Add DRA support for GPU pod eviction during driver upgrades
Signed-off-by: Karthik Vetrivel <[email protected]>
1 parent 0d30fa9 commit 9fc355a

File tree

1,028 files changed

+88263
-26565
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,028 files changed

+88263
-26565
lines changed

go.mod

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@ require (
99
github.com/stretchr/testify v1.11.1
1010
github.com/urfave/cli/v2 v2.27.7
1111
golang.org/x/sys v0.36.0
12-
k8s.io/api v0.33.2
13-
k8s.io/apimachinery v0.33.2
14-
k8s.io/client-go v0.33.2
12+
k8s.io/api v0.34.0
13+
k8s.io/apimachinery v0.34.0
14+
k8s.io/client-go v0.34.0
1515
k8s.io/kubectl v0.33.2
1616
)
1717

@@ -22,17 +22,17 @@ require (
2222
github.com/chai2010/gettext-go v1.0.2 // indirect
2323
github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect
2424
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
25-
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
25+
github.com/emicklei/go-restful/v3 v3.12.2 // indirect
2626
github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f // indirect
27-
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
27+
github.com/fxamacker/cbor/v2 v2.9.0 // indirect
2828
github.com/go-errors/errors v1.4.2 // indirect
2929
github.com/go-logr/logr v1.4.2 // indirect
3030
github.com/go-openapi/jsonpointer v0.21.0 // indirect
3131
github.com/go-openapi/jsonreference v0.20.2 // indirect
3232
github.com/go-openapi/swag v0.23.0 // indirect
3333
github.com/gogo/protobuf v1.3.2 // indirect
3434
github.com/google/btree v1.1.3 // indirect
35-
github.com/google/gnostic-models v0.6.9 // indirect
35+
github.com/google/gnostic-models v0.7.0 // indirect
3636
github.com/google/go-cmp v0.7.0 // indirect
3737
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
3838
github.com/google/uuid v1.6.0 // indirect
@@ -48,7 +48,7 @@ require (
4848
github.com/moby/sys/mountinfo v0.7.2 // indirect
4949
github.com/moby/term v0.5.0 // indirect
5050
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
51-
github.com/modern-go/reflect2 v1.0.2 // indirect
51+
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
5252
github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect
5353
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
5454
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
@@ -57,10 +57,12 @@ require (
5757
github.com/pmezard/go-difflib v1.0.0 // indirect
5858
github.com/russross/blackfriday/v2 v2.1.0 // indirect
5959
github.com/spf13/cobra v1.8.1 // indirect
60-
github.com/spf13/pflag v1.0.5 // indirect
60+
github.com/spf13/pflag v1.0.6 // indirect
6161
github.com/x448/float16 v0.8.4 // indirect
6262
github.com/xlab/treeprint v1.2.0 // indirect
6363
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
64+
go.yaml.in/yaml/v2 v2.4.2 // indirect
65+
go.yaml.in/yaml/v3 v3.0.4 // indirect
6466
golang.org/x/net v0.38.0 // indirect
6567
golang.org/x/oauth2 v0.27.0 // indirect
6668
golang.org/x/sync v0.12.0 // indirect
@@ -74,12 +76,12 @@ require (
7476
k8s.io/cli-runtime v0.33.2 // indirect
7577
k8s.io/component-base v0.33.2 // indirect
7678
k8s.io/klog/v2 v2.130.1 // indirect
77-
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect
78-
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 // indirect
79-
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
79+
k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect
80+
k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect
81+
sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect
8082
sigs.k8s.io/kustomize/api v0.19.0 // indirect
8183
sigs.k8s.io/kustomize/kyaml v0.19.0 // indirect
8284
sigs.k8s.io/randfill v1.0.0 // indirect
83-
sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect
84-
sigs.k8s.io/yaml v1.4.0 // indirect
85+
sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect
86+
sigs.k8s.io/yaml v1.6.0 // indirect
8587
)

go.sum

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
2020
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
2121
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
2222
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
23-
github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
24-
github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
23+
github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU=
24+
github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
2525
github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f h1:Wl78ApPPB2Wvf/TIe2xdyJxTlb6obmF18d8QdkxNDu4=
2626
github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f/go.mod h1:OSYXu++VVOHnXeitef/D8n/6y4QV8uLHSFXX4NeXMGc=
27-
github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
28-
github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
27+
github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM=
28+
github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ=
2929
github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA=
3030
github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og=
3131
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
@@ -44,9 +44,8 @@ github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
4444
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
4545
github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg=
4646
github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
47-
github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw=
48-
github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw=
49-
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
47+
github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo=
48+
github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ=
5049
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
5150
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
5251
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
@@ -92,8 +91,9 @@ github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3
9291
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
9392
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
9493
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
95-
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
9694
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
95+
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8=
96+
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
9797
github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 h1:n6/2gBQ3RWajuToeY6ZtZTIKv2v7ThUy5KKusIT0yc0=
9898
github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00/go.mod h1:Pm3mSP3c5uWn86xMLZ5Sa7JB9GsEZySvHYXCTK4E9q4=
9999
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
@@ -120,8 +120,9 @@ github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ
120120
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
121121
github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
122122
github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
123-
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
124123
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
124+
github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
125+
github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
125126
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
126127
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
127128
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
@@ -147,6 +148,10 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
147148
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
148149
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
149150
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
151+
go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
152+
go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
153+
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
154+
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
150155
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
151156
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
152157
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
@@ -202,34 +207,33 @@ gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
202207
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
203208
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
204209
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
205-
k8s.io/api v0.33.2 h1:YgwIS5jKfA+BZg//OQhkJNIfie/kmRsO0BmNaVSimvY=
206-
k8s.io/api v0.33.2/go.mod h1:fhrbphQJSM2cXzCWgqU29xLDuks4mu7ti9vveEnpSXs=
207-
k8s.io/apimachinery v0.33.2 h1:IHFVhqg59mb8PJWTLi8m1mAoepkUNYmptHsV+Z1m5jY=
208-
k8s.io/apimachinery v0.33.2/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM=
210+
k8s.io/api v0.34.0 h1:L+JtP2wDbEYPUeNGbeSa/5GwFtIA662EmT2YSLOkAVE=
211+
k8s.io/api v0.34.0/go.mod h1:YzgkIzOOlhl9uwWCZNqpw6RJy9L2FK4dlJeayUoydug=
212+
k8s.io/apimachinery v0.34.0 h1:eR1WO5fo0HyoQZt1wdISpFDffnWOvFLOOeJ7MgIv4z0=
213+
k8s.io/apimachinery v0.34.0/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw=
209214
k8s.io/cli-runtime v0.33.2 h1:koNYQKSDdq5AExa/RDudXMhhtFasEg48KLS2KSAU74Y=
210215
k8s.io/cli-runtime v0.33.2/go.mod h1:gnhsAWpovqf1Zj5YRRBBU7PFsRc6NkEkwYNQE+mXL88=
211-
k8s.io/client-go v0.33.2 h1:z8CIcc0P581x/J1ZYf4CNzRKxRvQAwoAolYPbtQes+E=
212-
k8s.io/client-go v0.33.2/go.mod h1:9mCgT4wROvL948w6f6ArJNb7yQd7QsvqavDeZHvNmHo=
216+
k8s.io/client-go v0.34.0 h1:YoWv5r7bsBfb0Hs2jh8SOvFbKzzxyNo0nSb0zC19KZo=
217+
k8s.io/client-go v0.34.0/go.mod h1:ozgMnEKXkRjeMvBZdV1AijMHLTh3pbACPvK7zFR+QQY=
213218
k8s.io/component-base v0.33.2 h1:sCCsn9s/dG3ZrQTX/Us0/Sx2R0G5kwa0wbZFYoVp/+0=
214219
k8s.io/component-base v0.33.2/go.mod h1:/41uw9wKzuelhN+u+/C59ixxf4tYQKW7p32ddkYNe2k=
215220
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
216221
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
217-
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff h1:/usPimJzUKKu+m+TE36gUyGcf03XZEP0ZIKgKj35LS4=
218-
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8=
222+
k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b h1:MloQ9/bdJyIu9lb1PzujOPolHyvO06MXG5TUIj2mNAA=
223+
k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts=
219224
k8s.io/kubectl v0.33.2 h1:7XKZ6DYCklu5MZQzJe+CkCjoGZwD1wWl7t/FxzhMz7Y=
220225
k8s.io/kubectl v0.33.2/go.mod h1:8rC67FB8tVTYraovAGNi/idWIK90z2CHFNMmGJZJ3KI=
221-
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 h1:M3sRQVHv7vB20Xc2ybTt7ODCeFj6JSWYFzOFnYeS6Ro=
222-
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
223-
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8=
224-
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo=
226+
k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y=
227+
k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
228+
sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE=
229+
sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
225230
sigs.k8s.io/kustomize/api v0.19.0 h1:F+2HB2mU1MSiR9Hp1NEgoU2q9ItNOaBJl0I4Dlus5SQ=
226231
sigs.k8s.io/kustomize/api v0.19.0/go.mod h1:/BbwnivGVcBh1r+8m3tH1VNxJmHSk1PzP5fkP6lbL1o=
227232
sigs.k8s.io/kustomize/kyaml v0.19.0 h1:RFge5qsO1uHhwJsu3ipV7RNolC7Uozc0jUBC/61XSlA=
228233
sigs.k8s.io/kustomize/kyaml v0.19.0/go.mod h1:FeKD5jEOH+FbZPpqUghBP8mrLjJ3+zD3/rf9NNu1cwY=
229-
sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
230234
sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
231235
sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
232-
sigs.k8s.io/structured-merge-diff/v4 v4.6.0 h1:IUA9nvMmnKWcj5jl84xn+T5MnlZKThmUW1TdblaLVAc=
233-
sigs.k8s.io/structured-merge-diff/v4 v4.6.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps=
234-
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
235-
sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
236+
sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco=
237+
sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE=
238+
sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs=
239+
sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4=

internal/kubernetes/claim_cache.go

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
/*
2+
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package kubernetes
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"sync"
23+
"time"
24+
25+
"github.com/sirupsen/logrus"
26+
resourcev1 "k8s.io/api/resource/v1"
27+
"k8s.io/apimachinery/pkg/types"
28+
"k8s.io/client-go/informers"
29+
"k8s.io/client-go/kubernetes"
30+
"k8s.io/client-go/tools/cache"
31+
)
32+
33+
// ResourceClaimCache watches NVIDIA GPU ResourceClaims and maintains a map of pod UIDs
34+
// that are using GPU resources
35+
type ResourceClaimCache struct {
36+
mu sync.RWMutex
37+
podUIDs map[types.UID]struct{}
38+
39+
informerFactory informers.SharedInformerFactory
40+
stopCh chan struct{}
41+
synced bool
42+
log *logrus.Logger
43+
}
44+
45+
// NewResourceClaimCache creates a new ResourceClaimCache that watches ResourceClaims
46+
// allocated by the NVIDIA DRA driver.
47+
func NewResourceClaimCache(clientset *kubernetes.Clientset, log *logrus.Logger) *ResourceClaimCache {
48+
rcc := &ResourceClaimCache{
49+
podUIDs: make(map[types.UID]struct{}),
50+
stopCh: make(chan struct{}),
51+
log: log,
52+
}
53+
54+
// resync every 30 minutes
55+
rcc.informerFactory = informers.NewSharedInformerFactory(clientset, 30*time.Minute)
56+
claimInformer := rcc.informerFactory.Resource().V1().ResourceClaims().Informer()
57+
58+
claimInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
59+
AddFunc: rcc.handleClaim(true),
60+
UpdateFunc: rcc.onClaimUpdate,
61+
DeleteFunc: rcc.handleClaim(false),
62+
})
63+
64+
return rcc
65+
}
66+
67+
// Start begins watching ResourceClaims. Call this after creating the cache.
68+
func (rcc *ResourceClaimCache) Start(ctx context.Context) error {
69+
rcc.informerFactory.Start(rcc.stopCh)
70+
71+
// Wait for cache sync
72+
syncCtx, cancel := context.WithTimeout(ctx, 60*time.Second)
73+
defer cancel()
74+
75+
synced := rcc.informerFactory.WaitForCacheSync(syncCtx.Done())
76+
for informerType, ok := range synced {
77+
if !ok {
78+
return fmt.Errorf("failed to sync informer cache for %v", informerType)
79+
}
80+
}
81+
82+
// Another Go routine may call IsSynced() concurrently
83+
rcc.mu.Lock()
84+
rcc.synced = true
85+
rcc.mu.Unlock()
86+
87+
rcc.log.Info("ResourceClaim cache synced successfully")
88+
89+
// Handle graceful shutdown
90+
go func() {
91+
<-ctx.Done()
92+
close(rcc.stopCh)
93+
}()
94+
95+
return nil
96+
}
97+
98+
// IsSynced returns true if the informer cache has completed initial sync.
99+
func (rcc *ResourceClaimCache) IsSynced() bool {
100+
rcc.mu.RLock()
101+
defer rcc.mu.RUnlock()
102+
return rcc.synced
103+
}
104+
105+
// PodUsesNvidiaGPU returns true if the pod with the given UID has reserved an NVIDIA GPU claim.
106+
func (rcc *ResourceClaimCache) PodUsesNvidiaGPU(podUID types.UID) bool {
107+
rcc.mu.RLock()
108+
defer rcc.mu.RUnlock()
109+
_, exists := rcc.podUIDs[podUID]
110+
return exists
111+
}
112+
113+
func (rcc *ResourceClaimCache) handleClaim(add bool) func(obj interface{}) {
114+
return func(obj interface{}) {
115+
claim, ok := obj.(*resourcev1.ResourceClaim)
116+
if !ok {
117+
return
118+
}
119+
rcc.updatePodUIDs(claim, add)
120+
}
121+
}
122+
123+
func (rcc *ResourceClaimCache) onClaimUpdate(oldObj, newObj interface{}) {
124+
oldClaim, ok := oldObj.(*resourcev1.ResourceClaim)
125+
if !ok {
126+
return
127+
}
128+
newClaim, ok := newObj.(*resourcev1.ResourceClaim)
129+
if !ok {
130+
return
131+
}
132+
133+
// Remove old pod UIDs and add new ones
134+
rcc.updatePodUIDs(oldClaim, false)
135+
rcc.updatePodUIDs(newClaim, true)
136+
}
137+
138+
// updatePodUIDs adds or removes pod UIDs from the cache based on the claim's reservedFor field.
139+
func (rcc *ResourceClaimCache) updatePodUIDs(claim *resourcev1.ResourceClaim, add bool) {
140+
if !rcc.isNvidiaGPUClaim(claim) {
141+
return
142+
}
143+
144+
rcc.mu.Lock()
145+
defer rcc.mu.Unlock()
146+
147+
for _, ref := range claim.Status.ReservedFor {
148+
if ref.Resource != "pods" {
149+
continue
150+
}
151+
if add {
152+
rcc.podUIDs[ref.UID] = struct{}{}
153+
} else {
154+
delete(rcc.podUIDs, ref.UID)
155+
}
156+
}
157+
}
158+
159+
// isNvidiaGPUClaim checks if a ResourceClaim is allocated by the NVIDIA GPU DRA driver.
160+
func (rcc *ResourceClaimCache) isNvidiaGPUClaim(claim *resourcev1.ResourceClaim) bool {
161+
if claim.Status.Allocation == nil {
162+
return false
163+
}
164+
165+
for _, result := range claim.Status.Allocation.Devices.Results {
166+
if result.Driver == nvidiaDRADriverName {
167+
return true
168+
}
169+
}
170+
return false
171+
}

0 commit comments

Comments
 (0)