Skip to content

Commit 79e33c2

Browse files
authored
Merge pull request kata-containers#7325 from zvonkok/vfio-sandbox-id-debug
gpu: Adding CDI support for cold and hot-plug of VFIO devices
2 parents f7ef45b + 7c934dc commit 79e33c2

216 files changed

Lines changed: 29477 additions & 1946 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

src/runtime/go.mod

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ require (
77
github.com/BurntSushi/toml v1.2.0
88
github.com/blang/semver v3.5.1+incompatible
99
github.com/blang/semver/v4 v4.0.0
10+
github.com/container-orchestrated-devices/container-device-interface v0.6.0
1011
github.com/containerd/cgroups v1.0.5-0.20220625035431-cf7417bca682
1112
github.com/containerd/console v1.0.3
1213
github.com/containerd/containerd v1.6.8
@@ -31,7 +32,7 @@ require (
3132
github.com/intel-go/cpuid v0.0.0-20210602155658-5747e5cec0d9
3233
github.com/mdlayher/vsock v1.1.0
3334
github.com/opencontainers/runc v1.1.3
34-
github.com/opencontainers/runtime-spec v1.0.3-0.20211214071223-8958f93039ab
35+
github.com/opencontainers/runtime-spec v1.1.0-rc.1
3536
github.com/opencontainers/selinux v1.10.1
3637
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58
3738
github.com/pkg/errors v0.9.1
@@ -51,7 +52,7 @@ require (
5152
go.opentelemetry.io/otel/sdk v1.3.0
5253
go.opentelemetry.io/otel/trace v1.3.0
5354
golang.org/x/oauth2 v0.0.0-20220622183110-fd043fe589d2
54-
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f
55+
golang.org/x/sys v0.1.0
5556
google.golang.org/grpc v1.47.0
5657
k8s.io/apimachinery v0.22.5
5758
k8s.io/cri-api v0.23.1
@@ -93,11 +94,14 @@ require (
9394
github.com/oklog/ulid v1.3.1 // indirect
9495
github.com/opencontainers/go-digest v1.0.0 // indirect
9596
github.com/opencontainers/image-spec v1.0.3-0.20220114050600-8b9d41f48198 // indirect
97+
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 // indirect
9698
github.com/pmezard/go-difflib v1.0.0 // indirect
9799
github.com/rogpeppe/go-internal v1.8.1-0.20210923151022-86f73c517451 // indirect
98100
github.com/russross/blackfriday/v2 v2.1.0 // indirect
101+
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
99102
go.mongodb.org/mongo-driver v1.7.5 // indirect
100103
go.opencensus.io v0.23.0 // indirect
104+
golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 // indirect
101105
golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect
102106
golang.org/x/sync v0.0.0-20220601150217-0de741cfad7f // indirect
103107
golang.org/x/text v0.3.7 // indirect
@@ -107,6 +111,7 @@ require (
107111
gopkg.in/inf.v0 v0.9.1 // indirect
108112
gopkg.in/yaml.v2 v2.4.0 // indirect
109113
gopkg.in/yaml.v3 v3.0.1 // indirect
114+
sigs.k8s.io/yaml v1.3.0 // indirect
110115
)
111116

112117
replace (

src/runtime/go.sum

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,8 @@ github.com/cockroachdb/errors v1.2.4/go.mod h1:rQD95gz6FARkaKkQXUksEje/d9a6wBJoC
248248
github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f/go.mod h1:i/u985jwjWRlyHXQbwatDASoW0RMlZ/3i9yJHE2xLkI=
249249
github.com/codahale/rfc6979 v0.0.0-20141003034818-6a90f24967eb/go.mod h1:ZjrT6AXHbDs86ZSdt/osfBi5qfexBrKUdONk989Wnk4=
250250
github.com/container-orchestrated-devices/container-device-interface v0.4.0/go.mod h1:E1zcucIkq9P3eyNmY+68dBQsTcsXJh9cgRo2IVNScKQ=
251+
github.com/container-orchestrated-devices/container-device-interface v0.6.0 h1:aWwcz/Ep0Fd7ZuBjQGjU/jdPloM7ydhMW13h85jZNvk=
252+
github.com/container-orchestrated-devices/container-device-interface v0.6.0/go.mod h1:OQlgtJtDrOxSQ1BWODC8OZK1tzi9W69wek+Jy17ndzo=
251253
github.com/containerd/aufs v0.0.0-20200908144142-dab0cbea06f4/go.mod h1:nukgQABAEopAHvB6j7cnP5zJ+/3aVcE7hCYqvIwAHyE=
252254
github.com/containerd/aufs v0.0.0-20201003224125-76a6863f2989/go.mod h1:AkGGQs9NM2vtYHaUen+NljV0/baGCAPELGm2q9ZXpWU=
253255
github.com/containerd/aufs v0.0.0-20210316121734-20793ff83c97/go.mod h1:kL5kd6KM5TzQjR79jljyi4olc1Vrx6XBlcyj3gNv2PU=
@@ -1205,11 +1207,15 @@ github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/
12051207
github.com/opencontainers/runtime-spec v1.0.3-0.20200929063507-e6143ca7d51d/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
12061208
github.com/opencontainers/runtime-spec v1.0.3-0.20201121164853-7413a7f753e1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
12071209
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
1208-
github.com/opencontainers/runtime-spec v1.0.3-0.20211214071223-8958f93039ab h1:YQZXa3elcHgKXAa2GjVFC9M3JeP7ZPyFD1YByDx/dgQ=
12091210
github.com/opencontainers/runtime-spec v1.0.3-0.20211214071223-8958f93039ab/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
1211+
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
1212+
github.com/opencontainers/runtime-spec v1.1.0-rc.1 h1:wHa9jroFfKGQqFHj0I1fMRKLl0pfj+ynAqBxo3v6u9w=
1213+
github.com/opencontainers/runtime-spec v1.1.0-rc.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
12101214
github.com/opencontainers/runtime-tools v0.0.0-20181011054405-1d69bd0f9c39/go.mod h1:r3f7wjNzSs2extwzU3Y+6pKfobzPh+kKFJ3ofN+3nfs=
12111215
github.com/opencontainers/runtime-tools v0.0.0-20190417131837-cd1349b7c47e/go.mod h1:r3f7wjNzSs2extwzU3Y+6pKfobzPh+kKFJ3ofN+3nfs=
12121216
github.com/opencontainers/runtime-tools v0.9.1-0.20220714195903-17b3287fafb7/go.mod h1:/tgP02fPXGHkU3/qKK1Y0Db4yqNyGm03vLq/mzHzcS4=
1217+
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 h1:DmNGcqH3WDbV5k8OJ+esPWbqUOX5rMLR2PMvziDMJi0=
1218+
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626/go.mod h1:BRHJJd0E+cx42OybVYSgUvZmU0B8P9gZuRXlZUP7TKI=
12131219
github.com/opencontainers/selinux v1.6.0/go.mod h1:VVGKuOLlE7v4PJyT6h7mNWvq1rzqiriPsEqVhc+svHE=
12141220
github.com/opencontainers/selinux v1.8.0/go.mod h1:RScLhm78qiWa2gbVCcGkC7tCGdgk3ogry1nUQF8Evvo=
12151221
github.com/opencontainers/selinux v1.8.2/go.mod h1:MUIHuUEvKB1wtJjQdOyYRgOnLD2xAPP8dBsCoU0KuF8=
@@ -1401,6 +1407,7 @@ github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69
14011407
github.com/sylabs/sif/v2 v2.7.1/go.mod h1:bBse2nEFd3yHkmq6KmAOFEWQg5LdFYiQUdVcgamxlc8=
14021408
github.com/syndtr/gocapability v0.0.0-20170704070218-db04d3cc01c8/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
14031409
github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
1410+
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
14041411
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
14051412
github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ=
14061413
github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7/go.mod h1:q4W45IWZaF22tdD+VEXcAWRA037jwmWEB5VWYORlTpc=
@@ -1459,9 +1466,12 @@ github.com/xdg-go/stringprep v1.0.2/go.mod h1:8F9zXuvzgwmyT5DUm4GUfZGDdT3W+LCvS6
14591466
github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I=
14601467
github.com/xdg/stringprep v0.0.0-20180714160509-73f8eece6fdc/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y=
14611468
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
1469+
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
14621470
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
1471+
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0=
14631472
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
14641473
github.com/xeipuuv/gojsonschema v0.0.0-20180618132009-1d523034197f/go.mod h1:5yf86TLmAcydyeJq5YvxkGPE2fm/u4myDekKRoLuqhs=
1474+
github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=
14651475
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
14661476
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
14671477
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
@@ -1629,6 +1639,7 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
16291639
golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
16301640
golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
16311641
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
1642+
golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 h1:kQgndtyPBW/JIYERgdxfwMYh3AVStj88WQTlNDi2a+o=
16321643
golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY=
16331644
golang.org/x/net v0.0.0-20180530234432-1e491301e022/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
16341645
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -1893,8 +1904,8 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc
18931904
golang.org/x/sys v0.0.0-20220610221304-9f5ed59c137d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
18941905
golang.org/x/sys v0.0.0-20220624220833-87e55d714810/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
18951906
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
1896-
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s=
1897-
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
1907+
golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
1908+
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
18981909
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
18991910
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
19001911
golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
@@ -2230,4 +2241,5 @@ sigs.k8s.io/structured-merge-diff/v4 v4.0.3/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK
22302241
sigs.k8s.io/structured-merge-diff/v4 v4.1.2/go.mod h1:j/nl6xW8vLS49O8YvXW1ocPhZawJtm+Yrr7PPRQ0Vg4=
22312242
sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o=
22322243
sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc=
2244+
sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo=
22332245
sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8=

src/runtime/pkg/containerd-shim-v2/create.go

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"strings"
2020
"syscall"
2121

22+
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
2223
containerd_types "github.com/containerd/containerd/api/types"
2324
"github.com/containerd/containerd/mount"
2425
taskAPI "github.com/containerd/containerd/runtime/v2/task"
@@ -73,6 +74,63 @@ func copyLayersToMounts(rootFs *vc.RootFs, spec *specs.Spec) error {
7374
return nil
7475
}
7576

77+
// CDI (Container Device Interface), is a specification, for container- runtimes,
78+
// to support third-party devices.
79+
// It introduces an abstract notion of a device as a resource. Such devices are
80+
// uniquely specified by a fully-qualified name that is constructed from a
81+
// vendor ID, a device class, and a name that is unique per vendor ID-device
82+
// class pair.
83+
//
84+
// vendor.com/class=unique_name
85+
//
86+
// The combination of vendor ID and device class (vendor.com/class in the
87+
// above example) is referred to as the device kind.
88+
// CDI concerns itself only with enabling containers to be device aware.
89+
// Areas like resource management are explicitly left out of CDI (and are
90+
// expected to be handled by the orchestrator). Because of this focus, the CDI
91+
// specification is simple to implement and allows great flexibility for
92+
// runtimes and orchestrators.
93+
func withCDI(annotations map[string]string, cdiSpecDirs []string, spec *specs.Spec) (*specs.Spec, error) {
94+
// Add devices from CDI annotations
95+
_, devsFromAnnotations, err := cdi.ParseAnnotations(annotations)
96+
if err != nil {
97+
return nil, fmt.Errorf("failed to parse CDI device annotations: %w", err)
98+
}
99+
100+
if len(devsFromAnnotations) == 0 {
101+
// No devices found, skip device injection
102+
return spec, nil
103+
}
104+
105+
var registry cdi.Registry
106+
if len(cdiSpecDirs) > 0 {
107+
// We can override the directories where to search for CDI specs
108+
// if needed, the default is /etc/cdi /var/run/cdi
109+
registry = cdi.GetRegistry(cdi.WithSpecDirs(cdiSpecDirs...))
110+
} else {
111+
registry = cdi.GetRegistry()
112+
}
113+
114+
if err = registry.Refresh(); err != nil {
115+
// We don't consider registry refresh failure a fatal error.
116+
// For instance, a dynamically generated invalid CDI Spec file for
117+
// any particular vendor shouldn't prevent injection of devices of
118+
// different vendors. CDI itself knows better and it will fail the
119+
// injection if necessary.
120+
return nil, fmt.Errorf("CDI registry refresh failed: %w", err)
121+
}
122+
123+
if _, err := registry.InjectDevices(spec, devsFromAnnotations...); err != nil {
124+
return nil, fmt.Errorf("CDI device injection failed: %w", err)
125+
}
126+
127+
// One crucial thing to keep in mind is that CDI device injection
128+
// might add OCI Spec environment variables, hooks, and mounts as
129+
// well. Therefore it is important that none of the corresponding
130+
// OCI Spec fields are reset up in the call stack once we return.
131+
return spec, nil
132+
}
133+
76134
func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*container, error) {
77135
rootFs := vc.RootFs{}
78136
if len(r.Rootfs) == 1 {
@@ -111,6 +169,16 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con
111169
if s.sandbox != nil {
112170
return nil, fmt.Errorf("cannot create another sandbox in sandbox: %s", s.sandbox.ID())
113171
}
172+
// We can provide additional directories where to search for
173+
// CDI specs if needed. Immutable OSes only have specific
174+
// directories that applications can write to, for instance /opt/cdi.
175+
//
176+
// _, err = withCDI(ociSpec.Annotations, []string{"/opt/cdi"}, ociSpec)
177+
//
178+
_, err = withCDI(ociSpec.Annotations, []string{}, ociSpec)
179+
if err != nil {
180+
return nil, fmt.Errorf("adding CDI devices failed")
181+
}
114182

115183
s.config = runtimeConfig
116184

0 commit comments

Comments
 (0)