Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions pkg/abi/nvgpu/ctrl.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,10 @@ const (
NV0000_CTRL_CMD_GPU_GET_ACTIVE_DEVICE_IDS = 0x288
NV0000_CTRL_CMD_GPU_ASYNC_ATTACH_ID = 0x289
NV0000_CTRL_CMD_GPU_WAIT_ATTACH_ID = 0x290
)

// From src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000gsync.h:
const (
NV0000_CTRL_CMD_GSYNC_GET_ATTACHED_IDS = 0x301
NV0000_CTRL_GPU_INVALID_ID = 0xffffffff
NV0000_CTRL_GPU_MAX_PROBED_GPUS = NV_MAX_DEVICES
NV0000_GPU_MAX_GID_LENGTH = 0x100
)

// NV0000_CTRL_GPU_GET_ID_INFO_PARAMS is the param type for NV0000_CTRL_CMD_GPU_GET_ID_INFO,
Expand All @@ -98,6 +97,25 @@ type NV0000_CTRL_GPU_GET_ID_INFO_PARAMS struct {
NumaID int32
}

// NV0000_CTRL_GPU_ATTACH_IDS_PARAMS is the param type for
// NV0000_CTRL_CMD_GPU_ATTACH_IDS.
//
// The struct is a fixed-size array of 32-bit GPU IDs followed by one more
// 32-bit ID, so it contains no implicit padding.
//
// +marshal
type NV0000_CTRL_GPU_ATTACH_IDS_PARAMS struct {
// GPUIDs has one slot per probed GPU (NV0000_CTRL_GPU_MAX_PROBED_GPUS).
// NOTE(review): presumably unused slots are terminated/filled with
// NV0000_CTRL_GPU_INVALID_ID -- confirm against ctrl0000gpu.h.
GPUIDs [NV0000_CTRL_GPU_MAX_PROBED_GPUS]uint32
// NOTE(review): presumably set by the driver to the ID that could not be
// attached -- confirm against ctrl0000gpu.h.
FailedID uint32
}

// NV0000_CTRL_GPU_GET_UUID_FROM_GPU_ID_PARAMS is the param type for
// NV0000_CTRL_CMD_GPU_GET_UUID_FROM_GPU_ID.
//
// Layout: two uint32 fields, a NV0000_GPU_MAX_GID_LENGTH-byte buffer, and a
// trailing uint32. All fields are naturally aligned, so there is no implicit
// padding.
//
// +marshal
type NV0000_CTRL_GPU_GET_UUID_FROM_GPU_ID_PARAMS struct {
GPUID uint32
Flags uint32
// GPUUUID is a fixed-size byte buffer of NV0000_GPU_MAX_GID_LENGTH bytes.
GPUUUID [NV0000_GPU_MAX_GID_LENGTH]byte
// NOTE(review): presumably the number of meaningful bytes in GPUUUID --
// confirm against ctrl0000gpu.h.
UUIDStrLen uint32
}

// From src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000gsync.h:
const (
NV0000_CTRL_CMD_GSYNC_GET_ATTACHED_IDS = 0x301
)

// From src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000syncgpuboost.h:
const (
NV0000_CTRL_CMD_SYNC_GPU_BOOST_GROUP_INFO = 0xa04
Expand Down
33 changes: 33 additions & 0 deletions pkg/abi/nvgpu/frontend.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,39 @@ const (
// Frontend ioctl parameter structs, from src/common/sdk/nvidia/inc/nvos.h or
// kernel-open/common/inc/nv-ioctl.h.

// IoctlCardInfo is nv_ioctl_card_info_t. NV_ESC_CARD_INFO takes an array of
// IoctlCardInfo as parameter.
//
// The declared fields add up to 72 bytes and each one starts at an offset
// that is a multiple of its own alignment (the explicit Pad* fields take care
// of that), so the Go compiler inserts no padding of its own.
//
// +marshal
type IoctlCardInfo struct {
Valid uint8
// Pad0 brings the following PCIInfo to a 4-byte boundary.
Pad0 [3]byte
PCIInfo PCIInfo
GPUID uint32
InterruptLine uint16
// Pad1 brings the following uint64 fields to an 8-byte boundary.
Pad1 [2]byte
RegAddress uint64
RegSize uint64
FBAddress uint64
FBSize uint64
MinorNumber uint32
DevName [10]byte
// Pad2 rounds the struct size up to a multiple of 8 bytes.
Pad2 [2]byte
}

// PCIInfo is nv_pci_info_t.
//
// The declared fields add up to 12 bytes with every field naturally aligned,
// so the Go compiler inserts no padding of its own.
//
// +marshal
type PCIInfo struct {
Domain uint32
Bus uint8
Slot uint8
Function uint8
// Pad0 brings the following uint16 fields to a 2-byte boundary.
Pad0 uint8
VendorID uint16
DeviceID uint16
}

// IoctlRegisterFD is the parameter type for NV_ESC_REGISTER_FD.
//
// +marshal
Expand Down
10 changes: 7 additions & 3 deletions pkg/abi/nvgpu/nvgpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,13 @@ import (

// Device numbers.
const (
NV_MAJOR_DEVICE_NUMBER = 195 // from kernel-open/common/inc/nv.h
NV_CONTROL_DEVICE_MINOR = 255 // from kernel-open/common/inc/nv-linux.h
NVIDIA_UVM_PRIMARY_MINOR_NUMBER = 0 // from kernel-open/nvidia-uvm/uvm_common.h
// From kernel-open/common/inc/nv-chardev-numbers.h:
NV_MAJOR_DEVICE_NUMBER = 195
NV_MINOR_DEVICE_NUMBER_REGULAR_MAX = 247
NV_MINOR_DEVICE_NUMBER_CONTROL_DEVICE = 255

// From kernel-open/nvidia-uvm/uvm_common.h:
NVIDIA_UVM_PRIMARY_MINOR_NUMBER = 0
)

// Handle is NvHandle, from src/common/sdk/nvidia/inc/nvtypes.h.
Expand Down
3 changes: 3 additions & 0 deletions pkg/fsutil/fsutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ type DirentHandler func(ino uint64, off int64, ftype uint8, name string, reclen
// ForEachDirent retrieves all dirents from dirfd using getdents64(2) and
// invokes handleDirent on them.
func ForEachDirent(dirfd int, handleDirent DirentHandler) error {
if _, err := unix.Seek(dirfd, unix.SEEK_SET, 0); err != nil {
return err
}
var direntsBuf [8192]byte
for {
n, err := unix.Getdents(dirfd, direntsBuf[:])
Expand Down
2 changes: 1 addition & 1 deletion pkg/sentry/devices/nvproxy/frontend.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ type frontendDevice struct {
}

func (dev *frontendDevice) isCtlDevice() bool {
return dev.minor == nvgpu.NV_CONTROL_DEVICE_MINOR
return dev.minor == nvgpu.NV_MINOR_DEVICE_NUMBER_CONTROL_DEVICE
}

func (dev *frontendDevice) basename() string {
Expand Down
17 changes: 14 additions & 3 deletions pkg/sentry/devices/nvproxy/nvproxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,26 @@ func Register(vfsObj *vfs.VirtualFilesystem, version nvconf.DriverVersion, drive
frontendFDs: make(map[*frontendFD]struct{}),
clients: make(map[nvgpu.Handle]*rootClient),
}
for minor := uint32(0); minor <= nvgpu.NV_CONTROL_DEVICE_MINOR; minor++ {
if err := vfsObj.RegisterDevice(vfs.CharDevice, nvgpu.NV_MAJOR_DEVICE_NUMBER, minor, &frontendDevice{
for minor := uint32(0); minor <= nvgpu.NV_MINOR_DEVICE_NUMBER_REGULAR_MAX; minor++ {
dev := &frontendDevice{
nvp: nvp,
minor: minor,
}, &vfs.RegisterDeviceOptions{
}
nvp.regularDevs[minor] = dev
if err := vfsObj.RegisterDevice(vfs.CharDevice, nvgpu.NV_MAJOR_DEVICE_NUMBER, minor, dev, &vfs.RegisterDeviceOptions{
GroupName: "nvidia-frontend",
}); err != nil {
return err
}
}
if err := vfsObj.RegisterDevice(vfs.CharDevice, nvgpu.NV_MAJOR_DEVICE_NUMBER, nvgpu.NV_MINOR_DEVICE_NUMBER_CONTROL_DEVICE, &frontendDevice{
nvp: nvp,
minor: nvgpu.NV_MINOR_DEVICE_NUMBER_CONTROL_DEVICE,
}, &vfs.RegisterDeviceOptions{
GroupName: "nvidia-frontend",
}); err != nil {
return err
}
if err := vfsObj.RegisterDevice(vfs.CharDevice, uvmDevMajor, nvgpu.NVIDIA_UVM_PRIMARY_MINOR_NUMBER, &uvmDevice{
nvp: nvp,
}, &vfs.RegisterDeviceOptions{
Expand All @@ -82,6 +92,7 @@ type nvproxy struct {
version nvconf.DriverVersion
capsEnabled nvconf.DriverCaps
useDevGofer bool
regularDevs [nvgpu.NV_MINOR_DEVICE_NUMBER_REGULAR_MAX + 1]*frontendDevice

fdsMu fdsMutex `state:"nosave"`
frontendFDs map[*frontendFD]struct{}
Expand Down
6 changes: 3 additions & 3 deletions pkg/sentry/devices/nvproxy/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ func Init() {
getInfo: func() *DriverABIInfo {
return &DriverABIInfo{
FrontendInfos: map[uint32]IoctlInfo{
nvgpu.NV_ESC_CARD_INFO: simpleIoctlInfo("NV_ESC_CARD_INFO", "nv_ioctl_card_info_t"),
nvgpu.NV_ESC_CARD_INFO: ioctlInfoWithStructName("NV_ESC_CARD_INFO", nvgpu.IoctlCardInfo{}, "nv_ioctl_card_info_t"),
nvgpu.NV_ESC_CHECK_VERSION_STR: ioctlInfoWithStructName("NV_ESC_CHECK_VERSION_STR", nvgpu.RMAPIVersion{}, "nv_ioctl_rm_api_version_t"),
nvgpu.NV_ESC_ATTACH_GPUS_TO_FD: simpleIoctlInfo("NV_ESC_ATTACH_GPUS_TO_FD"), // No params struct, params is a NvU32 array containing GPU IDs
nvgpu.NV_ESC_SYS_PARAMS: ioctlInfoWithStructName("NV_ESC_SYS_PARAMS", nvgpu.IoctlSysParams{}, "nv_ioctl_sys_params_t"),
Expand Down Expand Up @@ -494,10 +494,10 @@ func Init() {
nvgpu.NV0000_CTRL_CMD_GPU_GET_DEVICE_IDS: simpleIoctlInfo("NV0000_CTRL_CMD_GPU_GET_DEVICE_IDS", "NV0000_CTRL_GPU_GET_DEVICE_IDS_PARAMS"),
nvgpu.NV0000_CTRL_CMD_GPU_GET_ID_INFO_V2: simpleIoctlInfo("NV0000_CTRL_CMD_GPU_GET_ID_INFO_V2", "NV0000_CTRL_GPU_GET_ID_INFO_V2_PARAMS"),
nvgpu.NV0000_CTRL_CMD_GPU_GET_PROBED_IDS: simpleIoctlInfo("NV0000_CTRL_CMD_GPU_GET_PROBED_IDS", "NV0000_CTRL_GPU_GET_PROBED_IDS_PARAMS"),
nvgpu.NV0000_CTRL_CMD_GPU_ATTACH_IDS: simpleIoctlInfo("NV0000_CTRL_CMD_GPU_ATTACH_IDS", "NV0000_CTRL_GPU_ATTACH_IDS_PARAMS"),
nvgpu.NV0000_CTRL_CMD_GPU_ATTACH_IDS: ioctlInfo("NV0000_CTRL_CMD_GPU_ATTACH_IDS", nvgpu.NV0000_CTRL_GPU_ATTACH_IDS_PARAMS{}),
nvgpu.NV0000_CTRL_CMD_GPU_DETACH_IDS: simpleIoctlInfo("NV0000_CTRL_CMD_GPU_DETACH_IDS", "NV0000_CTRL_GPU_DETACH_IDS_PARAMS"),
nvgpu.NV0000_CTRL_CMD_GPU_GET_PCI_INFO: simpleIoctlInfo("NV0000_CTRL_CMD_GPU_GET_PCI_INFO", "NV0000_CTRL_GPU_GET_PCI_INFO_PARAMS"),
nvgpu.NV0000_CTRL_CMD_GPU_GET_UUID_FROM_GPU_ID: simpleIoctlInfo("NV0000_CTRL_CMD_GPU_GET_UUID_FROM_GPU_ID", "NV0000_CTRL_GPU_GET_UUID_FROM_GPU_ID_PARAMS"),
nvgpu.NV0000_CTRL_CMD_GPU_GET_UUID_FROM_GPU_ID: ioctlInfo("NV0000_CTRL_CMD_GPU_GET_UUID_FROM_GPU_ID", nvgpu.NV0000_CTRL_GPU_GET_UUID_FROM_GPU_ID_PARAMS{}),
nvgpu.NV0000_CTRL_CMD_GPU_QUERY_DRAIN_STATE: simpleIoctlInfo("NV0000_CTRL_CMD_GPU_QUERY_DRAIN_STATE", "NV0000_CTRL_GPU_QUERY_DRAIN_STATE_PARAMS"),
nvgpu.NV0000_CTRL_CMD_GPU_GET_MEMOP_ENABLE: simpleIoctlInfo("NV0000_CTRL_CMD_GPU_GET_MEMOP_ENABLE", "NV0000_CTRL_GPU_GET_MEMOP_ENABLE_PARAMS"),
nvgpu.NV0000_CTRL_CMD_GSYNC_GET_ATTACHED_IDS: simpleIoctlInfo("NV0000_CTRL_CMD_GSYNC_GET_ATTACHED_IDS", "NV0000_CTRL_GSYNC_GET_ATTACHED_IDS_PARAMS"),
Expand Down
5 changes: 4 additions & 1 deletion pkg/sentry/fsimpl/dev/dev.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ func (FilesystemType) Name() string {
func (fst FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
mntns, err := vfsObj.NewMountNamespace(ctx, creds, source /* source */, tmpfs.Name, &vfs.MountOptions{GetFilesystemOptions: vfs.GetFilesystemOptions{
Data: "mode=0755", // opts from drivers/base/devtmpfs.c:devtmpfs_init()
InternalData: tmpfs.FilesystemOpts{
FilesystemType: fst,
},
}}, nil)
if err != nil {
return nil, nil, err
Expand Down Expand Up @@ -84,7 +87,7 @@ func (fst FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virtual
}

// Release implements vfs.FilesystemType.Release.
func (fst *FilesystemType) Release(ctx context.Context) {}
func (fst FilesystemType) Release(ctx context.Context) {}

// InternalData contains internal data passed in via vfs.GetFilesystemOptions.
type InternalData struct {
Expand Down
13 changes: 13 additions & 0 deletions pkg/sentry/vfs/device.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,19 @@ func (vfs *VirtualFilesystem) IsDeviceRegistered(kind DeviceKind, major, minor u
return ok
}

// GetRegisteredDevice looks up the Device that was registered under the
// (kind, major, minor) tuple. It returns nil when nothing is registered for
// that tuple. The device table is read under vfs.devicesMu held for reading.
func (vfs *VirtualFilesystem) GetRegisteredDevice(kind DeviceKind, major, minor uint32) Device {
	key := devTuple{kind, major, minor}

	vfs.devicesMu.RLock()
	defer vfs.devicesMu.RUnlock()

	if registered, found := vfs.devices[key]; found {
		return registered.dev
	}
	return nil
}

// OpenDeviceSpecialFile returns a FileDescription representing the given
// device.
func (vfs *VirtualFilesystem) OpenDeviceSpecialFile(ctx context.Context, mnt *Mount, d *Dentry, kind DeviceKind, major, minor uint32, opts *OpenOptions) (*FileDescription, error) {
Expand Down
16 changes: 16 additions & 0 deletions pkg/sentry/vfs/mount.go
Original file line number Diff line number Diff line change
Expand Up @@ -1623,3 +1623,19 @@ func (vfs *VirtualFilesystem) generateOptionalTags(ctx context.Context, mnt *Mou
}
return optionalSb.String()
}

// GetAllMounts collects the Mounts tracked by vfs, regardless of namespace.
// Only Mounts on which a mounted reference could be acquired are included;
// the rest are skipped. The mount lock is held for the duration of the walk.
//
// Each returned Mount carries a reference that the caller must drop once it
// is no longer needed.
func (vfs *VirtualFilesystem) GetAllMounts(ctx context.Context) []*Mount {
	vfs.lockMounts()
	defer vfs.unlockMounts(ctx)

	var held []*Mount
	for m := range vfs.mounts.Range {
		// Keep the Mount only if we managed to take a reference on it.
		if m.tryIncMountedRef() {
			held = append(held, m)
		}
	}
	return held
}
1 change: 1 addition & 0 deletions runsc/boot/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) (retErr error)
cm.restorer = &restorer{
cm: cm,
stateFile: reader,
metadata: metadata,
background: o.Background,
timer: timer,
mainMF: mf,
Expand Down
2 changes: 1 addition & 1 deletion runsc/boot/loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -752,7 +752,7 @@ func New(args Args) (*Loader, error) {
enableAutosave(l, args.Conf.TestOnlyAutosaveResume, l.saveFDs)
}

l.kernelInitExtra()
l.kernelInitExtra(l.k.SupervisorContext())

// Create the control server using the provided FD.
//
Expand Down
14 changes: 13 additions & 1 deletion runsc/boot/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ type restorer struct {
// stateFile is a reader for the statefile.
stateFile io.ReadCloser

// metadata is the metadata contained in the statefile.
metadata map[string]string

// timer is the timer for the restore process.
// The `restorer` owns the timer and will end it when restore is complete.
timer *timing.Timer
Expand Down Expand Up @@ -268,6 +271,11 @@ func (r *restorer) restore(l *Loader) error {
r.asyncMFLoader.KickoffPrivate(mfmap)
}

ctx, err = r.prepareRestoreContextExtraLocked(ctx, l)
if err != nil {
return err
}

// Load the state.
r.timer.Reached("loading kernel")
if err := l.k.LoadFrom(ctx, r.stateFile, r.asyncMFLoader, nil, oldInetStack, time.NewCalibratedClocks(), &vfs.CompleteRestoreOptions{}, l.saveRestoreNet); err != nil {
Expand Down Expand Up @@ -345,7 +353,7 @@ func (r *restorer) restore(l *Loader) error {

l.k.RestoreContainerMapping(l.containerIDs)

l.kernelInitExtra()
l.kernelInitExtra(ctx)

// Refresh the control server with the newly created kernel.
l.ctrl.refreshHandlers()
Expand Down Expand Up @@ -441,6 +449,10 @@ func (l *Loader) saveWithOpts(saveOpts *state.SaveOpts, execOpts *control.SaveRe
}
saveOpts.Metadata[ContainerSpecsKey] = specsStr

if err := l.prepareSaveOptsExtra(saveOpts); err != nil {
return err
}

state := control.State{
Kernel: l.k,
Watchdog: l.watchdog,
Expand Down
13 changes: 12 additions & 1 deletion runsc/boot/restore_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ import (
"io"

specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/proc"
"gvisor.dev/gvisor/pkg/sentry/state"
"gvisor.dev/gvisor/pkg/sentry/state/stateio"
"gvisor.dev/gvisor/runsc/config"
)
Expand All @@ -32,10 +34,19 @@ func newProcInternalData(conf *config.Config, _ *specs.Spec) *proc.InternalData
}
}

func (l *Loader) kernelInitExtra() {}
func (l *Loader) kernelInitExtra(ctx context.Context) {}

// RestoreOptsExtra is an empty struct in this file; it declares no fields.
type RestoreOptsExtra struct{}

// getRestoreReadersImpl forwards o to getRestoreReadersForLocalCheckpointFiles
// and returns that function's results unchanged.
func getRestoreReadersImpl(o *RestoreOpts) (io.ReadCloser, io.ReadCloser, stateio.AsyncReader, error) {
return getRestoreReadersForLocalCheckpointFiles(o)
}

// prepareSaveOptsExtra is a no-op in this file: it does not touch saveOpts
// and always returns nil. saveWithOpts calls it after filling in the
// container specs metadata and aborts the save if it returns an error.
func (l *Loader) prepareSaveOptsExtra(saveOpts *state.SaveOpts) error {
return nil
}

// prepareRestoreContextExtraLocked is a no-op in this file: it returns ctx
// unchanged together with a nil error and reads neither r nor l.
//
// +checklocks:l.mu
func (r *restorer) prepareRestoreContextExtraLocked(ctx context.Context, l *Loader) (context.Context, error) {
return ctx, nil
}
5 changes: 4 additions & 1 deletion runsc/boot/vfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -1430,7 +1430,7 @@ func createDeviceFiles(ctx context.Context, creds *auth.Credentials, info *conta
// spec.Linux.Devices. So manually create appropriate device files.
mode := os.FileMode(0666)
nvidiaDevs := []specs.LinuxDevice{
{Path: "/dev/nvidiactl", Type: "c", Major: nvgpu.NV_MAJOR_DEVICE_NUMBER, Minor: nvgpu.NV_CONTROL_DEVICE_MINOR, FileMode: &mode},
{Path: "/dev/nvidiactl", Type: "c", Major: nvgpu.NV_MAJOR_DEVICE_NUMBER, Minor: nvgpu.NV_MINOR_DEVICE_NUMBER_CONTROL_DEVICE, FileMode: &mode},
{Path: "/dev/nvidia-uvm", Type: "c", Major: int64(info.nvidiaUVMDevMajor), Minor: nvgpu.NVIDIA_UVM_PRIMARY_MINOR_NUMBER, FileMode: &mode},
}
devClient := devutil.GoferClientFromContext(ctx)
Expand All @@ -1451,6 +1451,9 @@ func createDeviceFiles(ctx context.Context, creds *auth.Credentials, info *conta
if err != nil {
return fmt.Errorf("invalid nvidia device name %q: %w", name, err)
}
if minor > nvgpu.NV_MINOR_DEVICE_NUMBER_REGULAR_MAX {
return fmt.Errorf("invalid nvidia regular minor device number %d", minor)
}
nvidiaDevs = append(nvidiaDevs, specs.LinuxDevice{Path: fmt.Sprintf("/dev/nvidia%d", minor), Type: "c", Major: nvgpu.NV_MAJOR_DEVICE_NUMBER, Minor: int64(minor), FileMode: &mode})
}
for _, nvidiaDev := range nvidiaDevs {
Expand Down
1 change: 1 addition & 0 deletions runsc/specutils/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ go_library(
"namespace.go",
"nvidia.go",
"restore.go",
"restore_impl.go",
"specutils.go",
],
visibility = ["//:sandbox"],
Expand Down
5 changes: 5 additions & 0 deletions runsc/specutils/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ func validateMounts(field, cName string, o, n []specs.Mount) error {
}

func validateDevices(field, cName string, o, n []specs.LinuxDevice) error {
var err error
o, n, err = prevalidateDevicesImpl(field, cName, o, n)
if err != nil {
return err
}
if len(o) != len(n) {
return validateErrorWithMsg(field, cName, o, n, "length mismatch")
}
Expand Down
26 changes: 26 additions & 0 deletions runsc/specutils/restore_impl.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Copyright 2025 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !false
// +build !false

package specutils

import (
specs "github.com/opencontainers/runtime-spec/specs-go"
)

// prevalidateDevicesImpl is a pass-through in this file: it returns o and n
// unchanged with a nil error, and does not use field or cName.
func prevalidateDevicesImpl(field, cName string, o, n []specs.LinuxDevice) ([]specs.LinuxDevice, []specs.LinuxDevice, error) {
return o, n, nil
}