@@ -2,6 +2,7 @@ package devices
22
33import (
44 "bufio"
5+ "context"
56 "encoding/json"
67 "fmt"
78 "os"
@@ -10,15 +11,21 @@ import (
1011 "regexp"
1112 "strconv"
1213 "strings"
14+ "sync"
1315
1416 "github.com/google/uuid"
17+ "github.com/onkernel/hypeman/lib/logger"
1518)
1619
1720const (
1821 mdevBusPath = "/sys/class/mdev_bus"
1922 mdevDevices = "/sys/bus/mdev/devices"
2023)
2124
25+ // mdevMu protects mdev creation/destruction to prevent race conditions
26+ // when multiple instances request vGPUs concurrently.
27+ var mdevMu sync.Mutex
28+
2229// DiscoverVFs returns all SR-IOV Virtual Functions available for vGPU.
2330// These are discovered by scanning /sys/class/mdev_bus/ which contains
2431// VFs that can host mdev devices.
@@ -31,6 +38,13 @@ func DiscoverVFs() ([]VirtualFunction, error) {
3138 return nil , fmt .Errorf ("read mdev_bus: %w" , err )
3239 }
3340
41+ // List mdevs once and build a lookup map to avoid O(n*m) performance
42+ mdevs , _ := ListMdevDevices ()
43+ mdevByVF := make (map [string ]bool , len (mdevs ))
44+ for _ , mdev := range mdevs {
45+ mdevByVF [mdev .VFAddress ] = true
46+ }
47+
3448 var vfs []VirtualFunction
3549 for _ , entry := range entries {
3650 vfAddr := entry .Name ()
@@ -43,8 +57,8 @@ func DiscoverVFs() ([]VirtualFunction, error) {
4357 parentGPU = filepath .Base (target )
4458 }
4559
46- // Check if this VF already has an mdev
47- hasMdev := vfHasMdev ( vfAddr )
60+ // Check if this VF already has an mdev (using pre-built lookup map)
61+ hasMdev := mdevByVF [ vfAddr ]
4862
4963 vfs = append (vfs , VirtualFunction {
5064 PCIAddress : vfAddr ,
@@ -56,18 +70,6 @@ func DiscoverVFs() ([]VirtualFunction, error) {
5670 return vfs , nil
5771}
5872
59- // vfHasMdev checks if a VF has an mdev device created on it
60- func vfHasMdev (vfAddr string ) bool {
61- // Check if any mdev device has this VF as parent
62- mdevs , _ := ListMdevDevices ()
63- for _ , mdev := range mdevs {
64- if mdev .VFAddress == vfAddr {
65- return true
66- }
67- }
68- return false
69- }
70-
7173// ListGPUProfiles returns available vGPU profiles with availability counts.
7274// Profiles are discovered from the first VF's mdev_supported_types directory.
7375func ListGPUProfiles () ([]GPUProfile , error ) {
@@ -328,7 +330,15 @@ func getProfileNameFromType(profileType, vfAddress string) string {
328330
329331// CreateMdev creates an mdev device for the given profile and instance.
330332// It finds an available VF and creates the mdev, returning the device info.
331- func CreateMdev (profileName , instanceID string ) (* MdevDevice , error ) {
333+ // This function is thread-safe and uses a mutex to prevent race conditions
334+ // when multiple instances request vGPUs concurrently.
335+ func CreateMdev (ctx context.Context , profileName , instanceID string ) (* MdevDevice , error ) {
336+ log := logger .FromContext (ctx )
337+
338+ // Lock to prevent race conditions when multiple instances request the same profile
339+ mdevMu .Lock ()
340+ defer mdevMu .Unlock ()
341+
332342 // Find profile type from name
333343 profileType , err := findProfileType (profileName )
334344 if err != nil {
@@ -364,12 +374,16 @@ func CreateMdev(profileName, instanceID string) (*MdevDevice, error) {
364374 // Generate UUID for the mdev
365375 mdevUUID := uuid .New ().String ()
366376
377+ log .DebugContext (ctx , "creating mdev device" , "profile" , profileName , "vf" , targetVF , "uuid" , mdevUUID , "instance_id" , instanceID )
378+
367379 // Create mdev by writing UUID to create file
368380 createPath := filepath .Join (mdevBusPath , targetVF , "mdev_supported_types" , profileType , "create" )
369381 if err := os .WriteFile (createPath , []byte (mdevUUID ), 0200 ); err != nil {
370- return nil , fmt .Errorf ("create mdev: %w" , err )
382+ return nil , fmt .Errorf ("create mdev on VF %s : %w" , targetVF , err )
371383 }
372384
385+ log .InfoContext (ctx , "created mdev device" , "profile" , profileName , "vf" , targetVF , "uuid" , mdevUUID , "instance_id" , instanceID )
386+
373387 return & MdevDevice {
374388 UUID : mdevUUID ,
375389 VFAddress : targetVF ,
@@ -381,40 +395,125 @@ func CreateMdev(profileName, instanceID string) (*MdevDevice, error) {
381395}
382396
383397// DestroyMdev removes an mdev device.
384- func DestroyMdev (mdevUUID string ) error {
398+ func DestroyMdev (ctx context.Context , mdevUUID string ) error {
399+ log := logger .FromContext (ctx )
400+
401+ // Lock to prevent race conditions during destruction
402+ mdevMu .Lock ()
403+ defer mdevMu .Unlock ()
404+
405+ log .DebugContext (ctx , "destroying mdev device" , "uuid" , mdevUUID )
406+
385407 // Try mdevctl undefine first (removes persistent definition)
386- exec .Command ("mdevctl" , "undefine" , "--uuid" , mdevUUID ).Run ()
408+ if err := exec .Command ("mdevctl" , "undefine" , "--uuid" , mdevUUID ).Run (); err != nil {
409+ // Log at debug level - mdevctl might not be installed or mdev might not be defined
410+ log .DebugContext (ctx , "mdevctl undefine failed (may be expected)" , "uuid" , mdevUUID , "error" , err )
411+ }
387412
388413 // Remove via sysfs
389414 removePath := filepath .Join (mdevDevices , mdevUUID , "remove" )
390415 if err := os .WriteFile (removePath , []byte ("1" ), 0200 ); err != nil {
391416 if os .IsNotExist (err ) {
417+ log .DebugContext (ctx , "mdev already removed" , "uuid" , mdevUUID )
392418 return nil // Already removed
393419 }
394- return fmt .Errorf ("remove mdev: %w" , err )
420+ return fmt .Errorf ("remove mdev %s : %w" , mdevUUID , err )
395421 }
396422
423+ log .InfoContext (ctx , "destroyed mdev device" , "uuid" , mdevUUID )
397424 return nil
398425}
399426
400- // ReconcileMdevs destroys orphaned mdevs that are not attached to running instances.
427+ // IsMdevInUse checks if an mdev device is currently bound to a driver (in use by a VM).
428+ // An mdev with a driver symlink is actively attached to a hypervisor/VFIO.
429+ func IsMdevInUse (mdevUUID string ) bool {
430+ driverPath := filepath .Join (mdevDevices , mdevUUID , "driver" )
431+ _ , err := os .Readlink (driverPath )
432+ return err == nil // Has a driver = in use
433+ }
434+
435+ // MdevReconcileInfo contains information needed to reconcile mdevs for an instance
436+ type MdevReconcileInfo struct {
437+ InstanceID string
438+ MdevUUID string
439+ IsRunning bool // true if instance's VMM is running or state is unknown
440+ }
441+
442+ // ReconcileMdevs destroys orphaned mdevs that belong to hypeman but are no longer in use.
401443// This is called on server startup to clean up stale mdevs from previous runs.
402- func ReconcileMdevs (isInstanceRunning func (string ) bool ) error {
444+ //
445+ // Safety guarantees:
446+ // - Only destroys mdevs that are tracked by hypeman instances (via hypemanMdevs map)
447+ // - Never destroys mdevs created by other processes on the host
448+ // - Skips mdevs that are currently bound to a driver (in use by a VM)
449+ // - Skips mdevs for instances in Running or Unknown state
450+ func ReconcileMdevs (ctx context.Context , instanceInfos []MdevReconcileInfo ) error {
451+ log := logger .FromContext (ctx )
452+
403453 mdevs , err := ListMdevDevices ()
404454 if err != nil {
405455 return fmt .Errorf ("list mdevs: %w" , err )
406456 }
407457
458+ if len (mdevs ) == 0 {
459+ log .DebugContext (ctx , "no mdev devices found to reconcile" )
460+ return nil
461+ }
462+
463+ // Build lookup maps from instance info
464+ // mdevUUID -> instanceID for mdevs managed by hypeman
465+ hypemanMdevs := make (map [string ]string , len (instanceInfos ))
466+ // instanceID -> isRunning for liveness check
467+ instanceRunning := make (map [string ]bool , len (instanceInfos ))
468+ for _ , info := range instanceInfos {
469+ if info .MdevUUID != "" {
470+ hypemanMdevs [info .MdevUUID ] = info .InstanceID
471+ instanceRunning [info .InstanceID ] = info .IsRunning
472+ }
473+ }
474+
475+ log .InfoContext (ctx , "reconciling mdev devices" , "total_mdevs" , len (mdevs ), "hypeman_mdevs" , len (hypemanMdevs ))
476+
477+ var destroyed , skippedNotOurs , skippedInUse , skippedRunning int
408478 for _ , mdev := range mdevs {
409- // If mdev has an instance ID and that instance is not running, destroy it
410- // If mdev has no instance ID, it's orphaned (created but never tracked)
411- if mdev .InstanceID == "" || ! isInstanceRunning (mdev .InstanceID ) {
412- if err := DestroyMdev (mdev .UUID ); err != nil {
413- // Log but continue - best effort cleanup
414- continue
415- }
479+ // Only consider mdevs that hypeman created
480+ instanceID , isOurs := hypemanMdevs [mdev .UUID ]
481+ if ! isOurs {
482+ log .DebugContext (ctx , "skipping mdev not managed by hypeman" , "uuid" , mdev .UUID , "profile" , mdev .ProfileName )
483+ skippedNotOurs ++
484+ continue
416485 }
486+
487+ // Skip if instance is running or in unknown state (might still be using the mdev)
488+ if instanceRunning [instanceID ] {
489+ log .DebugContext (ctx , "skipping mdev for running/unknown instance" , "uuid" , mdev .UUID , "instance_id" , instanceID )
490+ skippedRunning ++
491+ continue
492+ }
493+
494+ // Check if mdev is bound to a driver (in use by VM)
495+ if IsMdevInUse (mdev .UUID ) {
496+ log .WarnContext (ctx , "skipping mdev still bound to driver" , "uuid" , mdev .UUID , "instance_id" , instanceID )
497+ skippedInUse ++
498+ continue
499+ }
500+
501+ // Safe to destroy - it's ours, instance is not running, and not bound to driver
502+ log .InfoContext (ctx , "destroying orphaned mdev" , "uuid" , mdev .UUID , "profile" , mdev .ProfileName , "instance_id" , instanceID )
503+ if err := DestroyMdev (ctx , mdev .UUID ); err != nil {
504+ // Log error but continue - best effort cleanup
505+ log .WarnContext (ctx , "failed to destroy orphaned mdev" , "uuid" , mdev .UUID , "error" , err )
506+ continue
507+ }
508+ destroyed ++
417509 }
418510
511+ log .InfoContext (ctx , "mdev reconciliation complete" ,
512+ "destroyed" , destroyed ,
513+ "skipped_not_ours" , skippedNotOurs ,
514+ "skipped_in_use" , skippedInUse ,
515+ "skipped_running" , skippedRunning ,
516+ )
517+
419518 return nil
420519}
0 commit comments