@@ -8,9 +8,11 @@ import (
88 "os/exec"
99 "path/filepath"
1010 "regexp"
11+ "sort"
1112 "strconv"
1213 "strings"
1314 "sync"
15+ "time"
1416
1517 "github.com/google/uuid"
1618 "github.com/kernel/hypeman/lib/logger"
@@ -32,12 +34,57 @@ type profileMetadata struct {
3234 FramebufferMB int
3335}
3436
35- // cachedProfiles holds static profile metadata, loaded once on first access
37+ // cachedProfiles holds profile metadata with TTL-based expiry.
3638var (
3739 cachedProfiles []profileMetadata
38- cachedProfilesOnce sync.Once
40+ cachedProfilesMu sync.RWMutex
41+ cachedProfilesTime time.Time
42+ gpuProfileCacheTTL time.Duration = 30 * time .Minute // default
3943)
4044
45+ // SetGPUProfileCacheTTL sets the TTL for GPU profile metadata cache.
46+ // Should be called during application startup with the config value.
47+ func SetGPUProfileCacheTTL (ttl string ) {
48+ if ttl == "" {
49+ return
50+ }
51+ if d , err := time .ParseDuration (ttl ); err == nil {
52+ gpuProfileCacheTTL = d
53+ }
54+ }
55+
56+ // getProfileCacheTTL returns the configured TTL for profile metadata cache.
57+ func getProfileCacheTTL () time.Duration {
58+ return gpuProfileCacheTTL
59+ }
60+
61+ // getCachedProfiles returns cached profile metadata, refreshing if TTL has expired.
62+ func getCachedProfiles (firstVF string ) []profileMetadata {
63+ ttl := getProfileCacheTTL ()
64+
65+ // Fast path: check with read lock
66+ cachedProfilesMu .RLock ()
67+ if len (cachedProfiles ) > 0 && time .Since (cachedProfilesTime ) < ttl {
68+ profiles := cachedProfiles
69+ cachedProfilesMu .RUnlock ()
70+ return profiles
71+ }
72+ cachedProfilesMu .RUnlock ()
73+
74+ // Slow path: refresh cache with write lock
75+ cachedProfilesMu .Lock ()
76+ defer cachedProfilesMu .Unlock ()
77+
78+ // Double-check after acquiring write lock
79+ if len (cachedProfiles ) > 0 && time .Since (cachedProfilesTime ) < ttl {
80+ return cachedProfiles
81+ }
82+
83+ cachedProfiles = loadProfileMetadata (firstVF )
84+ cachedProfilesTime = time .Now ()
85+ return cachedProfiles
86+ }
87+
4188// DiscoverVFs returns all SR-IOV Virtual Functions available for vGPU.
4289// These are discovered by scanning /sys/class/mdev_bus/ which contains
4390// VFs that can host mdev devices.
@@ -100,17 +147,15 @@ func ListGPUProfilesWithVFs(vfs []VirtualFunction) ([]GPUProfile, error) {
100147 return nil , nil
101148 }
102149
103- // Load static profile metadata once (cached indefinitely)
104- cachedProfilesOnce .Do (func () {
105- cachedProfiles = loadProfileMetadata (vfs [0 ].PCIAddress )
106- })
150+ // Load profile metadata with TTL-based caching
151+ cachedMeta := getCachedProfiles (vfs [0 ].PCIAddress )
107152
108153 // Count availability for all profiles in parallel
109- availability := countAvailableVFsForProfilesParallel (vfs , cachedProfiles )
154+ availability := countAvailableVFsForProfilesParallel (vfs , cachedMeta )
110155
111156 // Build result with dynamic availability counts
112- profiles := make ([]GPUProfile , 0 , len (cachedProfiles ))
113- for _ , meta := range cachedProfiles {
157+ profiles := make ([]GPUProfile , 0 , len (cachedMeta ))
158+ for _ , meta := range cachedMeta {
114159 profiles = append (profiles , GPUProfile {
115160 Name : meta .Name ,
116161 FramebufferMB : meta .FramebufferMB ,
@@ -194,8 +239,8 @@ func parseFramebufferFromDescription(typeDir string) int {
194239}
195240
196241// countAvailableVFsForProfilesParallel counts available instances for all profiles in parallel.
197- // Optimized: all VFs on the same parent GPU have identical profile support,
198- // so we only sample one VF per parent instead of reading from every VF .
242+ // Groups VFs by parent GPU, then sums available_instances across all free VFs.
243+ // For SR-IOV vGPU, each VF typically has available_instances of 0 or 1 .
199244func countAvailableVFsForProfilesParallel (vfs []VirtualFunction , profiles []profileMetadata ) map [string ]int {
200245 if len (vfs ) == 0 || len (profiles ) == 0 {
201246 return make (map [string ]int )
@@ -352,6 +397,118 @@ func getProfileNameFromType(profileType, vfAddress string) string {
352397 return strings .TrimSpace (string (data ))
353398}
354399
400+ // getProfileFramebufferMB returns the framebuffer size in MB for a profile type.
401+ // Uses cached profile metadata for fast lookup.
402+ func getProfileFramebufferMB (profileType string ) int {
403+ cachedProfilesMu .RLock ()
404+ defer cachedProfilesMu .RUnlock ()
405+
406+ for _ , p := range cachedProfiles {
407+ if p .TypeName == profileType {
408+ return p .FramebufferMB
409+ }
410+ }
411+ return 0
412+ }
413+
414+ // calculateGPUVRAMUsage calculates VRAM usage per GPU from active mdevs.
415+ // Returns a map of parentGPU -> usedVRAMMB.
416+ func calculateGPUVRAMUsage (vfs []VirtualFunction , mdevs []MdevDevice ) map [string ]int {
417+ // Build VF -> parentGPU lookup
418+ vfToParent := make (map [string ]string , len (vfs ))
419+ for _ , vf := range vfs {
420+ vfToParent [vf .PCIAddress ] = vf .ParentGPU
421+ }
422+
423+ // Sum framebuffer usage per GPU
424+ usageByGPU := make (map [string ]int )
425+ for _ , mdev := range mdevs {
426+ parentGPU := vfToParent [mdev .VFAddress ]
427+ if parentGPU == "" {
428+ continue
429+ }
430+ usageByGPU [parentGPU ] += getProfileFramebufferMB (mdev .ProfileType )
431+ }
432+
433+ return usageByGPU
434+ }
435+
436+ // selectLeastLoadedVF selects a VF from the GPU with the most available VRAM
437+ // that can create the requested profile. Returns empty string if none available.
438+ func selectLeastLoadedVF (ctx context.Context , vfs []VirtualFunction , profileType string ) string {
439+ log := logger .FromContext (ctx )
440+
441+ // Get active mdevs to calculate VRAM usage
442+ mdevs , _ := ListMdevDevices ()
443+
444+ // Calculate VRAM usage per GPU
445+ vramUsage := calculateGPUVRAMUsage (vfs , mdevs )
446+
447+ // Group free VFs by parent GPU
448+ freeVFsByGPU := make (map [string ][]VirtualFunction )
449+ allGPUs := make (map [string ]bool )
450+ for _ , vf := range vfs {
451+ allGPUs [vf .ParentGPU ] = true
452+ if ! vf .HasMdev {
453+ freeVFsByGPU [vf .ParentGPU ] = append (freeVFsByGPU [vf .ParentGPU ], vf )
454+ }
455+ }
456+
457+ // Build list of GPUs sorted by VRAM usage (ascending = least loaded first)
458+ type gpuLoad struct {
459+ gpu string
460+ usedMB int
461+ }
462+ var gpuLoads []gpuLoad
463+ for gpu := range allGPUs {
464+ gpuLoads = append (gpuLoads , gpuLoad {gpu : gpu , usedMB : vramUsage [gpu ]})
465+ }
466+ sort .Slice (gpuLoads , func (i , j int ) bool {
467+ return gpuLoads [i ].usedMB < gpuLoads [j ].usedMB
468+ })
469+
470+ log .DebugContext (ctx , "GPU VRAM usage for load balancing" ,
471+ "gpu_count" , len (gpuLoads ),
472+ "profile_type" , profileType )
473+
474+ // Try each GPU in order of least loaded
475+ for _ , gl := range gpuLoads {
476+ freeVFs := freeVFsByGPU [gl .gpu ]
477+ if len (freeVFs ) == 0 {
478+ log .DebugContext (ctx , "skipping GPU: no free VFs" ,
479+ "gpu" , gl .gpu ,
480+ "used_mb" , gl .usedMB )
481+ continue
482+ }
483+
484+ // Check if any free VF on this GPU can create the profile
485+ for _ , vf := range freeVFs {
486+ availPath := filepath .Join (mdevBusPath , vf .PCIAddress , "mdev_supported_types" , profileType , "available_instances" )
487+ data , err := os .ReadFile (availPath )
488+ if err != nil {
489+ continue
490+ }
491+ instances , err := strconv .Atoi (strings .TrimSpace (string (data )))
492+ if err != nil || instances < 1 {
493+ continue
494+ }
495+
496+ log .DebugContext (ctx , "selected VF from least loaded GPU" ,
497+ "vf" , vf .PCIAddress ,
498+ "gpu" , gl .gpu ,
499+ "gpu_used_mb" , gl .usedMB )
500+ return vf .PCIAddress
501+ }
502+
503+ log .DebugContext (ctx , "skipping GPU: no VF can create profile" ,
504+ "gpu" , gl .gpu ,
505+ "used_mb" , gl .usedMB ,
506+ "profile_type" , profileType )
507+ }
508+
509+ return ""
510+ }
511+
355512// CreateMdev creates an mdev device for the given profile and instance.
356513// It finds an available VF and creates the mdev, returning the device info.
357514// This function is thread-safe and uses a mutex to prevent race conditions
@@ -369,32 +526,19 @@ func CreateMdev(ctx context.Context, profileName, instanceID string) (*MdevDevic
369526 return nil , err
370527 }
371528
372- // Find an available VF
529+ // Discover all VFs
373530 vfs , err := DiscoverVFs ()
374531 if err != nil {
375532 return nil , fmt .Errorf ("discover VFs: %w" , err )
376533 }
377534
378- var targetVF string
379- for _ , vf := range vfs {
380- // Skip VFs that already have an mdev
381- if vf .HasMdev {
382- continue
383- }
384- // Check if this VF can create the profile
385- availPath := filepath .Join (mdevBusPath , vf .PCIAddress , "mdev_supported_types" , profileType , "available_instances" )
386- data , err := os .ReadFile (availPath )
387- if err != nil {
388- continue
389- }
390- instances , err := strconv .Atoi (strings .TrimSpace (string (data )))
391- if err != nil || instances < 1 {
392- continue
393- }
394- targetVF = vf .PCIAddress
395- break
535+ // Ensure profile cache is populated (needed for VRAM calculation)
536+ if len (vfs ) > 0 {
537+ _ = getCachedProfiles (vfs [0 ].PCIAddress )
396538 }
397539
540+ // Select VF from the least loaded GPU (by VRAM usage)
541+ targetVF := selectLeastLoadedVF (ctx , vfs , profileType )
398542 if targetVF == "" {
399543 return nil , fmt .Errorf ("no available VF for profile %q" , profileName )
400544 }
0 commit comments