Skip to content

Commit 6b3d6fc

Browse files
authored
Merge branch 'main' into feat/vm-utilization-metrics
2 parents 125d603 + 1616beb commit 6b3d6fc

File tree

4 files changed

+190
-37
lines changed

4 files changed

+190
-37
lines changed

cmd/api/api/api_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,10 @@ import (
2525
// newTestService creates an ApiService for testing with automatic cleanup
2626
func newTestService(t *testing.T) *ApiService {
2727
cfg := &config.Config{
28-
DataDir: t.TempDir(),
29-
BridgeName: "vmbr0",
30-
SubnetCIDR: "10.100.0.0/16",
31-
DNSServer: "1.1.1.1",
28+
DataDir: t.TempDir(),
29+
BridgeName: "vmbr0",
30+
SubnetCIDR: "10.100.0.0/16",
31+
DNSServer: "1.1.1.1",
3232
}
3333

3434
p := paths.New(cfg.DataDir)

cmd/api/config/config.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,9 @@ type Config struct {
118118
// Hypervisor configuration
119119
DefaultHypervisor string // Default hypervisor type: "cloud-hypervisor" or "qemu"
120120

121+
// GPU configuration
122+
GPUProfileCacheTTL string // TTL for GPU profile metadata cache (e.g., "30m")
123+
121124
// Oversubscription ratios (1.0 = no oversubscription, 2.0 = 2x oversubscription)
122125
OversubCPU float64 // CPU oversubscription ratio
123126
OversubMemory float64 // Memory oversubscription ratio
@@ -198,8 +201,8 @@ func Load() *Config {
198201
CloudflareApiToken: getEnv("CLOUDFLARE_API_TOKEN", ""),
199202

200203
// API ingress configuration
201-
ApiHostname: getEnv("API_HOSTNAME", ""), // Empty = disabled
202-
ApiTLS: getEnvBool("API_TLS", true), // Default to TLS enabled
204+
ApiHostname: getEnv("API_HOSTNAME", ""), // Empty = disabled
205+
ApiTLS: getEnvBool("API_TLS", true), // Default to TLS enabled
203206
ApiRedirectHTTP: getEnvBool("API_REDIRECT_HTTP", true),
204207

205208
// Build system configuration
@@ -212,6 +215,9 @@ func Load() *Config {
212215
// Hypervisor configuration
213216
DefaultHypervisor: getEnv("DEFAULT_HYPERVISOR", "cloud-hypervisor"),
214217

218+
// GPU configuration
219+
GPUProfileCacheTTL: getEnv("GPU_PROFILE_CACHE_TTL", "30m"),
220+
215221
// Oversubscription ratios (1.0 = no oversubscription)
216222
OversubCPU: getEnvFloat("OVERSUB_CPU", 4.0),
217223
OversubMemory: getEnvFloat("OVERSUB_MEMORY", 1.0),

cmd/api/main.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import (
1818
"github.com/ghodss/yaml"
1919
"github.com/go-chi/chi/v5"
2020
"github.com/go-chi/chi/v5/middleware"
21-
nethttpmiddleware "github.com/oapi-codegen/nethttp-middleware"
2221
"github.com/kernel/hypeman"
2322
"github.com/kernel/hypeman/cmd/api/api"
2423
"github.com/kernel/hypeman/cmd/api/config"
@@ -30,6 +29,7 @@ import (
3029
"github.com/kernel/hypeman/lib/oapi"
3130
"github.com/kernel/hypeman/lib/otel"
3231
"github.com/kernel/hypeman/lib/vmm"
32+
nethttpmiddleware "github.com/oapi-codegen/nethttp-middleware"
3333
"github.com/riandyrn/otelchi"
3434
"golang.org/x/sync/errgroup"
3535
)
@@ -51,6 +51,9 @@ func run() error {
5151
return fmt.Errorf("invalid configuration: %w", err)
5252
}
5353

54+
// Configure GPU profile cache TTL
55+
devices.SetGPUProfileCacheTTL(cfg.GPUProfileCacheTTL)
56+
5457
// Initialize OpenTelemetry (before wire initialization)
5558
otelCfg := otel.Config{
5659
Enabled: cfg.OtelEnabled,

lib/devices/mdev.go

Lines changed: 174 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,11 @@ import (
88
"os/exec"
99
"path/filepath"
1010
"regexp"
11+
"sort"
1112
"strconv"
1213
"strings"
1314
"sync"
15+
"time"
1416

1517
"github.com/google/uuid"
1618
"github.com/kernel/hypeman/lib/logger"
@@ -32,12 +34,57 @@ type profileMetadata struct {
3234
FramebufferMB int
3335
}
3436

35-
// cachedProfiles holds static profile metadata, loaded once on first access
37+
// cachedProfiles holds profile metadata with TTL-based expiry.
3638
var (
3739
cachedProfiles []profileMetadata
38-
cachedProfilesOnce sync.Once
40+
cachedProfilesMu sync.RWMutex
41+
cachedProfilesTime time.Time
42+
gpuProfileCacheTTL time.Duration = 30 * time.Minute // default
3943
)
4044

45+
// SetGPUProfileCacheTTL sets the TTL for GPU profile metadata cache.
46+
// Should be called during application startup with the config value.
47+
func SetGPUProfileCacheTTL(ttl string) {
48+
if ttl == "" {
49+
return
50+
}
51+
if d, err := time.ParseDuration(ttl); err == nil {
52+
gpuProfileCacheTTL = d
53+
}
54+
}
55+
56+
// getProfileCacheTTL returns the configured TTL for profile metadata cache.
57+
func getProfileCacheTTL() time.Duration {
58+
return gpuProfileCacheTTL
59+
}
60+
61+
// getCachedProfiles returns cached profile metadata, refreshing if TTL has expired.
62+
func getCachedProfiles(firstVF string) []profileMetadata {
63+
ttl := getProfileCacheTTL()
64+
65+
// Fast path: check with read lock
66+
cachedProfilesMu.RLock()
67+
if len(cachedProfiles) > 0 && time.Since(cachedProfilesTime) < ttl {
68+
profiles := cachedProfiles
69+
cachedProfilesMu.RUnlock()
70+
return profiles
71+
}
72+
cachedProfilesMu.RUnlock()
73+
74+
// Slow path: refresh cache with write lock
75+
cachedProfilesMu.Lock()
76+
defer cachedProfilesMu.Unlock()
77+
78+
// Double-check after acquiring write lock
79+
if len(cachedProfiles) > 0 && time.Since(cachedProfilesTime) < ttl {
80+
return cachedProfiles
81+
}
82+
83+
cachedProfiles = loadProfileMetadata(firstVF)
84+
cachedProfilesTime = time.Now()
85+
return cachedProfiles
86+
}
87+
4188
// DiscoverVFs returns all SR-IOV Virtual Functions available for vGPU.
4289
// These are discovered by scanning /sys/class/mdev_bus/ which contains
4390
// VFs that can host mdev devices.
@@ -100,17 +147,15 @@ func ListGPUProfilesWithVFs(vfs []VirtualFunction) ([]GPUProfile, error) {
100147
return nil, nil
101148
}
102149

103-
// Load static profile metadata once (cached indefinitely)
104-
cachedProfilesOnce.Do(func() {
105-
cachedProfiles = loadProfileMetadata(vfs[0].PCIAddress)
106-
})
150+
// Load profile metadata with TTL-based caching
151+
cachedMeta := getCachedProfiles(vfs[0].PCIAddress)
107152

108153
// Count availability for all profiles in parallel
109-
availability := countAvailableVFsForProfilesParallel(vfs, cachedProfiles)
154+
availability := countAvailableVFsForProfilesParallel(vfs, cachedMeta)
110155

111156
// Build result with dynamic availability counts
112-
profiles := make([]GPUProfile, 0, len(cachedProfiles))
113-
for _, meta := range cachedProfiles {
157+
profiles := make([]GPUProfile, 0, len(cachedMeta))
158+
for _, meta := range cachedMeta {
114159
profiles = append(profiles, GPUProfile{
115160
Name: meta.Name,
116161
FramebufferMB: meta.FramebufferMB,
@@ -194,8 +239,8 @@ func parseFramebufferFromDescription(typeDir string) int {
194239
}
195240

196241
// countAvailableVFsForProfilesParallel counts available instances for all profiles in parallel.
197-
// Optimized: all VFs on the same parent GPU have identical profile support,
198-
// so we only sample one VF per parent instead of reading from every VF.
242+
// Groups VFs by parent GPU, then sums available_instances across all free VFs.
243+
// For SR-IOV vGPU, each VF typically has available_instances of 0 or 1.
199244
func countAvailableVFsForProfilesParallel(vfs []VirtualFunction, profiles []profileMetadata) map[string]int {
200245
if len(vfs) == 0 || len(profiles) == 0 {
201246
return make(map[string]int)
@@ -352,6 +397,118 @@ func getProfileNameFromType(profileType, vfAddress string) string {
352397
return strings.TrimSpace(string(data))
353398
}
354399

400+
// getProfileFramebufferMB returns the framebuffer size in MB for a profile type.
401+
// Uses cached profile metadata for fast lookup.
402+
func getProfileFramebufferMB(profileType string) int {
403+
cachedProfilesMu.RLock()
404+
defer cachedProfilesMu.RUnlock()
405+
406+
for _, p := range cachedProfiles {
407+
if p.TypeName == profileType {
408+
return p.FramebufferMB
409+
}
410+
}
411+
return 0
412+
}
413+
414+
// calculateGPUVRAMUsage calculates VRAM usage per GPU from active mdevs.
415+
// Returns a map of parentGPU -> usedVRAMMB.
416+
func calculateGPUVRAMUsage(vfs []VirtualFunction, mdevs []MdevDevice) map[string]int {
417+
// Build VF -> parentGPU lookup
418+
vfToParent := make(map[string]string, len(vfs))
419+
for _, vf := range vfs {
420+
vfToParent[vf.PCIAddress] = vf.ParentGPU
421+
}
422+
423+
// Sum framebuffer usage per GPU
424+
usageByGPU := make(map[string]int)
425+
for _, mdev := range mdevs {
426+
parentGPU := vfToParent[mdev.VFAddress]
427+
if parentGPU == "" {
428+
continue
429+
}
430+
usageByGPU[parentGPU] += getProfileFramebufferMB(mdev.ProfileType)
431+
}
432+
433+
return usageByGPU
434+
}
435+
436+
// selectLeastLoadedVF selects a VF from the GPU with the most available VRAM
437+
// that can create the requested profile. Returns empty string if none available.
438+
func selectLeastLoadedVF(ctx context.Context, vfs []VirtualFunction, profileType string) string {
439+
log := logger.FromContext(ctx)
440+
441+
// Get active mdevs to calculate VRAM usage
442+
mdevs, _ := ListMdevDevices()
443+
444+
// Calculate VRAM usage per GPU
445+
vramUsage := calculateGPUVRAMUsage(vfs, mdevs)
446+
447+
// Group free VFs by parent GPU
448+
freeVFsByGPU := make(map[string][]VirtualFunction)
449+
allGPUs := make(map[string]bool)
450+
for _, vf := range vfs {
451+
allGPUs[vf.ParentGPU] = true
452+
if !vf.HasMdev {
453+
freeVFsByGPU[vf.ParentGPU] = append(freeVFsByGPU[vf.ParentGPU], vf)
454+
}
455+
}
456+
457+
// Build list of GPUs sorted by VRAM usage (ascending = least loaded first)
458+
type gpuLoad struct {
459+
gpu string
460+
usedMB int
461+
}
462+
var gpuLoads []gpuLoad
463+
for gpu := range allGPUs {
464+
gpuLoads = append(gpuLoads, gpuLoad{gpu: gpu, usedMB: vramUsage[gpu]})
465+
}
466+
sort.Slice(gpuLoads, func(i, j int) bool {
467+
return gpuLoads[i].usedMB < gpuLoads[j].usedMB
468+
})
469+
470+
log.DebugContext(ctx, "GPU VRAM usage for load balancing",
471+
"gpu_count", len(gpuLoads),
472+
"profile_type", profileType)
473+
474+
// Try each GPU in order of least loaded
475+
for _, gl := range gpuLoads {
476+
freeVFs := freeVFsByGPU[gl.gpu]
477+
if len(freeVFs) == 0 {
478+
log.DebugContext(ctx, "skipping GPU: no free VFs",
479+
"gpu", gl.gpu,
480+
"used_mb", gl.usedMB)
481+
continue
482+
}
483+
484+
// Check if any free VF on this GPU can create the profile
485+
for _, vf := range freeVFs {
486+
availPath := filepath.Join(mdevBusPath, vf.PCIAddress, "mdev_supported_types", profileType, "available_instances")
487+
data, err := os.ReadFile(availPath)
488+
if err != nil {
489+
continue
490+
}
491+
instances, err := strconv.Atoi(strings.TrimSpace(string(data)))
492+
if err != nil || instances < 1 {
493+
continue
494+
}
495+
496+
log.DebugContext(ctx, "selected VF from least loaded GPU",
497+
"vf", vf.PCIAddress,
498+
"gpu", gl.gpu,
499+
"gpu_used_mb", gl.usedMB)
500+
return vf.PCIAddress
501+
}
502+
503+
log.DebugContext(ctx, "skipping GPU: no VF can create profile",
504+
"gpu", gl.gpu,
505+
"used_mb", gl.usedMB,
506+
"profile_type", profileType)
507+
}
508+
509+
return ""
510+
}
511+
355512
// CreateMdev creates an mdev device for the given profile and instance.
356513
// It finds an available VF and creates the mdev, returning the device info.
357514
// This function is thread-safe and uses a mutex to prevent race conditions
@@ -369,32 +526,19 @@ func CreateMdev(ctx context.Context, profileName, instanceID string) (*MdevDevic
369526
return nil, err
370527
}
371528

372-
// Find an available VF
529+
// Discover all VFs
373530
vfs, err := DiscoverVFs()
374531
if err != nil {
375532
return nil, fmt.Errorf("discover VFs: %w", err)
376533
}
377534

378-
var targetVF string
379-
for _, vf := range vfs {
380-
// Skip VFs that already have an mdev
381-
if vf.HasMdev {
382-
continue
383-
}
384-
// Check if this VF can create the profile
385-
availPath := filepath.Join(mdevBusPath, vf.PCIAddress, "mdev_supported_types", profileType, "available_instances")
386-
data, err := os.ReadFile(availPath)
387-
if err != nil {
388-
continue
389-
}
390-
instances, err := strconv.Atoi(strings.TrimSpace(string(data)))
391-
if err != nil || instances < 1 {
392-
continue
393-
}
394-
targetVF = vf.PCIAddress
395-
break
535+
// Ensure profile cache is populated (needed for VRAM calculation)
536+
if len(vfs) > 0 {
537+
_ = getCachedProfiles(vfs[0].PCIAddress)
396538
}
397539

540+
// Select VF from the least loaded GPU (by VRAM usage)
541+
targetVF := selectLeastLoadedVF(ctx, vfs, profileType)
398542
if targetVF == "" {
399543
return nil, fmt.Errorf("no available VF for profile %q", profileName)
400544
}

0 commit comments

Comments
 (0)