fix: env override for single node process, mem limit env var issue (#561)

Code2Life · web-flow · commit bd2643ec77c8 · 2026-02-05T22:14:19.000+08:00
* fix: optimize readme

* fix: env override for single node process

* fix: update hard memory limiter environment variable for GPU configuration
diff --git a/README.md b/README.md
@@ -40,25 +40,22 @@ Tensor Fusion is a state-of-the-art **GPU virtualization and pooling solution**
 - [Run vGPU in VM Hypervisor](https://tensor-fusion.ai/guide/getting-started/deployment-vm)
 - [Learn Essential Concepts & Architecture](https://tensor-fusion.ai/guide/getting-started/architecture)
 
-<!-- (TODO: Asciinema) -->
-
 ### 💬 Discussion
 
 - Discord channel: [https://discord.gg/2bybv9yQNk](https://discord.gg/2bybv9yQNk)
 - Discuss anything about TensorFusion: [GitHub Discussions](https://github.com/NexusGPU/tensor-fusion/discussions)
-- Contact us with WeCom for Greater China region: [企业微信](https://work.weixin.qq.com/ca/cawcde42751d9f6a29) 
+- Contact us via WeCom (Greater China region): [企业微信](https://work.weixin.qq.com/ca/cawcde42751d9f6a29)
 - Email us: [support@tensor-fusion.com](mailto:support@tensor-fusion.com)
 - Schedule [1:1 meeting with TensorFusion founders](https://tensor-fusion.ai/book-demo)
 
-
 ## 🔮 Features & Roadmap
 
 ### Core GPU Virtualization Features
 
 - [x] Fractional GPU and flexible oversubscription
 - [x] Remote GPU sharing with SOTA GPU-over-IP technology, less than 4% performance loss
 - [x] GPU VRAM expansion and hot/cold tiering
-- [x] None NVIDIA GPU/NPU vendor support
+- [x] Non-NVIDIA GPU/NPU vendor support
 
 ### Pooling & Scheduling & Management
 
@@ -67,7 +64,7 @@ Tensor Fusion is a state-of-the-art **GPU virtualization and pooling solution**
 - [x] GPU node auto provisioning/termination, Karpenter integration
 - [x] GPU compaction/bin-packing
 - [x] Take full control of GPU allocation with precision targeting by vendor, model, device index, and more
-- [x] Seamless onboarding experience for Pytorch, TensorFlow, llama.cpp, vLLM, Tensor-RT, SGlang and all popular AI training/serving frameworks
+- [x] Seamless onboarding experience for PyTorch, TensorFlow, llama.cpp, vLLM, TensorRT, SGLang and all popular AI training/serving frameworks
 - [x] Seamless migration from existing NVIDIA operator and device-plugin stack
 - [x] Centralized Dashboard & Control Plane
 - [x] GPU-first autoscaling policies, auto set requests/limits/replicas
diff --git a/pkg/constants/env.go b/pkg/constants/env.go
@@ -138,7 +138,7 @@ const (
 	// hard limiter (not open sourced) in megabytes, only takes effect on worker container and
 	// when open source vgpu.rs gpu-limiter is disabled
 	// when using this mode, memory request cannot autoscale dynamically
-	HardMemLimiterEnv = "TF_CUDA_MEMORY_LIMIT"
+	HardMemLimiterEnv = "TF_GPU_MEMORY_LIMIT"
 
 	TensorFusionRemoteWorkerPortNumber = 8000
 	TensorFusionRemoteWorkerPortName   = "remote-vgpu"
diff --git a/pkg/hypervisor/backend/single_node/single_node_backend.go b/pkg/hypervisor/backend/single_node/single_node_backend.go
@@ -9,6 +9,7 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
+	"strings"
 	"sync"
 	"syscall"
 	"time"
@@ -371,8 +372,22 @@ func (b *SingleNodeBackend) buildCmd(ps *processState) (*exec.Cmd, io.Closer, er
 	}
 
 	cmd := exec.Command(ps.executable, ps.args...)
-	cmd.Env = os.Environ()
+
+	// Build environment: start with current environment, then override with ps.env
+	envMap := make(map[string]string)
+	for _, env := range os.Environ() {
+		parts := strings.SplitN(env, "=", 2)
+		if len(parts) == 2 {
+			envMap[parts[0]] = parts[1]
+		}
+	}
+	// Override with custom environment variables
 	for k, v := range ps.env {
+		envMap[k] = v
+	}
+	// Convert back to []string format
+	cmd.Env = make([]string, 0, len(envMap))
+	for k, v := range envMap {
 		cmd.Env = append(cmd.Env, k+"="+v)
 	}
 	if ps.workingDir != "" {