NexusGPU
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.vscode/launch.json‎
Lines changed: 4 additions & 1 deletion b/‎.vscode/launch.json‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎.vscode/settings.json‎
Lines changed: 7 additions & 0 deletions b/‎.vscode/settings.json‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎Makefile‎
Lines changed: 1 addition & 6 deletions b/‎Makefile‎
Lines changed: 1 addition & 6 deletions
diff --git a/‎api/v1/gpu_types.go‎
Lines changed: 12 additions & 0 deletions b/‎api/v1/gpu_types.go‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎api/v1/gpunode_funcs.go‎
Lines changed: 1 addition & 1 deletion b/‎api/v1/gpunode_funcs.go‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎api/v1/gpunode_types.go‎
Lines changed: 1 addition & 13 deletions b/‎api/v1/gpunode_types.go‎
Lines changed: 1 addition & 13 deletions
diff --git a/‎api/v1/gpupool_types.go‎
Lines changed: 3 additions & 1 deletion b/‎api/v1/gpupool_types.go‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎api/v1/zz_generated.deepcopy.go‎
Lines changed: 34 additions & 25 deletions b/‎api/v1/zz_generated.deepcopy.go‎
Lines changed: 34 additions & 25 deletions
diff --git a/‎charts/tensor-fusion/Chart.yaml‎
Lines changed: 2 additions & 2 deletions b/‎charts/tensor-fusion/Chart.yaml‎
Lines changed: 2 additions & 2 deletions
@@ -12,6 +12,7 @@ Dockerfile.cross
 
 # Output of the go coverage tool, specifically when used with LiteIDE
 *.out
+cover.out.*
 
 # Go workspace file
 go.work
 
@@ -55,7 +55,10 @@
             "type": "go",
             "request": "launch",
             "mode": "test",
-            "program": "${workspaceFolder}",
+            "env": {
+                "GO_TESTING": "true"
+            },
+            "program": "${workspaceFolder}/internal/controller",
             "console": "integratedTerminal"
         }
     ]
 
@@ -1,12 +1,15 @@
 {
     "cSpell.words": [
+        "alertmanager",
         "alicloud",
         "Aliyun",
         "AMDCDNA",
         "AMDRDNA",
         "apimachinery",
+        "automount",
         "AWSGPU",
         "batchv",
+        "burstable",
         "CDNA",
         "certificaterequests",
         "certmanager",
@@ -39,6 +42,7 @@
         "greptime",
         "greptimedb",
         "healthz",
+        "iface",
         "karpenter",
         "kubebuilder",
         "KUBECONFIG",
@@ -51,6 +55,7 @@
         "NVML",
         "omitempty",
         "onsi",
+        "portallocator",
         "printcolumn",
         "prometheusagents",
         "prometheuses",
@@ -62,11 +67,13 @@
         "schedulingconfigtemplates",
         "schedulingcorev",
         "shirou",
+        "strategicpatches",
         "subresource",
         "tensorfusion",
         "tensorfusionaiv",
         "tensorfusioncluster",
         "tensorfusionclusters",
+        "tensorfusionworkload",
         "Tera",
         "tflops",
         "Tmpl",
 
@@ -62,13 +62,8 @@ vet: ## Run go vet against code.
 
 .PHONY: test
 test: manifests generate fmt vet envtest ## Run tests.
-	KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v /e2e) -timeout 0 -coverprofile cover.out
+	KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" GO_TESTING=true go run github.com/onsi/ginkgo/v2/ginkgo -p -timeout 0 -cover -coverprofile cover.out -r --skip-file ./test/e2e
 
-# TODO(user): To use a different vendor for e2e tests, modify the setup under 'tests/e2e'.
-# The default setup assumes Kind is pre-installed and builds/loads the Manager Docker image locally.
-# Prometheus and CertManager are installed by default; skip with:
-# - PROMETHEUS_INSTALL_SKIP=true
-# - CERT_MANAGER_INSTALL_SKIP=true
 .PHONY: test-e2e
 test-e2e: manifests generate fmt vet ## Run the e2e tests. Expected an isolated environment using Kind.
 	@command -v kind >/dev/null 2>&1 || { \
 
@@ -36,6 +36,18 @@ type GPUStatus struct {
 	GPUModel     string            `json:"gpuModel"`
 
 	Message string `json:"message"`
+
+	// +optional
+	RunningApps []*RunningAppDetail `json:"runningApps,omitempty"`
+}
+
+type RunningAppDetail struct {
+	// Workload name and namespace
+	Name      string `json:"name,omitempty"`
+	Namespace string `json:"namespace,omitempty"`
+
+	// Worker count
+	Count int `json:"count"`
 }
 
 // +kubebuilder:validation:Enum=Pending;Provisioning;Running;Unknown;Destroying;Migrating
 
@@ -12,7 +12,7 @@ func (node *GPUNode) InitializeStatus(initTFlops, initVRAM resource.Quantity, in
 		TotalTFlops:         initTFlops,
 		TotalVRAM:           initVRAM,
 		TotalGPUs:           initGPUs,
-		AllocationDetails:   &[]GPUNodeAllocationDetails{},
+		AllocationInfo:      []*RunningAppDetail{},
 		LoadedModels:        &[]string{},
 		ManagedGPUDeviceIDs: []string{},
 		ObservedGeneration:  node.Generation,
 
@@ -94,20 +94,8 @@ type GPUNodeStatus struct {
 
 	ObservedGeneration int64 `json:"observedGeneration,omitempty"`
 
-	// Allocation details is for node compaction, and calculate used apps
 	// +optional
-	AllocationDetails *[]GPUNodeAllocationDetails `json:"allocationDetails,omitempty"`
-}
-
-type GPUNodeAllocationDetails struct {
-	PodID        string `json:"podID,omitempty"`
-	PodName      string `json:"podName,omitempty"`
-	Namespace    string `json:"namespace"`
-	WorkloadName string `json:"workload,omitempty"`
-
-	Requests GPUResourceUnit `json:"requests"`
-	Limits   GPUResourceUnit `json:"limits"`
-	QoS      QoSLevel        `json:"qos,omitempty"`
+	AllocationInfo []*RunningAppDetail `json:"allocationInfo,omitempty"`
 }
 
 // +kubebuilder:validation:Enum=Pending;Provisioning;Migrating;Running;Succeeded;Failed;Unknown;Destroying
 
@@ -293,7 +293,7 @@ type QosPricing struct {
 
 	Requests GPUResourcePricingUnit `json:"requests,omitempty"`
 
-	// Default requests and limitsOverRequests are same, indicates normal on-demand serverless GPU usage, in hands-on lab low QoS case, limitsOverRequests should be cheaper, for example Low QoS, ratio should be 0.5
+	// By default, requests and limitsOverRequests are charged the same, indicating normal on-demand serverless GPU usage. In the hands-on lab low-QoS case, limitsOverRequests should be lower, so that users can get burstable GPU resources at very low cost.
 	// +kubebuilder:default="1"
 	LimitsOverRequestsChargingRatio string `json:"limitsOverRequests,omitempty"`
 }
@@ -372,6 +372,8 @@ type GPUPoolStatus struct {
 	AvailableTFlops resource.Quantity `json:"availableTFlops"`
 	AvailableVRAM   resource.Quantity `json:"availableVRAM"`
 
+	RunningAppsCnt int32 `json:"runningAppsCnt,omitempty"`
+
 	// +optional
 	VirtualAvailableTFlops *resource.Quantity `json:"virtualAvailableTFlops,omitempty"`
 	// +optional
 
@@ -15,10 +15,10 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 1.2.22
+version: 1.3.2
 
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
-appVersion: "1.12.1"
+appVersion: "1.30.3"
Original file line number	Diff line number	Diff line change
`@@ -55,7 +55,10 @@`
`55`	`55`	`"type": "go",`
`56`	`56`	`"request": "launch",`
`57`	`57`	`"mode": "test",`
`58`		`- "program": "${workspaceFolder}",`
	`58`	`+ "env": {`
	`59`	`+ "GO_TESTING": "true"`
	`60`	`+ },`
	`61`	`+ "program": "${workspaceFolder}/internal/controller",`
`59`	`62`	`"console": "integratedTerminal"`
`60`	`63`	`}`
`61`	`64`	`]`