@@ -25,21 +25,65 @@ import (
2525
2626// GPUNodeSpec defines the desired state of GPUNode.
2727type GPUNodeSpec struct {
28- // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
29- // Important: Run "make" to regenerate code after modifying this file
28+ ManageMode GPUNodeManageMode `json:"manageMode,omitempty"` // Typed by GPUNodeManageMode; see the GPUNodeManageMode* constants for the declared values.
3029
31- // Foo is an example field of GPUNode. Edit gpunode_types.go to remove/update
32- Foo string `json:"foo,omitempty"`
30+ // GPUCardIndices lists the indices of the GPU cards to use when not all GPU cards should be used.
31+ // Defaults to empty, which onboards all GPU cards to the pool.
32+ GPUCardIndices []int `json:"gpuCardIndices,omitempty"`
3333}
3434
35+ type GPUNodeManageMode string // GPUNodeManageMode is the string type of the GPUNodeSpec.ManageMode field.
36+
37+ const ( // Declared GPUNodeManageMode values.
38+ GPUNodeManageModeNone GPUNodeManageMode = "manual" // NOTE(review): identifier says "None" but the value is "manual" — confirm which is intended.
39+ GPUNodeManageModeAuto GPUNodeManageMode = "selected" // NOTE(review): identifier says "Auto" but the value is "selected" — confirm which is intended.
40+ GPUNodeManageModeManual GPUNodeManageMode = "provisioned" // NOTE(review): identifier says "Manual" but the value is "provisioned"; "manual" is the value of GPUNodeManageModeNone — confirm.
41+ )
42+
3543// GPUNodeStatus defines the observed state of GPUNode.
3644type GPUNodeStatus struct {
37- // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
38- // Important: Run "make" to regenerate code after modifying this file
45+ Phase TensorFusionClusterPhase `json:"phase,omitempty"` // NOTE(review): uses a type named for the cluster phase (declared outside this view) on a node status — confirm this reuse is intended.
46+
47+ Conditions []metav1.Condition `json:"conditions,omitempty"`
48+
49+ TotalTFlops int32 `json:"totalTFlops,omitempty"` // Stored as a whole number (int32).
50+ TotalVRAM string `json:"totalVRAM,omitempty"` // NOTE(review): plain string; the expected format/units are not visible here — confirm.
51+
52+ AvailableTFlops int32 `json:"availableTFlops,omitempty"` // Stored as a whole number (int32).
53+ AvailableVRAM string `json:"availableVRAM,omitempty"` // NOTE(review): plain string; the expected format/units are not visible here — confirm.
54+
55+ HypervisorStatus NodeHypervisorStatus `json:"hypervisorStatus,omitempty"` // NOTE(review): struct value; encoding/json's omitempty never omits a struct, so this key is always emitted.
56+
57+ NodeInfo GPUNodeInfo `json:"nodeInfo,omitempty"` // NOTE(review): struct value; encoding/json's omitempty never omits a struct, so this key is always emitted.
58+
59+ LoadedModels []string `json:"loadedModels,omitempty"`
60+
61+ TotalGPUs int32 `json:"totalGPUs,omitempty"` // NOTE(review): GPUNodeInfo.GPUCount is also an int32 GPU count — confirm how the two differ.
62+ ManagedGPUs int32 `json:"managedGPUs,omitempty"`
63+ ManagedGPUResourceIDs []string `json:"managedGPUResourceIDs,omitempty"`
64+ }
65+
66+ type GPUNodeInfo struct { // GPUNodeInfo is the value type of the GPUNodeStatus.NodeInfo field; all fields are optional in JSON.
67+ Hostname string `json:"hostname,omitempty"`
68+ IP string `json:"ip,omitempty"`
69+ KernalVersion string `json:"kernalVersion,omitempty"` // NOTE(review): "Kernal" looks like a misspelling of "Kernel"; the JSON key is part of the serialized form, so renaming it changes the API.
70+ OSImage string `json:"osImage,omitempty"`
71+ GPUDriverVersion string `json:"gpuDriverVersion,omitempty"`
72+ GPUModel string `json:"gpuModel,omitempty"` // NOTE(review): a single string per node — confirm how nodes with mixed GPU models are represented.
73+ GPUCount int32 `json:"gpuCount,omitempty"`
74+ OperatingSystem string `json:"operatingSystem,omitempty"`
75+ Architecture string `json:"architecture,omitempty"`
76+ }
77+
78+ type NodeHypervisorStatus struct { // NodeHypervisorStatus is the value type of the GPUNodeStatus.HypervisorStatus field.
79+ HypervisorState string `json:"hypervisorState,omitempty"` // NOTE(review): untyped string; the set of allowed states is not visible here — confirm.
80+ HypervisorVersion string `json:"hypervisorVersion,omitempty"`
81+ LastHeartbeatTime metav1.Time `json:"lastHeartbeatTime,omitempty"` // NOTE(review): non-pointer struct value, so omitempty does not omit it when unset; consider *metav1.Time if it should be optional — confirm.
3982}
4083
4184// +kubebuilder:object:root=true
4285// +kubebuilder:subresource:status
86+ // +kubebuilder:resource:scope=Cluster
4387
4488// GPUNode is the Schema for the gpunodes API.
4589type GPUNode struct {
0 commit comments