@@ -60,8 +60,8 @@ type WorkloadProfileSpec struct {
6060
6161 // +optional
6262 // +kubebuilder:default=soft
63- // How to isolate computing resources, could be `shared` or `soft` or `hard`
64- ComputeIsolation ComputingIsolationMode `json:"computeIsolation ,omitempty"`
63+ // How to isolate resources, could be `shared` or `soft` or `hard` or `partitioned`
64+ Isolation IsolationModeType `json:"isolation,omitempty"`
6565
6666 // +optional
6767 // GPUModel specifies the required GPU model (e.g., "A100", "H100")
@@ -91,13 +91,30 @@ type WorkloadProfileSpec struct {
9191 WorkerPodTemplate * runtime.RawExtension `json:"workerPodTemplate,omitempty"`
9292}
9393
94- // +kubebuilder:validation:Enum=shared;soft;hard
95- type ComputingIsolationMode string
94+ // +kubebuilder:validation:Enum=shared;soft;hard;partitioned
95+ type IsolationModeType string
9696
9797const (
98- ComputingIsolationModeShared = "shared"
99- ComputingIsolationModeSoft = "soft"
100- ComputingIsolationModeHard = "hard"
98+ // no limits, rely on GPU built-in time-slicing, each process gets equal share of GPU
99+ // Pros: simple and stable, no performance overhead, maximize GPU utilization when well-scheduled
100+ // Cons: can not auto-scale and differentiate QoS levels, TFLOPs limit does not take effect, may cause resource contention
101+ IsolationModeShared = "shared"
102+
103+ // default isolation mode, use Proportional-Integral-Derivative controller to isolate computing resources and assign time slices
104+ // Pros: can set QoS levels for different workloads, TFLOPs limit is relatively accurate
105+ // Cons: ~1% performance overhead, resource contention may occur when burst credits are consumed
106+ IsolationModeSoft = "soft"
107+
108+ // use dedicated SMs to isolate computing resources
109+ // Pros: better performance isolation, no performance overhead, oversubscription is possible
110+ // Cons: can not auto-scale dynamically, percent may not reach 1%/1TFLOPs accuracy, coupled with GPU vendor's SM partitioning implementation
111+ // NOTE: this can only be used in Remote or Local+SidecarWorker mode, not supported in LocalGPU mode (because no TensorFusion Worker)
112+ IsolationModeHard = "hard"
113+
114+ // use GPU driver level partitioning to isolate resources, need hardware support
115+ // Pros: no performance overhead, no resource contention, fully-isolated
116+ // Cons: not supported by all GPUs/XPUs, oversubscription is not possible
117+ IsolationModePartitioned = "partitioned"
101118)
102119
103120func (t WorkloadProfileSpec ) IsDynamicReplica () bool {
0 commit comments