Skip to content

Commit 49107e7

Browse files
authored
fix: rename compute-isolation to isolation, add integralDecayFactor for PID controller, bump helm version (#418)
* fix: rename compute-isolation to isolation, add integralDecayFactor for PID controller, bump helm version * fix: bump dependency versions
1 parent 5b3db0f commit 49107e7

15 files changed

+124
-113
lines changed

api/v1/schedulingconfigtemplate_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,9 @@ type ElasticRateLimitParameters struct {
297297
// token bucket min and max
298298
CapacityMin string `json:"capacityMin,omitempty"`
299299
CapacityMax string `json:"capacityMax,omitempty"`
300+
301+
// Decay factor for integral term in PID controller, to avoid integral windup
302+
IntegralDecayFactor string `json:"integralDecayFactor,omitempty"`
300303
}
301304

302305
// SchedulingConfigTemplateStatus defines the observed state of SchedulingConfigTemplate.

api/v1/workloadprofile_types.go

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ type WorkloadProfileSpec struct {
6060

6161
// +optional
6262
// +kubebuilder:default=soft
63-
// How to isolate computing resources, could be `shared` or `soft` or `hard`
64-
ComputeIsolation ComputingIsolationMode `json:"computeIsolation,omitempty"`
63+
// How to isolate resources, could be `shared` or `soft` or `hard` or `partitioned`
64+
Isolation IsolationModeType `json:"isolation,omitempty"`
6565

6666
// +optional
6767
// GPUModel specifies the required GPU model (e.g., "A100", "H100")
@@ -91,13 +91,30 @@ type WorkloadProfileSpec struct {
9191
WorkerPodTemplate *runtime.RawExtension `json:"workerPodTemplate,omitempty"`
9292
}
9393

94-
// +kubebuilder:validation:Enum=shared;soft;hard
95-
type ComputingIsolationMode string
94+
// +kubebuilder:validation:Enum=shared;soft;hard;partitioned
95+
type IsolationModeType string
9696

9797
const (
98-
ComputingIsolationModeShared = "shared"
99-
ComputingIsolationModeSoft = "soft"
100-
ComputingIsolationModeHard = "hard"
98+
// no limits, rely on GPU built-in time-slicing, each process gets equal share of GPU
99+
// Pros: simple and stable, no performance overhead, maximize GPU utilization when well-scheduled
100+
// Cons: cannot auto-scale or differentiate QoS levels, TFLOPs limit does not take effect, may cause resource contention
101+
IsolationModeShared = "shared"
102+
103+
// default isolation mode, uses a Proportional-Integral-Derivative (PID) controller to isolate computing resources and assign time slices
104+
// Pros: can set QoS levels for different workloads, TFLOPs limit is relatively accurate
105+
// Cons: ~1% performance overhead, resource contention may occur when burst credits are consumed
106+
IsolationModeSoft = "soft"
107+
108+
// use dedicated SMs to isolate computing resources
109+
// Pros: better performance isolation, no performance overhead, oversubscription is possible
110+
// Cons: cannot auto-scale dynamically, percentage may not reach 1%/1 TFLOPs accuracy, coupled with the GPU vendor's SM partitioning implementation
111+
// NOTE: this can only be used in Remote or Local+SidecarWorker mode, not supported in LocalGPU mode (because there is no TensorFusion Worker)
112+
IsolationModeHard = "hard"
113+
114+
// use GPU driver level partitioning to isolate resources, need hardware support
115+
// Pros: no performance overhead, no resource contention, fully-isolated
116+
// Cons: not supported by all GPUs/XPUs, oversubscription is not possible
117+
IsolationModePartitioned = "partitioned"
101118
)
102119

103120
func (t WorkloadProfileSpec) IsDynamicReplica() bool {

charts/tensor-fusion/Chart.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ type: application
1515
# This is the chart version. This version number should be incremented each time you make changes
1616
# to the chart and its templates, including the app version.
1717
# Versions are expected to follow Semantic Versioning (https://semver.org/)
18-
version: 1.7.2
18+
version: 1.7.3
1919

2020
# This is the version number of the application being deployed. This version number should be
2121
# incremented each time you make changes to the application. Versions are not expected to
2222
# follow Semantic Versioning. They should reflect the version the application is using.
2323
# It is recommended to use it with quotes.
24-
appVersion: "1.48.3"
24+
appVersion: "1.48.5"

charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,10 @@ spec:
274274
description: Filter ineffective requests from rate limit,
275275
0.0 to 1.0
276276
type: string
277+
integralDecayFactor:
278+
description: Decay factor for integral term in PID controller,
279+
to avoid integral windup
280+
type: string
277281
kd:
278282
type: string
279283
ki:

charts/tensor-fusion/crds/tensor-fusion.ai_tensorfusionworkloads.yaml

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -238,15 +238,6 @@ spec:
238238
type: object
239239
type: array
240240
type: object
241-
computeIsolation:
242-
default: soft
243-
description: How to isolate computing resources, could be `shared`
244-
or `soft` or `hard`
245-
enum:
246-
- shared
247-
- soft
248-
- hard
249-
type: string
250241
gpuCount:
251242
description: The number of GPUs to be used by the workload, default
252243
to 1
@@ -266,6 +257,16 @@ spec:
266257
description: Schedule the workload to the same GPU server that runs
267258
vGPU worker for best performance, default to false
268259
type: boolean
260+
isolation:
261+
default: soft
262+
description: How to isolate resources, could be `shared` or `soft`
263+
or `hard` or `partitioned`
264+
enum:
265+
- shared
266+
- soft
267+
- hard
268+
- partitioned
269+
type: string
269270
nodeAffinity:
270271
description: NodeAffinity specifies the node affinity requirements
271272
for the workload

charts/tensor-fusion/crds/tensor-fusion.ai_workloadprofiles.yaml

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -225,15 +225,6 @@ spec:
225225
type: object
226226
type: array
227227
type: object
228-
computeIsolation:
229-
default: soft
230-
description: How to isolate computing resources, could be `shared`
231-
or `soft` or `hard`
232-
enum:
233-
- shared
234-
- soft
235-
- hard
236-
type: string
237228
gpuCount:
238229
description: The number of GPUs to be used by the workload, default
239230
to 1
@@ -253,6 +244,16 @@ spec:
253244
description: Schedule the workload to the same GPU server that runs
254245
vGPU worker for best performance, default to false
255246
type: boolean
247+
isolation:
248+
default: soft
249+
description: How to isolate resources, could be `shared` or `soft`
250+
or `hard` or `partitioned`
251+
enum:
252+
- shared
253+
- soft
254+
- hard
255+
- partitioned
256+
type: string
256257
nodeAffinity:
257258
description: NodeAffinity specifies the node affinity requirements
258259
for the workload

charts/tensor-fusion/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ controller:
3131
image:
3232
repository: tensorfusion/tensor-fusion-operator
3333
# Overrides the image tag whose default is the chart appVersion.
34-
tag: "1.48.2"
34+
tag: "1.48.5"
3535
# This is for setting Kubernetes Annotations to a Pod.
3636
# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
3737

config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,10 @@ spec:
274274
description: Filter ineffective requests from rate limit,
275275
0.0 to 1.0
276276
type: string
277+
integralDecayFactor:
278+
description: Decay factor for integral term in PID controller,
279+
to avoid integral windup
280+
type: string
277281
kd:
278282
type: string
279283
ki:

config/crd/bases/tensor-fusion.ai_tensorfusionworkloads.yaml

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -238,15 +238,6 @@ spec:
238238
type: object
239239
type: array
240240
type: object
241-
computeIsolation:
242-
default: soft
243-
description: How to isolate computing resources, could be `shared`
244-
or `soft` or `hard`
245-
enum:
246-
- shared
247-
- soft
248-
- hard
249-
type: string
250241
gpuCount:
251242
description: The number of GPUs to be used by the workload, default
252243
to 1
@@ -266,6 +257,16 @@ spec:
266257
description: Schedule the workload to the same GPU server that runs
267258
vGPU worker for best performance, default to false
268259
type: boolean
260+
isolation:
261+
default: soft
262+
description: How to isolate resources, could be `shared` or `soft`
263+
or `hard` or `partitioned`
264+
enum:
265+
- shared
266+
- soft
267+
- hard
268+
- partitioned
269+
type: string
269270
nodeAffinity:
270271
description: NodeAffinity specifies the node affinity requirements
271272
for the workload

config/crd/bases/tensor-fusion.ai_workloadprofiles.yaml

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -225,15 +225,6 @@ spec:
225225
type: object
226226
type: array
227227
type: object
228-
computeIsolation:
229-
default: soft
230-
description: How to isolate computing resources, could be `shared`
231-
or `soft` or `hard`
232-
enum:
233-
- shared
234-
- soft
235-
- hard
236-
type: string
237228
gpuCount:
238229
description: The number of GPUs to be used by the workload, default
239230
to 1
@@ -253,6 +244,16 @@ spec:
253244
description: Schedule the workload to the same GPU server that runs
254245
vGPU worker for best performance, default to false
255246
type: boolean
247+
isolation:
248+
default: soft
249+
description: How to isolate resources, could be `shared` or `soft`
250+
or `hard` or `partitioned`
251+
enum:
252+
- shared
253+
- soft
254+
- hard
255+
- partitioned
256+
type: string
256257
nodeAffinity:
257258
description: NodeAffinity specifies the node affinity requirements
258259
for the workload

0 commit comments

Comments
 (0)