Skip to content

Commit 057827b

Browse files
authored
fix: add limiter for worker (#61)
1 parent 8ff6faa commit 057827b

File tree

3 files changed

+15
-4
lines changed

3 files changed

+15
-4
lines changed

internal/constants/constants.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,11 @@ const (
4444
ConnectionNameEnv = "TENSOR_FUSION_CONNECTION_NAME"
4545
ConnectionNamespaceEnv = "TENSOR_FUSION_CONNECTION_NAMESPACE"
4646

47-
WorkerPortEnv = "TENSOR_FUSION_WORKER_PORT"
48-
NamespaceEnv = "OPERATOR_NAMESPACE"
49-
NamespaceDefaultVal = "tensor-fusion"
47+
WorkerPortEnv = "TENSOR_FUSION_WORKER_PORT"
48+
WokerCudaUpLimitEnv = "TENSOR_FUSION_CUDA_UP_LIMIT"
49+
WokerCudaMemLimitEnv = "TENSOR_FUSION_CUDA_MEM_LIMIT"
50+
NamespaceEnv = "OPERATOR_NAMESPACE"
51+
NamespaceDefaultVal = "tensor-fusion"
5052
)
5153

5254
const (

internal/controller/tensorfusionworkload_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ func (r *TensorFusionWorkloadReconciler) tryStartWorker(
165165
if errors.IsNotFound(err) {
166166
// Pod doesn't exist, create a new one
167167
port := workerGenerator.AllocPort()
168-
pod, err = workerGenerator.GenerateWorkerPod(gpu, namespacedName, port)
168+
pod, err = workerGenerator.GenerateWorkerPod(gpu, namespacedName, port, workload.Spec.Resources.Limits)
169169
if err != nil {
170170
return nil, fmt.Errorf("generate worker pod %w", err)
171171
}

internal/worker/worker.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ func (wg *WorkerGenerator) GenerateWorkerPod(
4848
gpu *tfv1.GPU,
4949
namespacedName types.NamespacedName,
5050
port int,
51+
limits tfv1.Resource,
5152
) (*corev1.Pod, error) {
5253
podTmpl := &corev1.PodTemplate{}
5354
err := json.Unmarshal(wg.WorkerConfig.PodTemplate.Raw, podTmpl)
@@ -78,6 +79,14 @@ func (wg *WorkerGenerator) GenerateWorkerPod(
7879
}, corev1.EnvVar{
7980
Name: constants.WorkerPortEnv,
8081
Value: strconv.Itoa(port),
82+
}, corev1.EnvVar{
83+
Name: constants.WokerCudaUpLimitEnv,
84+
// TODO: convert tflops to percent; hard-coded to "100" until that conversion exists
85+
Value: "100",
86+
}, corev1.EnvVar{
87+
Name: constants.WokerCudaMemLimitEnv,
88+
// VRAM limit as a base-10 integer string (presumably bytes — confirm against the worker's expectations)
89+
Value: strconv.FormatInt(limits.Vram.Value(), 10),
8190
})
8291

8392
return &corev1.Pod{

0 commit comments

Comments
 (0)