@@ -2,30 +2,51 @@ package worker
22
33import (
44 "fmt"
5+ "strconv"
6+ "time"
57
68 tfv1 "github.com/NexusGPU/tensor-fusion-operator/api/v1"
79 "github.com/NexusGPU/tensor-fusion-operator/internal/config"
10+ "github.com/NexusGPU/tensor-fusion-operator/internal/constants"
11+ "github.com/samber/lo"
12+ "golang.org/x/exp/rand"
813 corev1 "k8s.io/api/core/v1"
914 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1015 "k8s.io/apimachinery/pkg/types"
1116)
1217
18+ func init () {
19+ rand .Seed (uint64 (time .Now ().UnixNano ()))
20+ }
21+
1322type WorkerGenerator struct {
1423 WorkerConfig * config.Worker
1524}
1625
17- func (wg * WorkerGenerator ) GenerateConnectionURL (connection * tfv1.TensorFusionConnection , pod * corev1.Pod ) string {
18- return fmt .Sprintf ("native+%s+%d" , pod .Status .PodIP , wg .WorkerConfig .Port )
26+ func (wg * WorkerGenerator ) GenerateConnectionURL (connection * tfv1.TensorFusionConnection , pod * corev1.Pod ) (string , error ) {
27+ port , ok := lo .Find (pod .Spec .Containers [0 ].Env , func (env corev1.EnvVar ) bool {
28+ return env .Name == constants .WorkerPortEnv
29+ })
30+
31+ if ! ok {
32+ return "" , fmt .Errorf ("worker port not found in pod %s" , pod .Name )
33+ }
34+ return fmt .Sprintf ("native+%s+%d" , pod .Status .PodIP , port .Value ), nil
35+ }
36+
37+ func (wg * WorkerGenerator ) AllocPort () int16 {
38+ min := 30000
39+ max := 65535
40+ return int16 (rand .Intn (max - min + 1 ) + min )
1941}
2042
2143func (wg * WorkerGenerator ) GenerateWorkerPod (
2244 gpu * tfv1.GPU ,
2345 connection * tfv1.TensorFusionConnection ,
2446 namespacedName types.NamespacedName ,
47+ port int16 ,
2548) * corev1.Pod {
26-
2749 spec := wg .WorkerConfig .Template .Spec .DeepCopy ()
28-
2950 if spec .NodeSelector == nil {
3051 spec .NodeSelector = make (map [string ]string )
3152 }
@@ -34,13 +55,16 @@ func (wg *WorkerGenerator) GenerateWorkerPod(
3455 spec .Containers [0 ].Env = append (spec .Containers [0 ].Env , corev1.EnvVar {
3556 Name : "NVIDIA_VISIBLE_DEVICES" ,
3657 Value : gpu .Status .UUID ,
58+ }, corev1.EnvVar {
59+ Name : constants .WorkerPortEnv ,
60+ Value : strconv .Itoa (int (port )),
3761 })
3862
3963 return & corev1.Pod {
4064 ObjectMeta : metav1.ObjectMeta {
4165 Name : namespacedName .Name ,
4266 Namespace : namespacedName .Namespace ,
4367 },
44- Spec : spec ,
68+ Spec : * spec ,
4569 }
4670}
0 commit comments