From f39f3f26ddee8299cd85280dad547c6f9cd9580b Mon Sep 17 00:00:00 2001 From: Ira Date: Tue, 9 Sep 2025 10:45:22 +0300 Subject: [PATCH 1/2] Wait after starting rank 0 sim Signed-off-by: Ira --- pkg/llm-d-inference-sim/simulator.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pkg/llm-d-inference-sim/simulator.go b/pkg/llm-d-inference-sim/simulator.go index 24446685..b5be4740 100644 --- a/pkg/llm-d-inference-sim/simulator.go +++ b/pkg/llm-d-inference-sim/simulator.go @@ -152,8 +152,8 @@ func (s *VllmSimulator) Start(ctx context.Context) error { } // For Data Parallel, start data-parallel-size - 1 additional simulators + g, ctx := errgroup.WithContext(ctx) if s.config.DPSize > 1 { - g, ctx := errgroup.WithContext(context.Background()) for i := 2; i <= s.config.DPSize; i++ { newConfig, err := s.config.Copy() if err != nil { @@ -170,12 +170,15 @@ func (s *VllmSimulator) Start(ctx context.Context) error { return newSim.startSim(ctx) }) } - if err := g.Wait(); err != nil { - return err - } s.logger = klog.LoggerWithValues(s.logger, "rank", 0) } - return s.startSim(ctx) + if err := s.startSim(ctx); err != nil { + return err + } + if err := g.Wait(); err != nil { + return err + } + return nil } func (s *VllmSimulator) startSim(ctx context.Context) error { From a77c8c5ec615f1fa10d02156a5807d428f15f11e Mon Sep 17 00:00:00 2001 From: Ira Date: Tue, 9 Sep 2025 11:24:52 +0300 Subject: [PATCH 2/2] Start rank 0 similar to others Signed-off-by: Ira --- pkg/llm-d-inference-sim/simulator.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/llm-d-inference-sim/simulator.go b/pkg/llm-d-inference-sim/simulator.go index b5be4740..596cf6f9 100644 --- a/pkg/llm-d-inference-sim/simulator.go +++ b/pkg/llm-d-inference-sim/simulator.go @@ -172,9 +172,9 @@ func (s *VllmSimulator) Start(ctx context.Context) error { } s.logger = klog.LoggerWithValues(s.logger, "rank", 0) } - if err := s.startSim(ctx); err != nil { - return err - } + g.Go(func() error { + return s.startSim(ctx) + }) if err := g.Wait(); err != nil { return err }