Skip to content

Commit 50bbec0

Browse files
authored
DP bug fix: wait after starting rank 0 sim (#193)
* Wait after starting rank 0 sim

  Signed-off-by: Ira <[email protected]>

* Start rank 0 similar to others

  Signed-off-by: Ira <[email protected]>

---------

Signed-off-by: Ira <[email protected]>
1 parent 9067bc8 commit 50bbec0

File tree

1 file changed

+8
-5
lines changed

1 file changed

+8
-5
lines changed

pkg/llm-d-inference-sim/simulator.go

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -155,8 +155,8 @@ func (s *VllmSimulator) Start(ctx context.Context) error {
155155
}
156156

157157
// For Data Parallel, start data-parallel-size - 1 additional simulators
158+
g, ctx := errgroup.WithContext(ctx)
158159
if s.config.DPSize > 1 {
159-
g, ctx := errgroup.WithContext(context.Background())
160160
for i := 2; i <= s.config.DPSize; i++ {
161161
newConfig, err := s.config.Copy()
162162
if err != nil {
@@ -173,12 +173,15 @@ func (s *VllmSimulator) Start(ctx context.Context) error {
173173
return newSim.startSim(ctx)
174174
})
175175
}
176-
if err := g.Wait(); err != nil {
177-
return err
178-
}
179176
s.logger = klog.LoggerWithValues(s.logger, "rank", 0)
180177
}
181-
return s.startSim(ctx)
178+
g.Go(func() error {
179+
return s.startSim(ctx)
180+
})
181+
if err := g.Wait(); err != nil {
182+
return err
183+
}
184+
return nil
182185
}
183186

184187
func (s *VllmSimulator) startSim(ctx context.Context) error {

0 commit comments

Comments
 (0)