Skip to content

Commit 037a910

Browse files
committed
Resolve conflicts and fix test case
Signed-off-by: Qifan Deng <[email protected]>
1 parent 46e5d1e commit 037a910

File tree

2 files changed

+1
-154
lines changed

2 files changed

+1
-154
lines changed

pkg/dataset/custom_dataset_test.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,8 @@ var _ = Describe("CustomDataset", Ordered, func() {
193193
Prompt: testPrompt,
194194
MaxTokens: &n,
195195
}
196-
tokens, finishReason, err := dataset.GetTokens(req, common.ModeRandom)
196+
tokens, _, err := dataset.GetTokens(req, common.ModeRandom)
197197
Expect(err).NotTo(HaveOccurred())
198-
Expect(finishReason).To(Equal(LengthFinishReason))
199198
Expect(len(tokens)).To(BeNumerically("<=", 2))
200199
})
201200
})

pkg/llm-d-inference-sim/simulator.go

Lines changed: 0 additions & 152 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ import (
2929
"github.com/prometheus/client_golang/prometheus"
3030
"github.com/valyala/fasthttp"
3131
"golang.org/x/sync/errgroup"
32-
"gopkg.in/yaml.v3"
3332
"k8s.io/klog/v2"
3433

3534
"github.com/llm-d/llm-d-inference-sim/pkg/common"
@@ -260,69 +259,6 @@ func (s *VllmSimulator) initDataset() error {
260259
return nil
261260
}
262261

263-
func (s *VllmSimulator) newListener() (net.Listener, error) {
264-
s.logger.Info("Server starting", "port", s.config.Port)
265-
listener, err := net.Listen("tcp4", fmt.Sprintf(":%d", s.config.Port))
266-
if err != nil {
267-
return nil, err
268-
}
269-
return listener, nil
270-
}
271-
272-
// startServer starts http server on port defined in command line
273-
func (s *VllmSimulator) startServer(ctx context.Context, listener net.Listener) error {
274-
r := fasthttprouter.New()
275-
276-
// support completion APIs
277-
r.POST("/v1/chat/completions", s.HandleChatCompletions)
278-
r.POST("/v1/completions", s.HandleTextCompletions)
279-
// supports /models API
280-
r.GET("/v1/models", s.HandleModels)
281-
// support load/unload of lora adapter
282-
r.POST("/v1/load_lora_adapter", s.HandleLoadLora)
283-
r.POST("/v1/unload_lora_adapter", s.HandleUnloadLora)
284-
// supports /metrics prometheus API
285-
r.GET("/metrics", fasthttpadaptor.NewFastHTTPHandler(promhttp.HandlerFor(s.registry, promhttp.HandlerOpts{})))
286-
// supports standard Kubernetes health and readiness checks
287-
r.GET("/health", s.HandleHealth)
288-
r.GET("/ready", s.HandleReady)
289-
r.POST("/tokenize", s.HandleTokenize)
290-
291-
server := fasthttp.Server{
292-
ErrorHandler: s.HandleError,
293-
Handler: r.Handler,
294-
Logger: s,
295-
}
296-
297-
// Start server in a goroutine
298-
serverErr := make(chan error, 1)
299-
go func() {
300-
s.logger.Info("HTTP server starting")
301-
serverErr <- server.Serve(listener)
302-
}()
303-
304-
// Wait for either context cancellation or server error
305-
select {
306-
case <-ctx.Done():
307-
s.logger.Info("Shutdown signal received, shutting down HTTP server gracefully")
308-
309-
// Gracefully shutdown the server
310-
if err := server.Shutdown(); err != nil {
311-
s.logger.Error(err, "Error during server shutdown")
312-
return err
313-
}
314-
315-
s.logger.Info("HTTP server stopped")
316-
return nil
317-
318-
case err := <-serverErr:
319-
if err != nil {
320-
s.logger.Error(err, "HTTP server failed")
321-
}
322-
return err
323-
}
324-
}
325-
326262
// Print prints to a log, implementation of fasthttp.Logger
327263
func (s *VllmSimulator) Printf(format string, args ...interface{}) {
328264
s.logger.Info("Server error", "msg", fmt.Sprintf(format, args...))
@@ -594,66 +530,6 @@ func (s *VllmSimulator) createModelsResponse() *vllmapi.ModelsResponse {
594530
return &modelsResp
595531
}
596532

597-
// HandleHealth http handler for /health
598-
func (s *VllmSimulator) HandleHealth(ctx *fasthttp.RequestCtx) {
599-
s.logger.V(4).Info("health request received")
600-
ctx.Response.Header.SetContentType("application/json")
601-
ctx.Response.Header.SetStatusCode(fasthttp.StatusOK)
602-
ctx.Response.SetBody([]byte("{}"))
603-
}
604-
605-
// HandleReady http handler for /ready
606-
func (s *VllmSimulator) HandleReady(ctx *fasthttp.RequestCtx) {
607-
s.logger.V(4).Info("readiness request received")
608-
ctx.Response.Header.SetContentType("application/json")
609-
ctx.Response.Header.SetStatusCode(fasthttp.StatusOK)
610-
ctx.Response.SetBody([]byte("{}"))
611-
}
612-
613-
// getDisplayedModelName returns the model name that must appear in API
614-
// responses. LoRA adapters keep their explicit name, while all base-model
615-
// requests are surfaced as the first alias from --served-model-name.
616-
func (s *VllmSimulator) getDisplayedModelName(reqModel string) string {
617-
if s.isLora(reqModel) {
618-
return reqModel
619-
}
620-
return s.config.ServedModelNames[0]
621-
}
622-
623-
func (s *VllmSimulator) showConfig(dp bool) error {
624-
cfgYAML, err := yaml.Marshal(s.config)
625-
if err != nil {
626-
return fmt.Errorf("failed to marshal configuration to YAML: %w", err)
627-
}
628-
629-
var m map[string]interface{}
630-
err = yaml.Unmarshal(cfgYAML, &m)
631-
if err != nil {
632-
return fmt.Errorf("failed to unmarshal YAML to map: %w", err)
633-
}
634-
if dp {
635-
// remove the port
636-
delete(m, "port")
637-
}
638-
// clean LoraModulesString field
639-
m["lora-modules"] = m["LoraModules"]
640-
delete(m, "LoraModules")
641-
delete(m, "LoraModulesString")
642-
643-
// clean fake-metrics field
644-
if field, ok := m["fake-metrics"].(map[string]interface{}); ok {
645-
delete(field, "LorasString")
646-
}
647-
648-
// show in YAML
649-
cfgYAML, err = yaml.Marshal(m)
650-
if err != nil {
651-
return fmt.Errorf("failed to marshal configuration to YAML: %w", err)
652-
}
653-
s.logger.Info("Configuration:", "", string(cfgYAML))
654-
return nil
655-
}
656-
657533
func (s *VllmSimulator) getCurrFactor() float64 {
658534
if s.config.MaxNumSeqs <= 1 {
659535
return 1.0
@@ -676,31 +552,3 @@ func (s *VllmSimulator) GetPrefillTimePerToken() int {
676552
func (s *VllmSimulator) GetInterTokenLatency() int {
677553
return int(float64(s.config.InterTokenLatency) * s.getCurrFactor())
678554
}
679-
<<<<<<< HEAD
680-
681-
// generateTokens creates and returns response payload based on this request,
682-
// i.e., an array of generated tokens, the finish reason, and the number of generated tokens
683-
func (s *VllmSimulator) generateTokens(req openaiserverapi.CompletionRequest) ([]string, string, int, error) {
684-
ignoreEOS := req.GetIgnoreEOS()
685-
var maxTokens *int64
686-
var prompt string
687-
688-
if chatReq, ok := req.(*openaiserverapi.ChatCompletionRequest); ok {
689-
maxTokens = chatReq.GetMaxCompletionTokens()
690-
prompt = chatReq.GetLastUserMsg()
691-
} else if textReq, ok := req.(*openaiserverapi.TextCompletionRequest); ok {
692-
maxTokens = textReq.MaxTokens
693-
prompt = textReq.GetPrompt()
694-
} else {
695-
return nil, "", 0, fmt.Errorf("unknown request type: %T", req)
696-
}
697-
698-
var finishReason string
699-
var tokens []string
700-
if s.config.Mode == common.ModeEcho {
701-
tokens, finishReason = common.EchoResponseTokens(maxTokens, prompt)
702-
return tokens, finishReason, len(tokens), nil
703-
}
704-
tokens, finishReason = common.GetRandomTokens(maxTokens, ignoreEOS, s.dataset)
705-
return tokens, finishReason, len(tokens), nil
706-
}

0 commit comments

Comments
 (0)