diff --git a/README.md b/README.md index 0853bd60..7e1f07ba 100644 --- a/README.md +++ b/README.md @@ -189,7 +189,11 @@ In addition, as we are using klog, the following parameters are available: - `skip_headers`: if true, avoid header prefixes in the log messages - `skip_log_headers`: if true, avoid headers when opening log files (no effect when -logtostderr=true) - `stderrthreshold`: logs at or above this threshold go to stderr when writing to files and stderr (no effect when -logtostderr=true or -alsologtostderr=true) (default 2) -- `v`: number for the log level verbosity +- `v`: number for the log level verbosity. Supported levels: + - Warning (1) - warning messages + - Info (2) - general application messages, e.g., loaded configuration content, which responses dataset was loaded, etc. + - Debug (4) - debugging messages, e.g. /completions and /chat/completions request received, load/unload lora request processed, etc. + - Trace (5) - highest verbosity, e.g. detailed messages on completions request handling and request queue processing, etc. - `vmodule`: comma-separated list of pattern=N settings for file-filtered logging ## Environment variables diff --git a/cmd/llm-d-inference-sim/main.go b/cmd/llm-d-inference-sim/main.go index 1212d2b7..ec0dd8a9 100644 --- a/cmd/llm-d-inference-sim/main.go +++ b/cmd/llm-d-inference-sim/main.go @@ -24,6 +24,7 @@ import ( "k8s.io/klog/v2" "github.com/llm-d/llm-d-inference-sim/cmd/signals" + "github.com/llm-d/llm-d-inference-sim/pkg/common/logging" vllmsim "github.com/llm-d/llm-d-inference-sim/pkg/llm-d-inference-sim" ) @@ -33,11 +34,11 @@ func main() { ctx := klog.NewContext(context.Background(), logger) ctx = signals.SetupSignalHandler(ctx) - logger.Info("Starting vLLM simulator") + logger.V(logging.INFO).Info("Starting vLLM simulator") vllmSim, err := vllmsim.New(logger) if err != nil { - logger.Error(err, "Failed to create vLLM simulator") + logger.Error(err, "failed to create vLLM simulator") return } if err := vllmSim.Start(ctx); err != nil { diff --git a/pkg/common/logging/levels.go b/pkg/common/logging/levels.go new file mode 100644 index 00000000..3c232bc2 --- /dev/null +++ b/pkg/common/logging/levels.go @@ -0,0 +1,24 @@ +/* +Copyright 2025 The llm-d-inference-sim Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package logging + +const ( + WARN = 1 + INFO = 2 + DEBUG = 4 + TRACE = 5 +) diff --git a/pkg/common/publisher.go b/pkg/common/publisher.go index 883c05a2..c8dc10ac 100644 --- a/pkg/common/publisher.go +++ b/pkg/common/publisher.go @@ -25,6 +25,7 @@ import ( "sync/atomic" "time" + "github.com/llm-d/llm-d-inference-sim/pkg/common/logging" zmq "github.com/pebbe/zmq4" "github.com/vmihailenco/msgpack/v5" "k8s.io/klog/v2" @@ -93,7 +94,7 @@ func (p *Publisher) PublishEvent(ctx context.Context, topic string, batch interf return fmt.Errorf("failed to send message to topic %s: %w", topic, err) } - logger.Info("Published event batch", "topic", topic, "seq", seq) + logger.V(logging.TRACE).Info("Published event batch", "topic", topic, "seq", seq) return nil } diff --git a/pkg/common/utils.go b/pkg/common/utils.go index 68d731a4..d937d866 100644 --- a/pkg/common/utils.go +++ b/pkg/common/utils.go @@ -23,6 +23,7 @@ import ( "github.com/go-logr/logr" "github.com/google/uuid" + "github.com/llm-d/llm-d-inference-sim/pkg/common/logging" ) // Definition of buckets for time-to-first-token and time-per-output-token metrics, each value is an upper boundary of a bucket @@ -149,6 +150,6 @@ func WriteToChannel[T any](channel chan T, object T, logger logr.Logger, channel select { case channel <- object: default: - logger.V(1).Info("failed to write to", "channel", channelName) + logger.V(logging.WARN).Info("failed to write to", "channel", channelName) } } diff --git a/pkg/dataset/custom_dataset.go b/pkg/dataset/custom_dataset.go index 34f282eb..557321d6 100644 --- a/pkg/dataset/custom_dataset.go +++ b/pkg/dataset/custom_dataset.go @@ -33,6 +33,7 @@ import ( "github.com/go-logr/logr" "github.com/llm-d/llm-d-inference-sim/pkg/common" + "github.com/llm-d/llm-d-inference-sim/pkg/common/logging" openaiserverapi "github.com/llm-d/llm-d-inference-sim/pkg/openai-server-api" ) @@ -80,7 +81,7 @@ func (d *CustomDataset) downloadDataset(ctx context.Context, url string, path st } }() - d.logger.Info("Using dataset-url", "dataset-url", url) + d.logger.V(logging.INFO).Info("Using dataset-url", "dataset-url", url) resp, err := http.Get(url) if err != nil { return err @@ -181,9 +182,9 @@ func (pr *progressReader) logProgress(pct int) { speed := float64(pr.downloaded) / (1024 * 1024 * elapsedTime) remainingTime := float64(pr.total-pr.downloaded) / (float64(pr.downloaded) / elapsedTime) if pct != 100 { - pr.logger.Info(fmt.Sprintf("Download progress: %d%%, Speed: %.2f MB/s, Remaining time: %.2fs", pct, speed, remainingTime)) + pr.logger.V(logging.INFO).Info("Dataset download progress", "%", pct, "speed (MB/s)", speed, "remaining time (s)", remainingTime) } else { - pr.logger.Info(fmt.Sprintf("Download completed: 100%%, Average Speed: %.2f MB/s, Total time: %.2fs", speed, elapsedTime)) + pr.logger.V(logging.INFO).Info("Download completed", "average speed (MB/s)", speed, "total time (s)", elapsedTime) } } @@ -248,7 +249,7 @@ func (d *CustomDataset) getRecordsCount() (int, error) { } func (d *CustomDataset) loadDatabaseInMemory(path string) error { - d.logger.Info("Loading database into memory...") + d.logger.V(logging.INFO).Info("Loading database into memory...") start := time.Now() // Create in-memory database @@ -301,7 +302,7 @@ func (d *CustomDataset) loadDatabaseInMemory(path string) error { } loadTime := time.Since(start) - d.logger.Info("Database loaded into memory", "load_time", loadTime.String()) + d.logger.V(logging.INFO).Info("Database loaded into memory", "load_time", loadTime.String()) return nil } @@ -354,9 +355,9 @@ 
func (d *CustomDataset) connectToDB(path string, useInMemory bool) error { } if useInMemory { - d.logger.Info("In-memory database connected successfully", "path", path, "records count", count) + d.logger.V(logging.INFO).Info("In-memory database connected successfully", "path", path, "records count", count) } else { - d.logger.Info("Database connected successfully", "path", path, "records count", count) + d.logger.V(logging.INFO).Info("Database connected successfully", "path", path, "records count", count) } return nil } @@ -368,7 +369,7 @@ func (d *CustomDataset) Init(ctx context.Context, logger logr.Logger, path strin } d.hasWarned = false if url == "" { - d.logger.Info("Using dataset from", "path", path) + d.logger.V(logging.INFO).Info("Using dataset from", "path", path) return d.connectToDB(path, useInMemory) } _, err := os.Stat(path) @@ -386,7 +387,7 @@ func (d *CustomDataset) Init(ctx context.Context, logger logr.Logger, path strin return fmt.Errorf("failed to download dataset: %w", err) } } - d.logger.Info("Using dataset path", "dataset-path", path) + d.logger.V(logging.INFO).Info("Using dataset path", "dataset-path", path) return d.connectToDB(path, useInMemory) } @@ -448,7 +449,7 @@ func (d *CustomDataset) query(query string, nTokens int, random *common.Random) rows, err := d.db.Query(query) if err != nil { if !d.hasWarned { - d.logger.Error(err, "Failed to query database. Ensure dataset file is still valid. Will generate random tokens instead.") + d.logger.Error(err, "failed to query database. Ensure dataset file is still valid. Will generate random tokens instead.") d.hasWarned = true } return [][]string{GenPresetRandomTokens(random, nTokens)}, nil @@ -472,7 +473,7 @@ func (d *CustomDataset) GenerateTokens(req openaiserverapi.CompletionRequest, nT // filter out results according to finish reason var filteredTokensList [][]string if finishReason != LengthFinishReason && finishReason != StopFinishReason { - d.logger.Error(errors.New("unknown finish reason"), "Unexpected finish reason", "reason", finishReason) + d.logger.Error(errors.New("unknown finish reason"), "unexpected finish reason", "reason", finishReason) } for _, tokens := range tokensList { if finishReason == StopFinishReason && len(tokens) <= nTokens { diff --git a/pkg/kv-cache/block_cache.go b/pkg/kv-cache/block_cache.go index aa01ac42..75517ef5 100644 --- a/pkg/kv-cache/block_cache.go +++ b/pkg/kv-cache/block_cache.go @@ -73,7 +73,7 @@ func newBlockCache(config *common.Configuration, logger logr.Logger, usageChan c func (bc *blockCache) start(ctx context.Context) { err := bc.eventSender.Run(ctx) if err != nil { - bc.logger.Info("sender stopped with error", "error", err) + bc.logger.Error(err, "Sender stopped with error") } } diff --git a/pkg/kv-cache/kv_cache.go b/pkg/kv-cache/kv_cache.go index 11d8b547..cf69b0f3 100644 --- a/pkg/kv-cache/kv_cache.go +++ b/pkg/kv-cache/kv_cache.go @@ -22,6 +22,7 @@ import ( "github.com/go-logr/logr" "github.com/llm-d/llm-d-inference-sim/pkg/common" + "github.com/llm-d/llm-d-inference-sim/pkg/common/logging" openaiserverapi "github.com/llm-d/llm-d-inference-sim/pkg/openai-server-api" "github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache/kvblock" "github.com/llm-d/llm-d-kv-cache-manager/pkg/tokenization" @@ -63,7 +64,7 @@ func (h *KVCacheHelper) Run(ctx context.Context) { } func (h *KVCacheHelper) OnRequestStart(vllmReq openaiserverapi.CompletionRequest) error { - h.logger.Info("KV cache - process request") + h.logger.V(logging.TRACE).Info("KV cache - process request") prompt := 
vllmReq.GetPrompt() modelName := vllmReq.GetModel() @@ -72,13 +73,13 @@ func (h *KVCacheHelper) OnRequestStart(vllmReq openaiserverapi.CompletionRequest // tokenize the input tokens, _, err := h.tokenizer.Encode(prompt, modelName) if err != nil { - h.logger.Info("Prompt tokenization failed", "error", err.Error()) + h.logger.Error(err, "prompt tokenization failed") return err } // get block keys blockKeys := h.tokensProcessor.TokensToKVBlockKeys(tokens, modelName) - h.logger.Info("found tokens", "tokens", tokens, "block-keys", blockKeys) + h.logger.V(logging.TRACE).Info("Found tokens", "tokens", tokens, "block-keys", blockKeys) blockHashes := make([]uint64, len(blockKeys)) for i, key := range blockKeys { diff --git a/pkg/kv-cache/kv_cache_sender.go b/pkg/kv-cache/kv_cache_sender.go index b0ddac3f..c09b30aa 100644 --- a/pkg/kv-cache/kv_cache_sender.go +++ b/pkg/kv-cache/kv_cache_sender.go @@ -22,6 +22,7 @@ import ( "github.com/go-logr/logr" "github.com/llm-d/llm-d-inference-sim/pkg/common" + "github.com/llm-d/llm-d-inference-sim/pkg/common/logging" "github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache/kvevents" "github.com/vmihailenco/msgpack/v5" ) @@ -70,7 +71,7 @@ func (s *KVEventSender) Run(ctx context.Context) error { case <-ctx.Done(): // Exiting, discard remaining events if any if len(s.batch) > 0 { - s.logger.Info("Existing, discard remaining events", "num of events", len(s.batch)) + s.logger.V(logging.INFO).Info("Exiting, discard remaining events", "num of events", len(s.batch)) } return ctx.Err() @@ -78,7 +79,7 @@ func (s *KVEventSender) Run(ctx context.Context) error { if !ok { // Channel closed, discard remaining events and exit if len(s.batch) > 0 { - s.logger.Info("Channel closed, discard remaining events", "num of events", len(s.batch)) + s.logger.V(logging.INFO).Info("Channel closed, discard remaining events", "num of events", len(s.batch)) } return nil } diff --git a/pkg/llm-d-inference-sim/helpers.go b/pkg/llm-d-inference-sim/helpers.go index 451767cb..809894b6 100644 --- a/pkg/llm-d-inference-sim/helpers.go +++ b/pkg/llm-d-inference-sim/helpers.go @@ -22,6 +22,7 @@ import ( "fmt" "github.com/llm-d/llm-d-inference-sim/pkg/common" + "github.com/llm-d/llm-d-inference-sim/pkg/common/logging" openaiserverapi "github.com/llm-d/llm-d-inference-sim/pkg/openai-server-api" ) @@ -92,7 +93,7 @@ func (s *VllmSimulator) showConfig(dp bool) error { if err != nil { return fmt.Errorf("failed to marshal configuration to JSON: %w", err) } - s.logger.Info("Configuration:", "", string(cfgJSON)) + s.logger.V(logging.INFO).Info("Configuration:", "", string(cfgJSON)) return nil } diff --git a/pkg/llm-d-inference-sim/lora.go b/pkg/llm-d-inference-sim/lora.go index 608bce93..f7531605 100644 --- a/pkg/llm-d-inference-sim/lora.go +++ b/pkg/llm-d-inference-sim/lora.go @@ -21,6 +21,7 @@ import ( "encoding/json" "github.com/llm-d/llm-d-inference-sim/pkg/common" + "github.com/llm-d/llm-d-inference-sim/pkg/common/logging" "github.com/valyala/fasthttp" ) @@ -40,7 +41,7 @@ func (s *VllmSimulator) getLoras() []string { if lora, ok := key.(string); ok { loras = append(loras, lora) } else { - s.logger.Info("Stored LoRA is not a string", "value", key) + s.logger.V(logging.WARN).Info("Stored LoRA is not a string", "value", key) } return true }) diff --git a/pkg/llm-d-inference-sim/metrics.go b/pkg/llm-d-inference-sim/metrics.go index 787d452c..8607289d 100644 --- a/pkg/llm-d-inference-sim/metrics.go +++ b/pkg/llm-d-inference-sim/metrics.go @@ -66,7 +66,7 @@ func (s *VllmSimulator) createAndRegisterPrometheus() 
error { ) if err := s.metrics.registry.Register(s.metrics.loraInfo); err != nil { - s.logger.Error(err, "Prometheus lora info gauge register failed") + s.logger.Error(err, "prometheus lora info gauge register failed") return err } @@ -80,7 +80,7 @@ func (s *VllmSimulator) createAndRegisterPrometheus() error { ) if err := s.metrics.registry.Register(s.metrics.runningRequests); err != nil { - s.logger.Error(err, "Prometheus number of running requests gauge register failed") + s.logger.Error(err, "prometheus number of running requests gauge register failed") return err } @@ -95,7 +95,7 @@ func (s *VllmSimulator) createAndRegisterPrometheus() error { ) if err := s.metrics.registry.Register(s.metrics.waitingRequests); err != nil { - s.logger.Error(err, "Prometheus number of requests in queue gauge register failed") + s.logger.Error(err, "prometheus number of requests in queue gauge register failed") return err } @@ -110,7 +110,7 @@ func (s *VllmSimulator) createAndRegisterPrometheus() error { ) if err := s.metrics.registry.Register(s.metrics.ttft); err != nil { - s.logger.Error(err, "Prometheus time to first token histogram register failed") + s.logger.Error(err, "prometheus time to first token histogram register failed") return err } @@ -125,7 +125,7 @@ func (s *VllmSimulator) createAndRegisterPrometheus() error { ) if err := s.metrics.registry.Register(s.metrics.tpot); err != nil { - s.logger.Error(err, "Prometheus time per output token histogram register failed") + s.logger.Error(err, "prometheus time per output token histogram register failed") return err } @@ -214,7 +214,7 @@ func (s *VllmSimulator) createAndRegisterPrometheus() error { ) if err := s.metrics.registry.Register(s.metrics.kvCacheUsagePercentage); err != nil { - s.logger.Error(err, "Prometheus kv cache usage percentage gauge register failed") + s.logger.Error(err, "prometheus kv cache usage percentage gauge register failed") return err } @@ -228,7 +228,7 @@ func (s *VllmSimulator) createAndRegisterPrometheus() error { []string{vllmapi.PromLabelModelName}, ) if err := s.metrics.registry.Register(s.metrics.requestPromptTokens); err != nil { - s.logger.Error(err, "Prometheus request_prompt_tokens histogram register failed") + s.logger.Error(err, "prometheus request_prompt_tokens histogram register failed") return err } @@ -242,7 +242,7 @@ func (s *VllmSimulator) createAndRegisterPrometheus() error { []string{vllmapi.PromLabelModelName}, ) if err := s.metrics.registry.Register(s.metrics.requestGenerationTokens); err != nil { - s.logger.Error(err, "Prometheus request_generation_tokens histogram register failed") + s.logger.Error(err, "prometheus request_generation_tokens histogram register failed") return err } @@ -256,7 +256,7 @@ func (s *VllmSimulator) createAndRegisterPrometheus() error { []string{vllmapi.PromLabelModelName}, ) if err := s.metrics.registry.Register(s.metrics.requestParamsMaxTokens); err != nil { - s.logger.Error(err, "Prometheus request_params_max_tokens histogram register failed") + s.logger.Error(err, "prometheus request_params_max_tokens histogram register failed") return err } @@ -269,7 +269,7 @@ func (s *VllmSimulator) createAndRegisterPrometheus() error { []string{vllmapi.PromLabelModelName, vllmapi.PromLabelFinishReason}, ) if err := s.metrics.registry.Register(s.metrics.requestSuccessTotal); err != nil { - s.logger.Error(err, "Prometheus request_success_total counter register failed") + s.logger.Error(err, "prometheus request_success_total counter register failed") return err } diff --git 
a/pkg/llm-d-inference-sim/server.go b/pkg/llm-d-inference-sim/server.go index e21717a9..44ce37bf 100644 --- a/pkg/llm-d-inference-sim/server.go +++ b/pkg/llm-d-inference-sim/server.go @@ -30,6 +30,7 @@ import ( "github.com/valyala/fasthttp/fasthttpadaptor" "github.com/llm-d/llm-d-inference-sim/pkg/common" + "github.com/llm-d/llm-d-inference-sim/pkg/common/logging" openaiserverapi "github.com/llm-d/llm-d-inference-sim/pkg/openai-server-api" vllmapi "github.com/llm-d/llm-d-inference-sim/pkg/vllm-api" ) @@ -75,10 +76,10 @@ func (s *VllmSimulator) startServer(ctx context.Context, listener net.Listener) serverErr := make(chan error, 1) go func() { if s.config.SSLEnabled() { - s.logger.Info("Server starting", "protocol", "HTTPS", "port", s.config.Port) + s.logger.V(logging.INFO).Info("Server starting", "protocol", "HTTPS", "port", s.config.Port) serverErr <- server.ServeTLS(listener, "", "") } else { - s.logger.Info("Server starting", "protocol", "HTTP", "port", s.config.Port) + s.logger.V(logging.INFO).Info("Server starting", "protocol", "HTTP", "port", s.config.Port) serverErr <- server.Serve(listener) } }() @@ -86,20 +87,20 @@ func (s *VllmSimulator) startServer(ctx context.Context, listener net.Listener) // Wait for either context cancellation or server error select { case <-ctx.Done(): - s.logger.Info("Shutdown signal received, shutting down server gracefully") + s.logger.V(logging.INFO).Info("Shutdown signal received, shutting down server gracefully") // Gracefully shutdown the server if err := server.Shutdown(); err != nil { - s.logger.Error(err, "Error during server shutdown") + s.logger.Error(err, "error during server shutdown") return err } - s.logger.Info("Server stopped") + s.logger.V(logging.INFO).Info("Server stopped") return nil case err := <-serverErr: if err != nil { - s.logger.Error(err, "Server failed") + s.logger.Error(err, "server failed") } return err } @@ -165,7 +166,7 @@ func (s *VllmSimulator) readTokenizeRequest(ctx *fasthttp.RequestCtx) (*vllmapi. 
// HandleTokenize http handler for /tokenize func (s *VllmSimulator) HandleTokenize(ctx *fasthttp.RequestCtx) { - s.logger.Info("tokenize request received") + s.logger.V(logging.TRACE).Info("Tokenize request received") req, err := s.readTokenizeRequest(ctx) if err != nil { s.logger.Error(err, "failed to read and parse tokenize request body") @@ -207,12 +208,12 @@ func (s *VllmSimulator) HandleTokenize(ctx *fasthttp.RequestCtx) { } func (s *VllmSimulator) HandleLoadLora(ctx *fasthttp.RequestCtx) { - s.logger.Info("load lora request received") + s.logger.V(logging.DEBUG).Info("Load lora request received") s.loadLoraAdaptor(ctx) } func (s *VllmSimulator) HandleUnloadLora(ctx *fasthttp.RequestCtx) { - s.logger.Info("unload lora request received") + s.logger.V(logging.DEBUG).Info("Unload lora request received") s.unloadLoraAdaptor(ctx) } @@ -270,7 +271,7 @@ func (s *VllmSimulator) sendCompletionResponse(ctx *fasthttp.RequestCtx, resp op func (s *VllmSimulator) sendCompletionError(ctx *fasthttp.RequestCtx, compErr openaiserverapi.CompletionError, isInjected bool) { if isInjected { - s.logger.Info("Injecting failure", "type", compErr.Type, "message", compErr.Message) + s.logger.V(logging.TRACE).Info("Injecting failure", "type", compErr.Type, "message", compErr.Message) } else { s.logger.Error(nil, compErr.Message) } @@ -291,11 +292,12 @@ func (s *VllmSimulator) sendCompletionError(ctx *fasthttp.RequestCtx, // HandleModels handles /v1/models request according the data stored in the simulator func (s *VllmSimulator) HandleModels(ctx *fasthttp.RequestCtx) { + s.logger.V(logging.TRACE).Info("/models request received") modelsResp := s.createModelsResponse() data, err := json.Marshal(modelsResp) if err != nil { - s.logger.Error(err, "Failed to marshal models response") + s.logger.Error(err, "failed to marshal models response") ctx.Error("Failed to marshal models response, "+err.Error(), fasthttp.StatusInternalServerError) return } @@ -306,12 +308,12 @@ func (s *VllmSimulator) HandleModels(ctx *fasthttp.RequestCtx) { } func (s *VllmSimulator) HandleError(_ *fasthttp.RequestCtx, err error) { - s.logger.Error(err, "VLLM server error") + s.logger.Error(err, "vLLM server error") } // HandleHealth http handler for /health func (s *VllmSimulator) HandleHealth(ctx *fasthttp.RequestCtx) { - s.logger.V(4).Info("health request received") + s.logger.V(logging.TRACE).Info("Health request received") ctx.Response.Header.SetContentType("application/json") ctx.Response.Header.SetStatusCode(fasthttp.StatusOK) ctx.Response.SetBody([]byte("{}")) @@ -319,7 +321,7 @@ func (s *VllmSimulator) HandleHealth(ctx *fasthttp.RequestCtx) { // HandleReady http handler for /ready func (s *VllmSimulator) HandleReady(ctx *fasthttp.RequestCtx) { - s.logger.V(4).Info("readiness request received") + s.logger.V(logging.TRACE).Info("Readiness request received") ctx.Response.Header.SetContentType("application/json") ctx.Response.Header.SetStatusCode(fasthttp.StatusOK) ctx.Response.SetBody([]byte("{}")) diff --git a/pkg/llm-d-inference-sim/server_tls.go b/pkg/llm-d-inference-sim/server_tls.go index 601418d7..0f23772c 100644 --- a/pkg/llm-d-inference-sim/server_tls.go +++ b/pkg/llm-d-inference-sim/server_tls.go @@ -27,6 +27,7 @@ import ( "math/big" "time" + "github.com/llm-d/llm-d-inference-sim/pkg/common/logging" "github.com/valyala/fasthttp" ) @@ -40,10 +41,10 @@ func (s *VllmSimulator) configureSSL(server *fasthttp.Server) error { var err error if s.config.SSLCertFile != "" && s.config.SSLKeyFile != "" { - s.logger.Info("HTTPS server 
starting with certificate files", "cert", s.config.SSLCertFile, "key", s.config.SSLKeyFile) + s.logger.V(logging.INFO).Info("HTTPS server starting with certificate files", "cert", s.config.SSLCertFile, "key", s.config.SSLKeyFile) cert, err = tls.LoadX509KeyPair(s.config.SSLCertFile, s.config.SSLKeyFile) } else if s.config.SelfSignedCerts { - s.logger.Info("HTTPS server starting with self-signed certificate") + s.logger.V(logging.INFO).Info("HTTPS server starting with self-signed certificate") cert, err = CreateSelfSignedTLSCertificate() } diff --git a/pkg/llm-d-inference-sim/simulator.go b/pkg/llm-d-inference-sim/simulator.go index 158ee3a1..0dbd8279 100644 --- a/pkg/llm-d-inference-sim/simulator.go +++ b/pkg/llm-d-inference-sim/simulator.go @@ -34,6 +34,7 @@ import ( "k8s.io/klog/v2" "github.com/llm-d/llm-d-inference-sim/pkg/common" + "github.com/llm-d/llm-d-inference-sim/pkg/common/logging" "github.com/llm-d/llm-d-inference-sim/pkg/dataset" kvcache "github.com/llm-d/llm-d-inference-sim/pkg/kv-cache" openaiserverapi "github.com/llm-d/llm-d-inference-sim/pkg/openai-server-api" @@ -272,7 +273,7 @@ func (s *VllmSimulator) startSim(ctx context.Context) error { listener, err := s.newListener() if err != nil { - s.logger.Error(err, "Failed to create listener") + s.logger.Error(err, "failed to create listener") return fmt.Errorf("listener creation error: %w", err) } @@ -366,7 +367,7 @@ func (s *VllmSimulator) initDataset(ctx context.Context) error { } if s.config.DatasetPath == "" && s.config.DatasetURL == "" { - s.logger.Info("No dataset path or URL provided, using random text for responses") + s.logger.V(logging.INFO).Info("No dataset path or URL provided, using random text for responses") s.dataset = randDataset return nil } @@ -380,7 +381,7 @@ func (s *VllmSimulator) initDataset(ctx context.Context) error { } if strings.HasPrefix(err.Error(), "database is locked") { - s.logger.Info("Database is locked by another process, will use preset text for responses instead") + s.logger.V(logging.WARN).Info("Database is locked by another process, will use preset text for responses instead") s.dataset = randDataset return nil } @@ -390,20 +391,20 @@ func (s *VllmSimulator) initDataset(ctx context.Context) error { // Print prints to a log, implementation of fasthttp.Logger func (s *VllmSimulator) Printf(format string, args ...interface{}) { - s.logger.Info("Server error", "msg", fmt.Sprintf(format, args...)) + s.logger.V(logging.WARN).Info("Server error", "msg", fmt.Sprintf(format, args...)) } func (s *VllmSimulator) processing(ctx context.Context) { - s.logger.Info("Start processing routine") + s.logger.V(logging.INFO).Info("Start processing routine") for { select { case <-ctx.Done(): - s.logger.Info("Request processing done") + s.logger.V(logging.INFO).Info("Request processing done") return case completedReq := <-s.workerFinished: worker := completedReq.worker - s.logger.V(4).Info("Worker finished", "worker", worker.id) + s.logger.V(logging.TRACE).Info("Worker finished", "worker", worker.id) s.decrementLora(completedReq.model) // there is a free worker - find a request for it and send this request for // processing with this worker @@ -412,7 +413,7 @@ func (s *VllmSimulator) processing(ctx context.Context) { // there is a LoRA that can be removed, go through availbale workers // and queued requests and find requests that can run now, // stop if there are no free workers, or no requests - s.logger.V(4).Info("LoRA can be removed") + s.logger.V(logging.TRACE).Info("LoRA can be removed") for { // check if 
there is a free worker worker := s.getFreeWorker() @@ -433,7 +434,7 @@ func (s *VllmSimulator) processing(ctx context.Context) { worker := s.getFreeWorker() if worker == nil { - s.logger.V(4).Info("No free worker - sending the request to the waiting queue", + s.logger.V(logging.TRACE).Info("No free worker - sending the request to the waiting queue", "model", reqCtx.CompletionReq.GetModel(), "req id", reqCtx.CompletionReq.GetRequestID()) // no free worker, add this request to the waiting queue s.addRequestToQueue(reqCtx) @@ -444,14 +445,14 @@ func (s *VllmSimulator) processing(ctx context.Context) { if s.isLora(model) && !s.loadLora(model) { // free the worker s.freeWorkers <- worker - s.logger.V(4).Info("LoRA cannot be loaded - sending the request to the waiting queue", + s.logger.V(logging.TRACE).Info("LoRA cannot be loaded - sending the request to the waiting queue", "LoRA", model, "req id", reqCtx.CompletionReq.GetRequestID()) // LoRA max reached, try to enqueue s.addRequestToQueue(reqCtx) break } - s.logger.V(4).Info("Sending the request to the processing channel", "model", model, + s.logger.V(logging.TRACE).Info("Sending the request to the processing channel", "model", model, "req id", reqCtx.CompletionReq.GetRequestID(), "worker", worker.id) common.WriteToChannel(worker.reqChan, reqCtx, s.logger, "worker's reqChan") } @@ -462,7 +463,7 @@ func (s *VllmSimulator) findRequestAndSendToProcess(worker *worker) bool { nextReq := s.dequeue() if nextReq != nil { // send this request for processing in this worker - s.logger.V(4).Info("Sending request to processing", "model", nextReq.CompletionReq.GetModel(), + s.logger.V(logging.TRACE).Info("Sending request to processing", "model", nextReq.CompletionReq.GetModel(), "req", nextReq.CompletionReq.GetRequestID(), "worker", worker.id) common.WriteToChannel(worker.reqChan, nextReq, s.logger, "worker's reqChan") // decrement waiting requests metric @@ -517,7 +518,7 @@ func (s *VllmSimulator) handleCompletions(ctx *fasthttp.RequestCtx, isChatComple return } - s.logger.V(4).Info("Completion request received", "req id", vllmReq.GetRequestID(), "isChat", isChatCompletion) + s.logger.V(logging.DEBUG).Info("Completion request received", "req id", vllmReq.GetRequestID(), "isChat", isChatCompletion) var wg sync.WaitGroup wg.Add(1) diff --git a/pkg/llm-d-inference-sim/streaming.go b/pkg/llm-d-inference-sim/streaming.go index 2a136beb..6e708b6d 100644 --- a/pkg/llm-d-inference-sim/streaming.go +++ b/pkg/llm-d-inference-sim/streaming.go @@ -24,6 +24,7 @@ import ( "time" "github.com/llm-d/llm-d-inference-sim/pkg/common" + "github.com/llm-d/llm-d-inference-sim/pkg/common/logging" "github.com/llm-d/llm-d-inference-sim/pkg/dataset" openaiserverapi "github.com/llm-d/llm-d-inference-sim/pkg/openai-server-api" "github.com/valyala/fasthttp" @@ -73,12 +74,12 @@ func (s *VllmSimulator) sendStreamingResponse(context *streamingContext, respons } } if len(toolCalls) > 0 { - s.logger.Info("Going to send tools calls") + s.logger.V(logging.TRACE).Info("Going to send tools calls") for _, tc := range toolCalls { s.sendTokenChunks(context, w, tc.Function.TokenizedArguments, &tc, finishReason) } } else { - s.logger.Info("Going to send text", "number of tokens", len(responseTokens)) + s.logger.V(logging.TRACE).Info("Going to send text", "number of tokens", len(responseTokens)) s.sendTokenChunks(context, w, responseTokens, nil, finishReason) } } diff --git a/pkg/llm-d-inference-sim/worker.go b/pkg/llm-d-inference-sim/worker.go index 1dc796a6..692a657f 100644 --- 
a/pkg/llm-d-inference-sim/worker.go +++ b/pkg/llm-d-inference-sim/worker.go @@ -23,6 +23,7 @@ import ( "github.com/go-logr/logr" "github.com/llm-d/llm-d-inference-sim/pkg/common" + "github.com/llm-d/llm-d-inference-sim/pkg/common/logging" "github.com/llm-d/llm-d-inference-sim/pkg/dataset" openaiserverapi "github.com/llm-d/llm-d-inference-sim/pkg/openai-server-api" "github.com/valyala/fasthttp" @@ -46,7 +47,7 @@ func (w *worker) waitForRequests() { for { select { case <-w.ctx.Done(): - w.logger.V(4).Info("worker done", "id", w.id) + w.logger.V(logging.TRACE).Info("worker done", "id", w.id) return case req := <-w.reqChan: w.processor.processRequest(req) @@ -157,7 +158,7 @@ func (s *VllmSimulator) processRequest(reqCtx *openaiserverapi.CompletionReqCtx) s.logger, "metrics.requestSuccessChan") } - s.logger.V(4).Info("Finished processing request", "id", req.GetRequestID()) + s.logger.V(logging.DEBUG).Info("Finished processing request", "id", req.GetRequestID()) reqCtx.Wg.Done() }
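
For reference, here is a minimal, self-contained sketch of how the verbosity constants introduced in `pkg/common/logging/levels.go` interact with klog's `-v` flag described in the README hunk above. The constants are copied inline only so the snippet compiles on its own, and the log messages mirror calls changed in this diff; this is an illustration under those assumptions, not code from the repository.

```go
package main

import (
	"flag"

	"k8s.io/klog/v2"
)

// Copied from pkg/common/logging/levels.go (added in this diff) so the
// sketch is self-contained.
const (
	WARN  = 1
	INFO  = 2
	DEBUG = 4
	TRACE = 5
)

func main() {
	// klog registers the -v flag documented in the README section above.
	klog.InitFlags(nil)
	flag.Parse()

	logger := klog.Background()

	// Error output is unconditional; V(n) output appears only when -v >= n.
	logger.Error(nil, "always printed, regardless of -v")
	logger.V(WARN).Info("printed when -v >= 1")
	logger.V(INFO).Info("Starting vLLM simulator")                               // -v >= 2
	logger.V(DEBUG).Info("Completion request received", "req id", "example-id")  // -v >= 4
	logger.V(TRACE).Info("Sending request to processing", "worker", 0)           // -v >= 5
}
```

Running the sketch with `go run . -v=2` emits the Error, Warning, and Info lines; `-v=4` adds Debug, and `-v=5` adds Trace, matching the Warning/Info/Debug/Trace tiers documented in the README.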