@@ -18,6 +18,7 @@ package llmdinferencesim
1818
1919import (
2020 "context"
21+ "encoding/json"
2122 "errors"
2223 "fmt"
2324 "io"
@@ -29,6 +30,8 @@ import (
2930
3031 "github.com/llm-d/llm-d-inference-sim/pkg/common"
3132 kvcache "github.com/llm-d/llm-d-inference-sim/pkg/kv-cache"
33+ vllmapi "github.com/llm-d/llm-d-inference-sim/pkg/vllm-api"
34+ "github.com/llm-d/llm-d-kv-cache-manager/pkg/tokenization"
3235 . "github.com/onsi/ginkgo/v2"
3336 . "github.com/onsi/gomega"
3437 "github.com/openai/openai-go"
@@ -97,8 +100,17 @@ func startServerWithArgs(ctx context.Context, mode string, args []string, envs m
97100 return nil , err
98101 }
99102
103+ tokenizationConfig := tokenization .DefaultConfig ()
104+ if s .config .TokenizersCacheDir != "" {
105+ tokenizationConfig .TokenizersCacheDir = s .config .TokenizersCacheDir
106+ }
107+ s .tokenizer , err = tokenization .NewCachedHFTokenizer (tokenizationConfig .HFTokenizerConfig )
108+ if err != nil {
109+ return nil , fmt .Errorf ("failed to create tokenizer: %w" , err )
110+ }
111+
100112 if s .config .EnableKVCache {
101- s .kvcacheHelper , err = kvcache .NewKVCacheHelper (s .config , s .logger , s .kvCacheUsageChan )
113+ s .kvcacheHelper , err = kvcache .NewKVCacheHelper (s .config , s .logger , s .kvCacheUsageChan , s . tokenizer )
102114 if err != nil {
103115 return nil , err
104116 }
@@ -1065,7 +1077,69 @@ var _ = Describe("Simulator", func() {
10651077 Expect (factor ).To (BeNumerically (">" , 1.0 ))
10661078 Expect (factor ).To (BeNumerically ("<" , simulator .config .TimeFactorUnderLoad ))
10671079 })
1080+ })
1081+
1082+ It ("Should return correct response to /tokenize chat" , func () {
1083+ ctx := context .TODO ()
1084+ modelName := "Qwen/Qwen2-0.5B"
1085+ tmpDir := "./tests-tmp/"
1086+ defer os .RemoveAll (tmpDir )
1087+ args := []string {"cmd" , "--model" , modelName , "--mode" , common .ModeRandom ,
1088+ "--tokenizers-cache-dir" , tmpDir }
1089+ client , err := startServerWithArgs (ctx , common .ModeRandom , args , nil )
1090+ Expect (err ).NotTo (HaveOccurred ())
1091+
1092+ reqBody := `{
1093+ "messages": [{"role": "user", "content": "This is a test"}],
1094+ "model": "Qwen/Qwen2-0.5B"
1095+ }`
1096+ resp , err := client .Post ("http://localhost/tokenize" , "application/json" , strings .NewReader (reqBody ))
1097+ Expect (err ).NotTo (HaveOccurred ())
1098+ defer func () {
1099+ err := resp .Body .Close ()
1100+ Expect (err ).NotTo (HaveOccurred ())
1101+ }()
1102+
1103+ body , err := io .ReadAll (resp .Body )
1104+ Expect (err ).NotTo (HaveOccurred ())
10681105
1106+ var tokenizeResp vllmapi.TokenizeResponse
1107+ err = json .Unmarshal (body , & tokenizeResp )
1108+ Expect (err ).NotTo (HaveOccurred ())
1109+ Expect (tokenizeResp .Count ).To (Equal (4 ))
1110+ Expect (tokenizeResp .Tokens ).To (HaveLen (4 ))
1111+ Expect (tokenizeResp .MaxModelLen ).To (Equal (1024 ))
10691112 })
10701113
1114+ It ("Should return correct response to /tokenize text" , func () {
1115+ ctx := context .TODO ()
1116+ modelName := "Qwen/Qwen2-0.5B"
1117+ tmpDir := "./tests-tmp/"
1118+ defer os .RemoveAll (tmpDir )
1119+ args := []string {"cmd" , "--model" , modelName , "--mode" , common .ModeRandom ,
1120+ "--tokenizers-cache-dir" , tmpDir }
1121+ client , err := startServerWithArgs (ctx , common .ModeRandom , args , nil )
1122+ Expect (err ).NotTo (HaveOccurred ())
1123+
1124+ reqBody := `{
1125+ "prompt": "This is a test",
1126+ "model": "Qwen/Qwen2-0.5B"
1127+ }`
1128+ resp , err := client .Post ("http://localhost/tokenize" , "application/json" , strings .NewReader (reqBody ))
1129+ Expect (err ).NotTo (HaveOccurred ())
1130+ defer func () {
1131+ err := resp .Body .Close ()
1132+ Expect (err ).NotTo (HaveOccurred ())
1133+ }()
1134+
1135+ body , err := io .ReadAll (resp .Body )
1136+ Expect (err ).NotTo (HaveOccurred ())
1137+
1138+ var tokenizeResp vllmapi.TokenizeResponse
1139+ err = json .Unmarshal (body , & tokenizeResp )
1140+ Expect (err ).NotTo (HaveOccurred ())
1141+ Expect (tokenizeResp .Count ).To (Equal (4 ))
1142+ Expect (tokenizeResp .Tokens ).To (HaveLen (4 ))
1143+ Expect (tokenizeResp .MaxModelLen ).To (Equal (1024 ))
1144+ })
10711145})
0 commit comments