server : use text_to_send instead of detokenized token

danbev · danbev · commit 0f66d9936243 · 2025-02-07T19:22:46.000+01:00
This commit adds a check to the server to avoid using the detokenized predicted token when the predicted token id is the same as the token id that the server is responding with. The motivation for this is to avoid a mismatch between the text tokens, where the text_to_send token may include a leading whitespace character but the detokenized token would not. Resolves: #11728
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
@@ -541,12 +541,16 @@ struct completion_token_output {
     json to_json(bool post_sampling_probs) const {
         json probs_for_token = json::array();
         for (const auto & p : probs) {
-            std::string txt(p.txt);
+            // If the predicted token id is the same as this.tok, then we use the text_to_send instead
+            // of the detokenized token. This is to avoid a mismatch between the text tokens where
+            // the text_to_send token may include a leading whitespace character but the detokenized
+            // token would not.
+            std::string txt = tok == p.tok ? text_to_send : p.txt;
             txt.resize(validate_utf8(txt));
             probs_for_token.push_back(json {
                 {"id",      p.tok},
                 {"token",   txt},
-                {"bytes",   str_to_bytes(p.txt)},
+                {"bytes",   str_to_bytes(txt)},
                 {
                     post_sampling_probs ? "prob" : "logprob",
                     post_sampling_probs ? p.prob : logarithm(p.prob)