Skip to content

Commit 4115c9c

Browse files
committed
Never output EOS token's text
There is a global output-special-tokens option, disabled by default, which when enabled causes all special tokens to be output, including the terminating EOS token. For the use cases we've encountered, emitting the EOS token's text is undesirable. I can't think of a case where it would be needed or wanted, since it will only ever appear at the very end of the output, and in those cases the returned stop_reason will already be EOS_TOKEN.
1 parent c106c67 commit 4115c9c

File tree

4 files changed

+39
-2
lines changed

4 files changed

+39
-2
lines changed

integration_tests/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ markers = [
2020
"model",
2121
"extensions",
2222
"shards",
23+
"output_special_tokens",
2324
"test_case_file",
2425
]
2526

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
2+
# EOS token
3+
- name: EOS
4+
request:
5+
params:
6+
stopping:
7+
maxNewTokens: 10
8+
requests:
9+
- {"text": "In one word, the capital of France is"}
10+
response:
11+
responses:
12+
- generatedTokenCount: 2
13+
inputTokenCount: 10
14+
stopReason: EOS_TOKEN
15+
text: France

integration_tests/text_generation_tests/test_server.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def start_server(
3131
timeout=20,
3232
model_path=None,
3333
include_cache_env_vars=True,
34+
output_special_tokens=False,
3435
):
3536
# Download weights to the cache first
3637
print(f"Downloading files for model {model_name}...")
@@ -64,6 +65,9 @@ def start_server(
6465
"--max-batch-weight", "80000",
6566
]
6667

68+
if output_special_tokens:
69+
args.append("--output-special-tokens")
70+
6771
env = os.environ.copy()
6872
env["RUST_BACKTRACE"] = "full"
6973
env["PREFIX_STORE_PATH"] = os.path.join(TESTS_DIR, "prompt_prefixes")
@@ -115,7 +119,11 @@ def server_fixture(request):
115119
model_name = request.node.get_closest_marker("model").args[0]
116120
shards = int(request.node.get_closest_marker("shards").args[0])
117121
extensions = request.node.get_closest_marker("extensions").args[0]
118-
p = start_server(model_name, extensions, shards, 3000, 29502)
122+
ost = request.node.get_closest_marker("output_special_tokens")
123+
ost = ost is not None and ost.args[0]
124+
p = start_server(
125+
model_name, extensions, shards, 3000, 29502, output_special_tokens=ost
126+
)
119127
yield p
120128
p.terminate()
121129
assert p.wait(8.0) == 0
@@ -356,6 +364,16 @@ async def test_bloom(server_fixture, test_cases):
356364
await run_test_cases_async(test_cases, sharded=True)
357365

358366

367+
@pytest.mark.model("bigscience/mt0-small")
368+
@pytest.mark.extensions(".bin,.json")
369+
@pytest.mark.shards(1)
370+
@pytest.mark.output_special_tokens(True)
371+
@pytest.mark.test_case_file("test_cases_mt0_ost.yaml")
372+
@pytest.mark.asyncio
373+
async def test_mt0_output_special_tokens(server_fixture, test_cases):
374+
await run_test_cases_async(test_cases)
375+
376+
359377
# Test loading when an explicit local path is provided
360378
def test_explicit_path():
361379
# Test with and without providing TRANSFORMERS_CACHE env var

router/src/decoder.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,10 @@ impl Decoder {
3535
}
3636
}
3737

38-
fn decode_full(&self, ids: &[u32]) -> Result<String, InferError> {
38+
fn decode_full(&self, mut ids: &[u32]) -> Result<String, InferError> {
39+
if !self.skip_special_toks && ids.last() == Some(&self.eos_token_id) {
40+
ids = &ids[..(ids.len()-1)];
41+
}
3942
self.tokenizer.decode(ids, self.skip_special_toks).map_err(Error::into)
4043
}
4144

0 commit comments

Comments
 (0)