load_model_tokenizer_for_generate should raise ValueError (#99)

matthayes · web-flow · commit 0eadcb7b0648 · 2023-04-17T12:04:23.000-07:00
Raising ValueError is what InstructionTextGenerationPipeline expects. The same code is also used in: https://huggingface.co/databricks/dolly-v2-12b/blob/main/instruct_pipeline.py
diff --git a/training/generate.py b/training/generate.py
@@ -51,14 +51,14 @@ def get_special_token_id(tokenizer: PreTrainedTokenizer, key: str) -> int:
         key (str): the key to convert to a single token
 
     Raises:
-        RuntimeError: if more than one ID was generated
+        ValueError: if more than one ID was generated
 
     Returns:
         int: the token ID for the given key
     """
     token_ids = tokenizer.encode(key)
     if len(token_ids) > 1:
-        raise RuntimeError(f"Expected only a single token for '{key}' but found {token_ids}")
+        raise ValueError(f"Expected only a single token for '{key}' but found {token_ids}")
     return token_ids[0]