From e8f6e335c00c4b6e56f544b02c3c0674dd99c736 Mon Sep 17 00:00:00 2001 From: ochafik Date: Sun, 25 May 2025 22:57:54 +0100 Subject: [PATCH 1/2] more forgiving message diffs: partial stop words aren't erased, full stops are --- common/chat.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/common/chat.cpp b/common/chat.cpp index 78af5eafa40c3..90f3f5b35ae59 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -31,6 +31,11 @@ static std::string string_diff(const std::string & last, const std::string & cur return current; } if (!string_starts_with(current, last)) { + if (string_starts_with(last, current)) { + // This happens if the last generation ended on a partial stop word (not erased), + // and the current ended on a stop word (erased). + return ""; + } throw std::runtime_error("Invalid diff: '" + last + "' not found at start of '" + current + "'"); } return current.substr(last.size()); From 98982bdf96f0631275508b534348ac22e6b3878f Mon Sep 17 00:00:00 2001 From: ochafik Date: Mon, 26 May 2025 00:24:24 +0100 Subject: [PATCH 2/2] Add (slow) server test for completion + stream + stop --- tools/server/tests/unit/test_completion.py | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tools/server/tests/unit/test_completion.py b/tools/server/tests/unit/test_completion.py index 4099c4e25cd6e..f6909e9ae7884 100644 --- a/tools/server/tests/unit/test_completion.py +++ b/tools/server/tests/unit/test_completion.py @@ -121,6 +121,30 @@ def test_completion_stream_with_openai_library(): assert match_regex("(going|bed)+", output_text) +# Test case from https://github.com/ggml-org/llama.cpp/issues/13780 +@pytest.mark.slow +def test_completion_stream_with_openai_library_stops(): + global server + server.model_hf_repo = "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" + server.model_hf_file = None + server.start() + client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1") + res = client.completions.create( + model="davinci-002", + prompt="System: You are helpfull assistant.\nAssistant:\nHey! How could I help?\nUser:\nTell me a joke.\nAssistant:\n", + stop=["User:\n", "Assistant:\n"], + max_tokens=200, + stream=True, + ) + output_text = '' + for data in res: + choice = data.choices[0] + if choice.finish_reason is None: + assert choice.text is not None + output_text += choice.text + assert match_regex("Sure, here's one for[\\s\\S]*", output_text), f'Unexpected output: {output_text}' + + @pytest.mark.parametrize("n_slots", [1, 2]) def test_consistent_result_same_seed(n_slots: int): global server