This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit 4dedb4e

Enable the integration tests for llamacpp (#868)
* Enable the integration tests for llamacpp

Signed-off-by: Radoslav Dimitrov <[email protected]>
1 parent a428992 commit 4dedb4e

File tree

4 files changed: +42 -19 lines changed

.github/workflows/integration-tests.yml

Lines changed: 11 additions & 0 deletions
@@ -51,6 +51,11 @@ jobs:
           echo "Loaded image:"
           docker images
 
+      - name: Download the Qwen2.5-Coder-0.5B-Instruct-GGUF model
+        run: |
+          # This is needed for the llamacpp integration tests
+          wget -P ./codegate_volume/models https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-0.5b-instruct-q5_k_m.gguf
+
       - name: Run container from the loaded image
         run: |
           # Get the image name
@@ -235,6 +240,12 @@ jobs:
         run: |
           poetry run python tests/integration/integration_tests.py
 
+      - name: Run integration tests - llamacpp
+        env:
+          CODEGATE_PROVIDERS: "llamacpp"
+        run: |
+          poetry run python tests/integration/integration_tests.py
+
       - name: Print the CodeGate container logs (useful for debugging)
         if: always()
         run: |
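For context: the new workflow step downloads the GGUF model into ./codegate_volume/models, and the llamacpp provider later resolves it from the request's model name (see the completion handler change below). A minimal sketch of that mapping, assuming model_base_path is configured to point at the downloaded directory; the helper name is illustrative and not part of CodeGate's API:

from pathlib import Path

# Assumption: model_base_path is configured to the directory the workflow
# downloads into (./codegate_volume/models).
MODEL_BASE_PATH = Path("./codegate_volume/models")

def resolve_model_path(model_name: str) -> Path:
    # Mirrors model_path = f"{model_base_path}/{request['model']}.gguf" from the diff below
    return MODEL_BASE_PATH / f"{model_name}.gguf"

# The llamacpp test cases request "qwen2.5-coder-0.5b-instruct-q5_k_m",
# which resolves to the file fetched by the new workflow step.
print(resolve_model_path("qwen2.5-coder-0.5b-instruct-q5_k_m"))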

src/codegate/providers/llamacpp/completion_handler.py

Lines changed: 8 additions & 2 deletions
@@ -59,19 +59,25 @@ async def execute_completion(
         """
         model_path = f"{Config.get_config().model_base_path}/{request['model']}.gguf"
 
+        # Create a copy of the request dict and remove stream_options
+        # Reason - Request error as JSON:
+        # {'error': "Llama.create_completion() got an unexpected keyword argument 'stream_options'"}
+        request_dict = dict(request)
+        request_dict.pop("stream_options", None)
+
         if is_fim_request:
             response = await self.inference_engine.complete(
                 model_path,
                 Config.get_config().chat_model_n_ctx,
                 Config.get_config().chat_model_n_gpu_layers,
-                **request,
+                **request_dict,
             )
         else:
             response = await self.inference_engine.chat(
                 model_path,
                 Config.get_config().chat_model_n_ctx,
                 Config.get_config().chat_model_n_gpu_layers,
-                **request,
+                **request_dict,
             )
 
         return convert_to_async_iterator(response) if stream else response
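A minimal standalone sketch of the same fix: copy the incoming request and drop the key that llama-cpp-python's Llama.create_completion() rejects before splatting it into the inference call. Only the "stream_options" removal comes from this commit; the example payload and helper name are illustrative:

def strip_unsupported_kwargs(request: dict) -> dict:
    # Shallow copy so the original request is left untouched
    request_dict = dict(request)
    # llama-cpp-python raises "unexpected keyword argument 'stream_options'" otherwise
    request_dict.pop("stream_options", None)
    return request_dict

example_request = {
    "model": "qwen2.5-coder-0.5b-instruct-q5_k_m",
    "stream": True,
    "stream_options": {"include_usage": True},  # OpenAI-style field, not accepted here
}
print(strip_unsupported_kwargs(example_request))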

tests/integration/integration_tests.py

Lines changed: 17 additions & 9 deletions
@@ -67,17 +67,25 @@ def parse_response_message(response, streaming=True):
                     if "DONE" in decoded_line or "message_stop" in decoded_line:
                         break
 
-                    decoded_line = decoded_line.replace("data:", "")
+                    decoded_line = decoded_line.replace("data:", "").strip()
                     json_line = json.loads(decoded_line)
-
                     message_content = None
+                    # Handle both chat and FIM responses
                     if "choices" in json_line:
-                        if "finish_reason" in json_line["choices"][0]:
+                        choice = json_line["choices"][0]
+                        # Break if the conversation is over
+                        if choice.get("finish_reason") == "stop":
                             break
-                        if "delta" in json_line["choices"][0]:
-                            message_content = json_line["choices"][0]["delta"].get("content", "")
-                        elif "text" in json_line["choices"][0]:
-                            message_content = json_line["choices"][0].get("text", "")
+                        # Handle chat responses
+                        if "delta" in choice:
+                            delta = choice["delta"]
+                            if "content" in delta and delta["content"] is not None:
+                                message_content = delta["content"]
+                        # Handle FIM responses
+                        elif "text" in choice:
+                            text = choice["text"]
+                            if text is not None:
+                                message_content = text
                     elif "delta" in json_line:
                         message_content = json_line["delta"].get("text", "")
                     elif "message" in json_line:
@@ -87,7 +95,6 @@ def parse_response_message(response, streaming=True):
 
                     if message_content is not None:
                         response_message += message_content
-
             else:
                 if "choices" in response.json():
                     response_message = response.json()["choices"][0]["message"].get("content", "")
@@ -97,7 +104,8 @@ def parse_response_message(response, streaming=True):
         except Exception as e:
             logger.exception("An error occurred: %s", e)
 
-        return response_message
+        # Remove any trailing newlines and return
+        return response_message.strip()
 
     @staticmethod
     def replace_env_variables(input_string, env):
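The parsing change above handles both streaming shapes: chat chunks carry choices[0].delta.content, while FIM completions carry choices[0].text. A condensed sketch of that branch, run against hand-written sample lines (the payloads are illustrative, not captured responses):

import json

def extract_content(decoded_line: str):
    decoded_line = decoded_line.replace("data:", "").strip()
    if not decoded_line or "DONE" in decoded_line:
        return None
    json_line = json.loads(decoded_line)
    if "choices" not in json_line:
        return None
    choice = json_line["choices"][0]
    if choice.get("finish_reason") == "stop":
        return None
    if "delta" in choice:  # chat responses
        return choice["delta"].get("content")
    if choice.get("text") is not None:  # FIM responses
        return choice["text"]
    return None

print(extract_content('data: {"choices": [{"delta": {"content": "Hello"}}]}'))
print(extract_content('data: {"choices": [{"text": "print_hello()"}]}'))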

tests/integration/testcases.yaml

Lines changed: 6 additions & 8 deletions
@@ -6,6 +6,7 @@ headers:
   ollama:
     Content-Type: application/json
   llamacpp:
+    Content-Type: application/json
   anthropic:
     x-api-key: ENV_ANTHROPIC_KEY
   copilot:
@@ -68,7 +69,7 @@ testcases:
             "role":"user"
           }
         ],
-        "model":"qwen2.5-coder-1.5b-instruct-q5_k_m",
+        "model":"qwen2.5-coder-0.5b-instruct-q5_k_m",
         "stream":true,
         "temperature":0
       }
@@ -81,18 +82,15 @@ testcases:
     url: http://127.0.0.1:8989/llamacpp/completions
     data: |
       {
-        "model": "qwen2.5-coder-1.5b-instruct-q5_k_m",
+        "model": "qwen2.5-coder-0.5b-instruct-q5_k_m",
         "max_tokens": 4096,
         "temperature": 0,
         "stream": true,
-        "stop": ["<|endoftext|>", "<|fim_prefix|>", "<|fim_middle|>", "<|fim_suffix|>", "<|fim_pad|>", "<|repo_name|>", "<|file_sep|>", "<|im_start|>", "<|im_end|>", "/src/", "#- coding: utf-8", "```"],
-        "prompt":"<|fim_prefix|>\n# codegate/test.py\nimport invokehttp\nimport requests\n\nkey = \"mysecret-key\"\n\ndef call_api():\n <|fim_suffix|>\n\n\ndata = {'key1': 'test1', 'key2': 'test2'}\nresponse = call_api('http://localhost:8080', method='post', data='data')\n<|fim_middle|>"
+        "stop": ["<|endoftext|>", "<|fim_prefix|>", "<|fim_middle|>", "<|fim_suffix|>", "<|fim_pad|>", "<|repo_name|>", "<|file_sep|>", "<|im_start|>", "<|im_end|>", "/src/", "#- coding: utf-8", "```", "def test"],
+        "prompt":"# Do not add comments\n<|fim_prefix|>\n# codegate/greet.py\ndef print_hello():\n <|fim_suffix|>\n\n\nprint_hello()\n<|fim_middle|>"
       }
     likes: |
-      url = 'http://localhost:8080'
-      headers = {'Authorization': f'Bearer {key}'}
-      response = requests.get(url, headers=headers)
-      return response.json()
+      print("Hello, World!")
 
   openai_chat:
     name: OpenAI Chat
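The updated FIM test case wraps the prompt in Qwen's fill-in-the-middle tokens and expects the completion to match the "likes" block (print("Hello, World!")). An illustrative helper showing how such a prompt is assembled; the function name and fragments are examples, only the token layout mirrors the test case above:

def build_fim_prompt(prefix: str, suffix: str) -> str:
    # <|fim_middle|> marks where the model should generate the missing code
    return f"<|fim_prefix|>{prefix}<|fim_suffix|>{suffix}<|fim_middle|>"

prefix = "\n# codegate/greet.py\ndef print_hello():\n    "
suffix = "\n\n\nprint_hello()\n"
print(build_fim_prompt(prefix, suffix))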

0 commit comments
