Skip to content

Commit 18485b6

Browse files
committed
Added additional logging for ollama/llamacpp calls
1 parent 26d4d2d commit 18485b6

File tree

1 file changed

+27
-0
lines changed

1 file changed

+27
-0
lines changed

markus_ai_server/server.py

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,13 +78,22 @@ def chat_with_llama_server_http(
7878
if 'stream' not in payload:
7979
payload['stream'] = False
8080

81+
start_log_data = {
82+
'model': model
83+
}
84+
logger.info(f'chat_with_llama_server_http starting: {start_log_data}')
8185
response = requests.post(
8286
f'{LLAMA_SERVER_URL}/v1/chat/completions',
8387
json=payload,
8488
headers={'Content-Type': 'application/json'},
8589
timeout=timeout,
8690
)
8791

92+
done_log_data = {
93+
'model': model,
94+
'response_status_code': response.status_code
95+
}
96+
logger.info(f'chat_with_llama_server_http done: {done_log_data}')
8897
if response.status_code == 200:
8998
data = response.json()
9099
if 'choices' in data and len(data['choices']) > 0:
@@ -123,13 +132,26 @@ def chat_with_ollama(
123132
"""Handle chat using ollama."""
124133
messages = _build_messages(content, system_prompt, image_files)
125134

135+
start_log_data = {
136+
'model': model
137+
}
138+
logger.info(f'chat_with_ollama starting: {start_log_data}')
126139
response = ollama.chat(
127140
model=model,
128141
messages=messages,
129142
stream=False,
130143
format=json_schema[SCHEMA_KEY] if json_schema else None,
131144
options=model_options,
132145
)
146+
done_log_data = {
147+
'model': model,
148+
'eval_duration': response.eval_duration,
149+
'prompt_eval_duration': response.prompt_eval_duration,
150+
'eval_count': response.eval_count,
151+
'prompt_eval_count': response.prompt_eval_count
152+
}
153+
154+
logger.info(f'chat_with_ollama done: {done_log_data}')
133155
return response.message.content
134156

135157

@@ -165,7 +187,12 @@ def chat_with_llamacpp(
165187
pass # TODO: pass image files
166188

167189
try:
190+
start_log_data = {
191+
'model': model
192+
}
193+
logger.info(f'chat_with_llamacpp starting: {start_log_data}')
168194
result = subprocess.run(cmd, capture_output=True, text=False, timeout=timeout, check=True)
195+
logger.info(f'chat_with_llamacpp done: {start_log_data}')
169196

170197
stdout_text = result.stdout.decode('utf-8', errors='replace')
171198

0 commit comments

Comments
 (0)