Skip to content

Commit 51eff78

Browse files
committed
feat(ollama): use chat api to leverage prompt templates
closes #34
1 parent a04effc commit 51eff78

File tree

4 files changed: +37 additions, −42 deletions

gswikichat/api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,6 @@ async def api(query, top_k=3, lang='en'):
5454
logger.debug(f'{answer=}')
5555

5656
return {
57-
"answer": answer.data,
57+
"answer": answer.data.content,
5858
"sources": sources
5959
}

gswikichat/llm_config.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import os
2-
from haystack_integrations.components.generators.ollama import OllamaGenerator
2+
from haystack_integrations.components.generators.ollama import OllamaChatGenerator
33

44
from .logger import get_logger
55

@@ -8,17 +8,16 @@
88

99
OLLAMA_MODEL_NAME = os.environ.get("OLLAMA_MODEL_NAME")
1010
OLLAMA_URL = os.environ.get("OLLAMA_URL")
11-
OLLAMA_GENERATE_URL = f"{OLLAMA_URL}/api/generate"
11+
OLLAMA_CHAT_URL = f"{OLLAMA_URL}/api/chat"
1212

1313
logger.info(f'Using {OLLAMA_MODEL_NAME=}')
1414
logger.info(f'Endpoint: {OLLAMA_URL=}')
15-
logger.info(f'Generate: {OLLAMA_GENERATE_URL=}')
16-
17-
logger.debug(f'I AM HERE')
15+
logger.info(f'Generate: {OLLAMA_CHAT_URL=}')
1816

1917
logger.info(f"Setting up ollama with {OLLAMA_MODEL_NAME}")
2018

21-
llm = OllamaGenerator(
19+
llm = OllamaChatGenerator(
2220
model=OLLAMA_MODEL_NAME,
23-
url=OLLAMA_GENERATE_URL
21+
url=OLLAMA_CHAT_URL,
22+
timeout=120
2423
)

gswikichat/prompt.py

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,28 @@
11
from haystack.components.builders.prompt_builder import PromptBuilder
22

33
prompt_template_en = """
4-
<|system|>
5-
You are a helpful assistant. You answer questions based on the given documents.
6-
Answer based on the documents only. If the information is not in the documents,
7-
say that you cannot find the information.
8-
<|endoftext|>
9-
<|user|>
104
Documents:
115
{% for doc_ in documents %}
126
{{ doc_.content }}
137
{% endfor %}
148
With this documents, answer the following question: {{question}}
15-
<|endoftext|>
16-
<|assistant|>
179
"""
1810

1911
prompt_template_de = """
20-
<|system|>
21-
Du bist ein hilfreicher Assistent. Du beantwortest Fragen basierend auf den vorliegenden Dokumenten.
22-
Beantworte basierend auf den Dokumenten nur. Wenn die Information nicht in den Dokumenten ist,
23-
sage, dass du sie nicht finden kannst.
24-
<|endoftext|>
25-
<|user|>
2612
Dokumente:
2713
{% for doc_ in documents %}
2814
{{ doc_.content }}
2915
{% endfor %}
3016
Mit diesen Dokumenten, beantworte die folgende Frage: {{question}}
31-
<|endoftext|>
32-
<|assistant|>
3317
"""
3418

35-
prompt_builders = {
19+
system_prompts = {
20+
'en': 'You are a helpful assistant. You answer questions based on the given documents. Answer based on the documents only. If the information is not in the documents, say that you cannot find the information.',
21+
'de': 'Du bist ein hilfreicher Assistent. Du beantwortest Fragen basierend auf den vorliegenden Dokumenten. Beantworte basierend auf den Dokumenten nur. Wenn die Information nicht in den Dokumenten ist, sage, dass du sie nicht finden kannst.',
22+
}
23+
24+
user_prompt_builders = {
3625
'en': PromptBuilder(template=prompt_template_en),
3726
'de': PromptBuilder(template=prompt_template_de),
3827
}
28+

gswikichat/rag.py

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,21 @@
11
# from haystack import Pipeline
22
from haystack import Document
33
from haystack.components.builders.answer_builder import AnswerBuilder
4+
from haystack.dataclasses import ChatMessage
45

56
from .llm_config import llm
67
from .logger import get_logger
7-
from .prompt import prompt_builders
8+
from .prompt import user_prompt_builders, system_prompts
89
from .vector_store_interface import embedder, retriever, input_documents
910

1011
# Create logger instance from base logger config in `logger.py`
1112
logger = get_logger(__name__)
1213

1314

14-
def rag_pipeline(query: str = None, top_k: int = 3, lang: str = 'de'):
15+
def rag_pipeline(query: str, top_k: int = 3, lang: str = 'de'):
1516

16-
assert (query is not None)
17-
18-
if isinstance(query, str):
19-
query = Document(content=query)
20-
21-
assert (isinstance(query, Document))
22-
23-
query_embedded = embedder.run([query])
17+
query_document = Document(content=query)
18+
query_embedded = embedder.run([query_document])
2419
query_embedding = query_embedded['documents'][0].embedding
2520

2621
retriever_results = retriever.run(
@@ -35,24 +30,35 @@ def rag_pipeline(query: str = None, top_k: int = 3, lang: str = 'de'):
3530
for retriever_result_ in retriever_results:
3631
logger.debug(retriever_result_)
3732

38-
prompt_builder = prompt_builders[lang]
33+
system_prompt = system_prompts[lang]
34+
user_prompt_builder = user_prompt_builders[lang]
3935

40-
prompt_build = prompt_builder.run(
41-
question=query.content, # As a Document instance, .content returns a string
36+
user_prompt_build = user_prompt_builder.run(
37+
question=query_document.content,
4238
documents=retriever_results['documents']
4339
)
4440

45-
prompt = prompt_build['prompt']
41+
prompt = user_prompt_build['prompt']
4642

4743
logger.debug(f'{prompt=}')
4844

49-
response = llm.run(prompt=prompt, generation_kwargs=None)
45+
messages = [
46+
ChatMessage.from_system(system_prompt),
47+
ChatMessage.from_user(prompt),
48+
]
49+
50+
response = llm.run(
51+
messages,
52+
# generation_kwargs={"temperature": 0.2}
53+
)
54+
55+
logger.debug(response)
5056

5157
answer_builder = AnswerBuilder()
5258
answer_build = answer_builder.run(
53-
query=query.content, # As a Document class, .content returns the string
59+
query=query_document.content,
5460
replies=response['replies'],
55-
meta=response['meta'],
61+
meta=[r.meta for r in response['replies']],
5662
documents=retriever_results['documents'],
5763
pattern=None,
5864
reference_pattern=None

0 commit comments

Comments (0)