Merged
Commits (46 total; this view shows changes from 30 commits)
ff3fe68
feat: cache huggingface models
rti Feb 1, 2024
38a3bf9
fix: sentence_transformers version
rti Feb 1, 2024
3fb6fd0
chore: remove custom model based on modelfile
rti Feb 1, 2024
a4c7294
fix(frontend): do not filter by score for now TBD
rti Feb 1, 2024
d38c5f0
chore: remove debug/test code
rti Feb 1, 2024
dc4501a
fix: required sentence_transformers version was actually > 2.2.0
rti Feb 1, 2024
42cdcc5
docs: add notes about embedding models to readme
rti Feb 1, 2024
13bc12e
chore: add debug output to api.py
rti Feb 1, 2024
4933a9a
fix: question in prompt
rti Feb 1, 2024
b23833b
chore: top_k 3 results for now
rti Feb 1, 2024
da1017b
wip: embeddings cache
rti Feb 1, 2024
41ff046
feat: document splitter
rti Feb 1, 2024
4e69697
Update .dockerignore
exowanderer Feb 2, 2024
10103c6
Merge branch 'main' into integration
rti Feb 4, 2024
0ee6ed5
docs: note on how to dev locally
rti Feb 4, 2024
7a2c955
docs: add research_log.md
rti Feb 4, 2024
0a5e2be
feat: set top_k via api
rti Feb 5, 2024
332e3dc
feat: support en and de on the api to switch prompts
rti Feb 5, 2024
6225fcc
feat: cache embedding model during docker build
rti Feb 5, 2024
4877807
wip: smaller chunk size, 5 sentences for now
rti Feb 5, 2024
da9859d
chore: remove comment
rti Feb 5, 2024
291aaaf
feat: enable embeddings cache (for development)
rti Feb 9, 2024
936d83e
feat: add document cleaner
rti Feb 9, 2024
1b88437
Merge branch 'main' into integration
rti Feb 9, 2024
3e0b8f4
docs: long docker run options
rti Feb 9, 2024
edf5eb2
fix: access mode
rti Feb 9, 2024
63baf2b
fix: redraw loading animation on subsequent searches
rti Feb 9, 2024
56a7b8c
wip: workaround for runpod.io http port forwarding
rti Feb 9, 2024
8e05473
feat: switch to openchat 7b model
rti Feb 9, 2024
8276e35
Merge branch 'openchat' into integration
rti Feb 9, 2024
22b04d0
added logging via logger with Handler to api.py; PEP8 formatted api.py
exowanderer Feb 9, 2024
10f6b21
debugging use of homepage instead of hard coded endpoint values
exowanderer Feb 9, 2024
bfbd245
returning to previous to restart without errors
exowanderer Feb 9, 2024
7b6ba0a
renewed app.mount; bug fixed PEP8 changes in api.py; reformatted rag.…
exowanderer Feb 9, 2024
0428f87
returned to stablelm2 model for testing purposes. PEP8 upgrades in ap…
exowanderer Feb 9, 2024
8104dde
added OLLAMA_MODEL_NAME and OLLAMA_URL as environment variables; call…
exowanderer Feb 9, 2024
fbc4591
created logger.py to serve get_logger to all modules
exowanderer Feb 9, 2024
caecfd1
created a rag_pipeline in the rag.py based on the usage in api.py; re…
exowanderer Feb 9, 2024
5c0b4d0
Updated with PEP8 formatting in vector_store_interface.py
exowanderer Feb 9, 2024
8833af7
chore(Dockerfile): install python deps early
rti Feb 12, 2024
9ee8a32
fix(sentence-transformers): use cuda if available
rti Feb 12, 2024
b2357e3
fix(frontend): run from webserver root
rti Feb 12, 2024
b518abf
feat: store embedding cache in volume
rti Feb 12, 2024
69800b0
feat(start.sh): pull llm using ollama (if not built into container)
rti Feb 12, 2024
7803649
feat(ollama): use chat api to leverage prompt templates
rti Feb 12, 2024
ff1fcab
docs: fix run cmd
rti Feb 19, 2024
16 changes: 7 additions & 9 deletions Dockerfile
@@ -38,17 +38,10 @@ ENV PATH="/usr/local/ollama/bin:${PATH}"


# Pull a language model (see LICENSE_STABLELM2.txt)
-ARG MODEL=stablelm2:1.6b-zephyr
+ARG MODEL=openchat
ENV MODEL=${MODEL}
RUN ollama serve & while ! curl http://localhost:11434; do sleep 1; done; ollama pull $MODEL

-# Build a language model
-# ARG MODEL=discolm
-# ENV MODEL=${MODEL}
-# WORKDIR /tmp/model
-# COPY --chmod=644 Modelfile Modelfile
-# RUN curl --location https://huggingface.co/TheBloke/DiscoLM_German_7b_v1-GGUF/resolve/main/discolm_german_7b_v1.Q5_K_S.gguf?download=true --output discolm_german_7b_v1.Q5_K_S.gguf; ollama serve & while ! curl http://localhost:11434; do sleep 1; done; ollama create ${MODEL} -f Modelfile && rm -rf /tmp/model


# Setup the custom API and frontend
WORKDIR /workspace
@@ -58,6 +51,11 @@ COPY --chmod=755 requirements.txt requirements.txt
RUN pip install -r requirements.txt


+# Load sentence-transformers model once in order to cache it in the image
+# TODO: ARG / ENV for embedder model
+RUN echo "from haystack.components.embedders import SentenceTransformersDocumentEmbedder\nSentenceTransformersDocumentEmbedder(model='svalabs/german-gpl-adapted-covid').warm_up()" | python3


# Install frontend dependencies
COPY --chmod=755 frontend/package.json frontend/package.json
COPY --chmod=755 frontend/yarn.lock frontend/yarn.lock
@@ -69,7 +67,7 @@ COPY --chmod=755 json_input json_input


# Copy backend for production
-COPY --chmod=644 gswikichat gswikichat
+COPY --chmod=755 gswikichat gswikichat


# Copy and build frontend for production (into the frontend/dist folder)
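The added RUN line above warms up the sentence-transformers model at build time so the weights land in the image's Hugging Face cache. As a more readable equivalent of that echo-pipe one-liner, a standalone script could look like this (an editorial sketch, not part of the diff; it assumes warm_up() downloads the model into the default cache directory, and the ARG/ENV for the model name is still the TODO noted in the diff):

```python
# warm_model.py — build-time warm-up sketch mirroring the RUN line above.
# Assumes SentenceTransformersDocumentEmbedder.warm_up() populates the
# Hugging Face cache under ~/.cache/huggingface.
from haystack.components.embedders import SentenceTransformersDocumentEmbedder

embedder = SentenceTransformersDocumentEmbedder(
    model='svalabs/german-gpl-adapted-covid'  # TODO in diff: make this an ARG/ENV
)
embedder.warm_up()  # downloads and loads the model once, filling the cache
```

A `COPY warm_model.py` plus `RUN python3 warm_model.py` pair would then replace the echo pipe.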
2 changes: 0 additions & 2 deletions Modelfile

This file was deleted.

27 changes: 24 additions & 3 deletions README.md
@@ -9,16 +9,36 @@
To build and run the container locally with hot reload on python files do:
```
DOCKER_BUILDKIT=1 docker build . -t gbnc
-docker run -v "$(pwd)/gswikichat":/workspace/gswikichat \
-   -p 8000:8000 --rm --name gbnc -it gbnc \
-   -e HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN
+docker run \
+    --env HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN \
+    --volume "$(pwd)/gswikichat":/workspace/gswikichat \
+    --volume "$(pwd)/cache":/root/.cache \
+    --publish 8000:8000 \
+    --rm \
+    --interactive \
+    --tty \
+    --name gbnc \
+    gbnc
```
Point your browser to http://localhost:8000/ and use the frontend.

### Runpod.io

The container works on [runpod.io](https://www.runpod.io/) GPU instances. A [template is available here](https://runpod.io/gsc?template=0w8z55rf19&ref=yfvyfa0s).

+### Local development
+#### Backend
+```
+python -m venv .venv
+. ./.venv/bin/activate
+pip install -r requirements.txt
+```
+#### Frontend
+```
+cd frontend
+yarn dev
+```
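For running the backend outside Docker after the steps above, something like the following should work (an editorial sketch, not part of the diff; it assumes the FastAPI instance is exposed as `app` via `from .api import *` in gswikichat/__init__.py, as shown further down, and that an Ollama server is reachable):

```python
# run_local.py — minimal dev-server sketch (hypothetical helper, not in the PR)
import uvicorn

if __name__ == "__main__":
    # reload=True mirrors the hot-reload behavior of the docker run setup
    uvicorn.run("gswikichat:app", host="127.0.0.1", port=8000, reload=True)
```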

## What's in the box

### Docker container
@@ -44,3 +64,4 @@ A [FastAPI](https://fastapi.tiangolo.com/) server is running in the container. I
### Frontend

A minimal frontend lets the user input a question and renders the response from the system.

Empty file added cache/.keep
Empty file.
2 changes: 1 addition & 1 deletion frontend/src/components/field/FieldAnswer.vue
@@ -12,7 +12,7 @@
<div v-else>
<div v-if="response && response.sources">
<div v-for="s in response.sources" :key="s.id">
-<div v-if="s.score > 2" class="mb-2">
+<div v-if="s.score > 0" class="mb-2">
<details
class="text-sm cursor-pointer text-light-distinct-text dark:text-dark-distinct-text"
>
1 change: 1 addition & 0 deletions frontend/src/views/ChatView.vue
@@ -95,6 +95,7 @@ const inputFocused = ref(false)
// }

function search() {
+  response.value = undefined;
displayResponse.value = true
fetch(`/api?q=${inputText.value}`)
.then((response) => response.json())
1 change: 0 additions & 1 deletion gswikichat/__init__.py
@@ -1,2 +1 @@
from .api import *
-# from .haystack2beta_tutorial_InMemoryEmbeddingRetriever import *
57 changes: 31 additions & 26 deletions gswikichat/api.py
@@ -2,7 +2,6 @@
from fastapi.staticfiles import StaticFiles
from fastapi import FastAPI

-# from .rag import rag_pipeline
from .rag import embedder, retriever, prompt_builder, llm, answer_builder
from haystack import Document

@@ -17,55 +16,61 @@

@app.get("/")
async def root():
-    return RedirectResponse(url="/frontend/dist", status_code=302)
+    # return RedirectResponse(url="/frontend/dist", status_code=308)
+    return {}


@app.get("/api")
-async def api(q):
+async def api(q, top_k = 3, lang = 'en'):
+    if not lang in ['en', 'de']:
+        raise Exception("language must be 'en' or 'de'")

+    embedder, retriever, prompt_builder, llm, answer_builder
+    print(f"{q=}")
+    print(f"{top_k=}")
+    print(f"{lang=}")

    # query = "How many languages are there?"
    query = Document(content=q)

-    result = embedder.run([query])
+    queryEmbedded = embedder.run([query])
+    queryEmbedding = queryEmbedded['documents'][0].embedding

-    results = retriever.run(
-        query_embedding=list(result['documents'][0].embedding),
+    retrieverResults = retriever.run(
+        query_embedding=list(queryEmbedding),
        filters=None,
-        top_k=None,
+        top_k=top_k,
        scale_score=None,
        return_embedding=None
    )
-    # .run(
-    #     result['documents'][0].embedding
-    # )

-    prompt = prompt_builder.run(documents=results['documents'])['prompt']
+    print("retriever results:")
[Review comment — Collaborator]: If we implement the logging as suggested above, we should include this as a debug statement:
    logging.debug('retriever results:')
[Reply — exowanderer, Feb 9, 2024]: Updated in #24 by adding the get_logger function in the logger.py file. If you confirm, then we can close this review comment.
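For reference, a minimal get_logger along the lines the reply describes might look like this (an editorial sketch; the actual logger.py added in #24 may differ):

```python
# logger.py — sketch of a shared get_logger helper (hypothetical; see #24)
import logging

def get_logger(name: str, level: int = logging.DEBUG) -> logging.Logger:
    logger = logging.getLogger(name)
    if not logger.handlers:  # attach the handler only once per logger
        handler = logging.StreamHandler()
        handler.setFormatter(
            logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s")
        )
        logger.addHandler(handler)
    logger.setLevel(level)
    return logger
```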

+    for retrieverResult in retrieverResults:
+        print(retrieverResult)
[Review comment — Collaborator]: If we use the suggestion above to include a logger, then we should replace this print statement with a debug call:
    logging.debug(retriever_result_)
Note that this suggestion includes the trailing underscore that I prefer, but is non-standard.
[Reply — exowanderer, Feb 9, 2024]: Updated in #24 by adding the get_logger function in the logger.py file. If you confirm, then we can close this review comment.


-    response = llm.run(prompt=prompt, generation_kwargs=None)
-    # reply = response['replies'][0]
+    promptBuilder = prompt_builder[lang]
+    promptBuild = promptBuilder.run(question=q, documents=retrieverResults['documents'])
+    prompt = promptBuild['prompt']
[Review comment — Collaborator]: Following the PEP8 standards, it is highly recommended to use snake_case here:
    prompt = prompt_build['prompt']
[Reply — exowanderer, Feb 9, 2024]: Updated in #24 by renaming promptBuild to prompt_build. If you confirm, then we can close this review comment.


print(f"{prompt=}")

-    # rag_pipeline.connect("llm.replies", "answer_builder.replies")
-    # rag_pipeline.connect("llm.metadata", "answer_builder.meta")
-    # rag_pipeline.connect("retriever", "answer_builder.documents")
+    response = llm.run(prompt=prompt, generation_kwargs=None)

-    results = answer_builder.run(
+    answerBuild = answer_builder.run(
        query=q,
        replies=response['replies'],
        meta=response['meta'],
-        documents=results['documents'],
+        documents=retrieverResults['documents'],
        pattern=None,
        reference_pattern=None
    )
+    print("answerBuild", answerBuild)
[Review comment — Collaborator]: To follow the above suggestions of adding logging and using snake_case, we should change this line to:
    logging.debug(f'{answer_build=}')
[Reply — exowanderer, Feb 9, 2024]: Updated in #24 by renaming answerBuild to answer_build. If you confirm, then we can close this review comment.


+    answer = answerBuild['answers'][0]

+    sources = [{ "src": d.meta['src'], "content": d.content, "score": d.score } for d in answer.documents]

-    answer = results['answers'][0]
+    print("answer", answer)
[Review comment — Collaborator]: If we implement the logging suggestion above, we should change this line to:
    logging.debug(f'{answer=}')
[Reply — exowanderer, Feb 9, 2024]: Updated in #24 by adding the get_logger function in the logger.py file. If you confirm, then we can close this review comment.


    return {
        "answer": answer.data,
-        "sources": [{
-            "src": d.meta['src'],
-            "content": d.content,
-            "score": d.score
-        } for d in answer.documents]
+        "sources": sources
    }
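With the new q, top_k, and lang parameters, the endpoint can be exercised like this (an editorial sketch; it assumes the server is running on localhost:8000 and that the requests package is installed):

```python
# client.py — smoke test for the /api endpoint (hypothetical helper, not in the PR)
import requests

resp = requests.get(
    "http://localhost:8000/api",
    params={"q": "How many languages are there?", "top_k": 3, "lang": "en"},
)
resp.raise_for_status()
data = resp.json()
print(data["answer"])
for source in data["sources"]:  # each entry carries src, content, and score
    print(source["score"], source["src"])
```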
1 change: 0 additions & 1 deletion gswikichat/llm_config.py
@@ -1,7 +1,6 @@
import os
from haystack_integrations.components.generators.ollama import OllamaGenerator

-# TODO: discolm prompt https://huggingface.co/DiscoResearch/DiscoLM_German_7b_v1
print(f"Setting up ollama with {os.getenv('MODEL')}")
llm = OllamaGenerator(
model=os.getenv("MODEL"),
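The commit list mentions OLLAMA_MODEL_NAME and OLLAMA_URL environment variables; an env-driven setup in that spirit might look like the following (an editorial sketch — the defaults and the exact merged wiring are assumptions):

```python
# llm_config.py variant — env-driven Ollama setup (sketch; defaults are assumed)
import os
from haystack_integrations.components.generators.ollama import OllamaGenerator

llm = OllamaGenerator(
    model=os.getenv("OLLAMA_MODEL_NAME", os.getenv("MODEL", "openchat")),
    url=os.getenv("OLLAMA_URL", "http://localhost:11434/api/generate"),
)
```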
23 changes: 21 additions & 2 deletions gswikichat/prompt.py
@@ -9,7 +9,7 @@
# {% endfor %}
# """

-prompt_template = """
+prompt_template_en = """
<|system|>
You are a helpful assistant. You answer questions based on the given documents.
Answer based on the documents only. If the information is not in the documents,
@@ -25,6 +25,22 @@
<|assistant|>
"""

+prompt_template_de = """
+<|system|>
+Du bist ein hilfreicher Assistent. Du beantwortest Fragen basierend auf den vorliegenden Dokumenten.
+Beantworte basierend auf den Dokumenten nur. Wenn die Information nicht in den Dokumenten ist,
+sage, dass du sie nicht finden kannst.
+<|endoftext|>
+<|user|>
+Dokumente:
+{% for doc in documents %}
[Review comment — Collaborator]: As defined elsewhere, I am suggesting that we add a trailing underscore for temporary variables. For the Jinja used here, this suggestion would result in the following update:
    {% for doc_ in documents %}
        {{ doc_.content }}
If agreed, the same should be implemented in the prompt_template_en above.
[Reply — exowanderer, Feb 9, 2024]: Updated in #24 by renaming doc to doc_ in the jinja text. If you confirm, then we can close this review comment.

+{{ doc.content }}
+{% endfor %}
+Mit diesen Dokumenten, beantworte die folgende Frage: {{question}}
+<|endoftext|>
+<|assistant|>
+"""

# prompt_template = """
# Given these documents, answer the question. Answer in a full sentence. Give the response only, no explanation. Don't mention the documents.
# Documents:
@@ -33,4 +49,7 @@
# {% endfor %}
# """

-prompt_builder = PromptBuilder(template=prompt_template)
+prompt_builder = {
+    'en': PromptBuilder(template=prompt_template_en),
+    'de': PromptBuilder(template=prompt_template_de),
+}
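The builder dict is keyed by language, matching the api.py change above where prompt_builder[lang] selects the template. A minimal usage sketch (assuming gswikichat.prompt exports prompt_builder as defined here):

```python
# Usage sketch for the per-language prompt builders (editorial example)
from haystack import Document

docs = [Document(content="Es gibt rund 7000 Sprachen auf der Welt.")]
builder = prompt_builder['de']  # selected from the API's ?lang= parameter
prompt = builder.run(question="Wie viele Sprachen gibt es?", documents=docs)['prompt']
print(prompt)  # rendered <|system|>/<|user|>/<|assistant|> prompt text
```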