wip: Integration #23
Merged
Commits (46)
- `ff3fe68` (rti) feat: cache huggingface models
- `38a3bf9` (rti) fix: sentence_transformers version
- `3fb6fd0` (rti) chore: remove custom model based on modelfile
- `a4c7294` (rti) fix(frontend): do not filter by score for now TBD
- `d38c5f0` (rti) chore: remove debug/test code
- `dc4501a` (rti) fix: required sentence_transformers version was actually > 2.2.0
- `42cdcc5` (rti) docs: add notes about embedding models to readme
- `13bc12e` (rti) chore: add debug output to api.py
- `4933a9a` (rti) fix: question in prompt
- `b23833b` (rti) chore: top_k 3 results for now
- `da1017b` (rti) wip: embeddings cache
- `41ff046` (rti) feat: document splitter
- `4e69697` (exowanderer) Update .dockerignore
- `10103c6` (rti) Merge branch 'main' into integration
- `0ee6ed5` (rti) docs: note on how to dev locally
- `7a2c955` (rti) docs: add research_log.md
- `0a5e2be` (rti) feat: set top_k via api
- `332e3dc` (rti) feat: support en and de on the api to switch prompts
- `6225fcc` (rti) feat: cache embedding model during docker build
- `4877807` (rti) wip: smaller chunk size, 5 sentences for now
- `da9859d` (rti) chore: remove comment
- `291aaaf` (rti) feat: enable embeddings cache (for developmnet)
- `936d83e` (rti) feat: add document cleaner
- `1b88437` (rti) Merge branch 'main' into integration
- `3e0b8f4` (rti) docs: long docker run options
- `edf5eb2` (rti) fix: access mode
- `63baf2b` (rti) fix: redraw loading animation on subsequent searches
- `56a7b8c` (rti) wip: workaround for runpod.io http port forwarding
- `8e05473` (rti) feat: switch to openchat 7b model
- `8276e35` (rti) Merge branch 'openchat' into integration
- `22b04d0` (exowanderer) added logging via logger with Handler to api.py; PEP8 formatted api.py
- `10f6b21` (exowanderer) debugging use of homepage instead of hard coded endpoint values
- `bfbd245` (exowanderer) returning to previous to restart without errors
- `7b6ba0a` (exowanderer) renewed app.mount; bug fixed PEP8 changes in api.py; reformatted rag.…
- `0428f87` (exowanderer) returned to stablelm2 model for testing purposes. PEP8 upgrades in ap…
- `8104dde` (exowanderer) added OLLAMA_MODEL_NAME and OLLAMA_URL as environment variables; call…
- `fbc4591` (exowanderer) created logger.py to serve get_logger to all modules
- `caecfd1` (exowanderer) created a rag_pipeline in the rag.py based on the usage in api.py; re…
- `5c0b4d0` (exowanderer) UPdated with PEP8 formatting in vector_store_interface.py
- `8833af7` (rti) chore(Dockerfile): install python deps early
- `9ee8a32` (rti) fix(sentence-transformers): use cuda if available
- `b2357e3` (rti) fix(frontend): run from webserver root
- `b518abf` (rti) feat: store embedding cache in volume
- `69800b0` (rti) feat(start.sh): pull llm using ollama (if not built into container)
- `7803649` (rti) feat(ollama): use chat api to leverage prompt templates
- `ff1fcab` (rti) docs: fix run cmd
```diff
@@ -1,2 +1 @@
 from .api import *
-# from .haystack2beta_tutorial_InMemoryEmbeddingRetriever import *
```
```diff
@@ -1,71 +1,59 @@
 from fastapi.responses import RedirectResponse
+from fastapi.responses import FileResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi import FastAPI

-# from .rag import rag_pipeline
-from .rag import embedder, retriever, prompt_builder, llm, answer_builder
+from .rag import rag_pipeline

 from haystack import Document
+from .logger import get_logger
+
+# Create logger instance from base logger config in `logger.py`
+logger = get_logger(__name__)
+
+FRONTEND_STATIC_DIR = './frontend/dist'

 app = FastAPI()

 app.mount(
-    "/frontend/dist",
-    StaticFiles(directory="frontend/dist", html=True),
-    name="frontend"
+    "/assets",
+    StaticFiles(directory=f"{FRONTEND_STATIC_DIR}/assets"),
+    name="frontend-assets"
 )


 @app.get("/")
 async def root():
-    return RedirectResponse(url="/frontend/dist", status_code=302)
+    return FileResponse(f"{FRONTEND_STATIC_DIR}/index.html")
+
+
+@app.get("/favicon.ico")
+async def favicon():
+    return FileResponse(f"{FRONTEND_STATIC_DIR}/favicon.ico")


 @app.get("/api")
-async def api(q):
-
-    embedder, retriever, prompt_builder, llm, answer_builder
-
-    # query = "How many languages are there?"
-    query = Document(content=q)
-
-    result = embedder.run([query])
-
-    results = retriever.run(
-        query_embedding=list(result['documents'][0].embedding),
-        filters=None,
-        top_k=None,
-        scale_score=None,
-        return_embedding=None
+async def api(query, top_k=3, lang='en'):
+    if not lang in ['en', 'de']:
+        raise Exception("language must be 'en' or 'de'")
+
+    logger.debug(f'{query=}')  # Assuming we change the input name
+    logger.debug(f'{top_k=}')
+    logger.debug(f'{lang=}')
+
+    answer = rag_pipeline(
+        query=query,
+        top_k=top_k,
+        lang=lang
     )
-    # .run(
-    #     result['documents'][0].embedding
-    # )
-
-    prompt = prompt_builder.run(documents=results['documents'])['prompt']
-
-    response = llm.run(prompt=prompt, generation_kwargs=None)
-    # reply = response['replies'][0]
-
-    # rag_pipeline.connect("llm.replies", "answer_builder.replies")
-    # rag_pipeline.connect("llm.metadata", "answer_builder.meta")
-    # rag_pipeline.connect("retriever", "answer_builder.documents")
-
-    results = answer_builder.run(
-        query=q,
-        replies=response['replies'],
-        meta=response['meta'],
-        documents=results['documents'],
-        pattern=None,
-        reference_pattern=None
-    )
+    sources = [
+        {
+            "src": d_.meta['src'],
+            "content": d_.content,
+            "score": d_.score
+        } for d_ in answer.documents
+    ]

-    answer = results['answers'][0]
+    logger.debug(f'{answer=}')

     return {
-        "answer": answer.data,
-        "sources": [{
-            "src": d.meta['src'],
-            "content": d.content,
-            "score": d.score
-        } for d in answer.documents]
+        "answer": answer.data.content,
+        "sources": sources
     }
```
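The reworked handler boils down to three steps: validate `lang`, delegate to `rag_pipeline`, and reshape the returned documents into a `sources` list. A minimal runnable sketch of that contract, with the pipeline stubbed out (`fake_rag_pipeline`, `FakeAnswer`, and `FakeDocument` are illustrative stand-ins, not part of this repository):

```python
# Sketch of the /api contract from the diff above, with the RAG pipeline
# replaced by a stub so the flow is runnable in isolation.
from dataclasses import dataclass, field


@dataclass
class FakeDocument:
    content: str
    meta: dict
    score: float


@dataclass
class FakeAnswer:
    data: str
    documents: list = field(default_factory=list)


def fake_rag_pipeline(query, top_k, lang):
    # Stand-in for the real pipeline: returns a canned answer plus documents.
    docs = [FakeDocument("Berlin is the capital.", {"src": "doc1"}, 0.9)]
    return FakeAnswer(data="Berlin", documents=docs[:top_k])


def api(query, top_k=3, lang="en"):
    # Same guard as the endpoint: only 'en' and 'de' are accepted.
    if lang not in ("en", "de"):
        raise ValueError("language must be 'en' or 'de'")
    answer = fake_rag_pipeline(query=query, top_k=top_k, lang=lang)
    # Reshape retrieved documents into the JSON-friendly sources list.
    sources = [
        {"src": d.meta["src"], "content": d.content, "score": d.score}
        for d in answer.documents
    ]
    return {"answer": answer.data, "sources": sources}
```

One small difference worth noting: the stub returns `answer.data` as a plain string, whereas the PR's Haystack answer wraps it in a message object, hence `answer.data.content` in the real handler.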
```diff
@@ -1,9 +1,23 @@
 import os
-from haystack_integrations.components.generators.ollama import OllamaGenerator
+from haystack_integrations.components.generators.ollama import OllamaChatGenerator

 # TODO: discolm prompt https://huggingface.co/DiscoResearch/DiscoLM_German_7b_v1
-print(f"Setting up ollama with {os.getenv('MODEL')}")
-llm = OllamaGenerator(
-    model=os.getenv("MODEL"),
-    url="http://localhost:11434/api/generate"
+from .logger import get_logger
+
+# Create logger instance from base logger config in `logger.py`
+logger = get_logger(__name__)
+
+OLLAMA_MODEL_NAME = os.environ.get("OLLAMA_MODEL_NAME")
+OLLAMA_URL = os.environ.get("OLLAMA_URL")
+OLLAMA_CHAT_URL = f"{OLLAMA_URL}/api/chat"
+
+logger.info(f'Using {OLLAMA_MODEL_NAME=}')
+logger.info(f'Endpoint: {OLLAMA_URL=}')
+logger.info(f'Generate: {OLLAMA_CHAT_URL=}')
+
+logger.info(f"Setting up ollama with {OLLAMA_MODEL_NAME}")
+
+llm = OllamaChatGenerator(
+    model=OLLAMA_MODEL_NAME,
+    url=OLLAMA_CHAT_URL,
+    timeout=120
 )
```
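The change above moves the hard-coded model and URL into `OLLAMA_MODEL_NAME` and `OLLAMA_URL` environment variables and switches to the chat endpoint. Note that with `os.environ.get("OLLAMA_URL")` and no fallback, an unset variable yields the literal string `"None/api/chat"`. A small sketch of the same pattern with an explicit default (the default URL here is illustrative, not something the PR sets):

```python
# Environment-driven endpoint construction, as in the diff above, but
# with a fallback so a missing OLLAMA_URL fails soft instead of
# producing "None/api/chat".
import os


def ollama_chat_url(env=os.environ):
    # Read the base URL from the environment and derive the chat endpoint.
    base = env.get("OLLAMA_URL", "http://localhost:11434")
    return f"{base}/api/chat"
```

Passing the environment mapping as a parameter also makes the function trivially testable with a plain dict.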
```diff
@@ -0,0 +1,30 @@
+import logging
+import sys
+
+
+def get_logger(name):
+    # Create a logger
+    # Source: https://docs.python.org/3/howto/logging.html
+    logging.basicConfig(
+        filename='gbnc_api.log',
+        encoding='utf-8',
+        level=logging.DEBUG
+    )
+
+    logger = logging.getLogger(name)
+    logger.setLevel(logging.DEBUG)  # Set the logging level
+
+    # Source: stackoverflow.com/questions/14058453/
+    # making-python-loggers-output-all-messages-
+    # to-stdout-in-addition-to-log-file
+
+    # Create console handler and set level to debug
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setLevel(logging.DEBUG)
+    formatter = logging.Formatter(
+        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    )
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+
+    return logger
```
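One caveat with `get_logger` as written: `logging.getLogger(name)` returns the same logger object on every call, so calling `get_logger` twice with the same name stacks a second `StreamHandler` and every message prints twice. A common guard (illustrative, not part of the PR) is to check `logger.handlers` before adding one:

```python
# Variant of the get_logger pattern above that avoids duplicate
# handlers when called repeatedly with the same name.
import logging
import sys


def get_logger_once(name):
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    if not logger.handlers:  # only attach a handler on the first call
        handler = logging.StreamHandler(sys.stdout)
        handler.setLevel(logging.DEBUG)
        handler.setFormatter(logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        ))
        logger.addHandler(handler)
    return logger
```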
```diff
@@ -1,36 +1,28 @@
 from haystack.components.builders.prompt_builder import PromptBuilder

-# prompt_template = """
-# Given these documents, answer the question. Answer in a full sentence. Give the response only, no explanation. Don't mention the documents.
-# Documents:
-# {% for doc in documents %}
-# If {{ doc.content }} answers the Question: {{question}}
-# Then return {{ doc.meta["src"] }}
-# {% endfor %}
-# """
-
-prompt_template = """
-<|system|>
-You are a helpful assistant. You answer questions based on the given documents.
-Answer based on the documents only. If the information is not in the documents,
-say that you cannot find the information.
-<|endoftext|>
-<|user|>
+prompt_template_en = """
 Documents:
-{% for doc in documents %}
-{{ doc.content }}
+{% for doc_ in documents %}
+{{ doc_.content }}
 {% endfor %}
 With this documents, answer the following question: {{question}}
-<|endoftext|>
-<|assistant|>
 """

-# prompt_template = """
-# Given these documents, answer the question. Answer in a full sentence. Give the response only, no explanation. Don't mention the documents.
-# Documents:
-# If {{ doc.content }} answers the Question: {{question}}
-# Then only return {{ doc.meta["src"] }} and nothing at all.
-# {% endfor %}
-# """
+prompt_template_de = """
+Dokumente:
+{% for doc_ in documents %}
+{{ doc_.content }}
+{% endfor %}
+Mit diesen Dokumenten, beantworte die folgende Frage: {{question}}
+"""

-prompt_builder = PromptBuilder(template=prompt_template)
+system_prompts = {
+    'en': 'You are a helpful assistant. You answer questions based on the given documents. Answer based on the documents only. If the information is not in the documents, say that you cannot find the information.',
+    'de': 'Du bist ein hilfreicher Assistent. Du beantwortest Fragen basierend auf den vorliegenden Dokumenten. Beantworte basierend auf den Dokumenten nur. Wenn die Information nicht in den Dokumenten ist, sage, dass du sie nicht finden kannst.',
+}
+
+user_prompt_builders = {
+    'en': PromptBuilder(template=prompt_template_en),
+    'de': PromptBuilder(template=prompt_template_de),
+}
```
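The diff above replaces the single hard-coded prompt with per-language lookup tables keyed by the `lang` query parameter, and moves the system prompt out of the user template so the chat API can supply it as a separate message. A plain-Python sketch of that data flow (the renderer below is a stand-in for Haystack's Jinja-based `PromptBuilder`, and the message-dict shape is an assumption for illustration):

```python
# Per-language prompt selection, mirroring the system_prompts /
# user_prompt_builders tables introduced in the diff above.
SYSTEM_PROMPTS = {
    "en": "You are a helpful assistant. You answer questions based on the given documents.",
    "de": "Du bist ein hilfreicher Assistent. Du beantwortest Fragen basierend auf den vorliegenden Dokumenten.",
}

USER_TEMPLATES = {
    "en": "Documents:\n{docs}\nWith these documents, answer the following question: {question}",
    "de": "Dokumente:\n{docs}\nMit diesen Dokumenten, beantworte die folgende Frage: {question}",
}


def build_messages(question, documents, lang="en"):
    # Look up both the system prompt and the user template by language,
    # then render the retrieved documents into the user message.
    docs = "\n".join(documents)
    return [
        {"role": "system", "content": SYSTEM_PROMPTS[lang]},
        {"role": "user",
         "content": USER_TEMPLATES[lang].format(docs=docs, question=question)},
    ]
```

Keeping both tables keyed by the same language codes means the earlier `lang in ['en', 'de']` guard in the API is the single place new languages have to be registered.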