From ff3fe68e11ba44d2c6d5f42b99756bf53858d12f Mon Sep 17 00:00:00 2001 From: roti Date: Thu, 1 Feb 2024 22:35:20 +0000 Subject: [PATCH 01/43] feat: cache huggingface models --- README.md | 11 ++++++++--- cache/.keep | 0 2 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 cache/.keep diff --git a/README.md b/README.md index 2e2a009..c4dabd5 100644 --- a/README.md +++ b/README.md @@ -9,9 +9,14 @@ To build and run the container locally with hot reload on python files do: ``` DOCKER_BUILDKIT=1 docker build . -t gbnc -docker run -v "$(pwd)/gswikichat":/workspace/gswikichat \ - -p 8000:8000 --rm --name gbnc -it gbnc \ - -e HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN +docker run \ + -v "$(pwd)/gswikichat":/workspace/gswikichat \ + -v "$(pwd)/cache":/root/.cache \ + -e HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN + -p 8000:8000 \ + --rm -it \ + --name gbnc \ + gbnc ``` Point your browser to http://localhost:8000/ and use the frontend. diff --git a/cache/.keep b/cache/.keep new file mode 100644 index 0000000..e69de29 From 38a3bf950b2a0171161a02c00f854e96bc9a0229 Mon Sep 17 00:00:00 2001 From: roti Date: Thu, 1 Feb 2024 22:35:32 +0000 Subject: [PATCH 02/43] fix: sentence_transformers version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index b03f924..7369c03 100644 --- a/requirements.txt +++ b/requirements.txt @@ -32,7 +32,7 @@ python-dotenv==1.0.1 pytz==2023.3.post1 PyYAML==6.0.1 requests==2.31.0 -sentence-transformers>=2.2.0 +sentence-transformers==2.2.0 six==1.16.0 sniffio==1.3.0 starlette==0.35.1 From 3fb6fd0f5d6f944a319637843bb1812495d713d1 Mon Sep 17 00:00:00 2001 From: roti Date: Thu, 1 Feb 2024 22:40:01 +0000 Subject: [PATCH 03/43] chore: remove custom model based on modelfile --- Dockerfile | 7 ------- Modelfile | 2 -- 2 files changed, 9 deletions(-) delete mode 100644 Modelfile diff --git a/Dockerfile b/Dockerfile index 6202b20..796ccc4 100644 --- a/Dockerfile +++ 
b/Dockerfile @@ -42,13 +42,6 @@ ARG MODEL=stablelm2:1.6b-zephyr ENV MODEL=${MODEL} RUN ollama serve & while ! curl http://localhost:11434; do sleep 1; done; ollama pull $MODEL -# Build a language model -# ARG MODEL=discolm -# ENV MODEL=${MODEL} -# WORKDIR /tmp/model -# COPY --chmod=644 Modelfile Modelfile -# RUN curl --location https://huggingface.co/TheBloke/DiscoLM_German_7b_v1-GGUF/resolve/main/discolm_german_7b_v1.Q5_K_S.gguf?download=true --output discolm_german_7b_v1.Q5_K_S.gguf; ollama serve & while ! curl http://localhost:11434; do sleep 1; done; ollama create ${MODEL} -f Modelfile && rm -rf /tmp/model - # Setup the custom API and frontend WORKDIR /workspace diff --git a/Modelfile b/Modelfile deleted file mode 100644 index e0c49cb..0000000 --- a/Modelfile +++ /dev/null @@ -1,2 +0,0 @@ -FROM ./discolm_german_7b_v1.Q5_K_S.gguf - From a4c729453f97c69e6378431ec8902ec50ca7c5d3 Mon Sep 17 00:00:00 2001 From: roti Date: Thu, 1 Feb 2024 22:50:34 +0000 Subject: [PATCH 04/43] fix(frontend): do not filter by score for now TBD --- frontend/src/components/field/FieldAnswer.vue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/components/field/FieldAnswer.vue b/frontend/src/components/field/FieldAnswer.vue index 5c78d55..99afac7 100644 --- a/frontend/src/components/field/FieldAnswer.vue +++ b/frontend/src/components/field/FieldAnswer.vue @@ -12,7 +12,7 @@
-
+
From d38c5f052d3a8a617438a5c126c7571e7413e6b1 Mon Sep 17 00:00:00 2001 From: roti Date: Thu, 1 Feb 2024 22:52:31 +0000 Subject: [PATCH 05/43] chore: remove debug/test code --- gswikichat/__init__.py | 1 - gswikichat/api.py | 11 ---- gswikichat/vector_store_interface.py | 77 ---------------------------- 3 files changed, 89 deletions(-) diff --git a/gswikichat/__init__.py b/gswikichat/__init__.py index eab6613..0a0e47b 100644 --- a/gswikichat/__init__.py +++ b/gswikichat/__init__.py @@ -1,2 +1 @@ from .api import * -# from .haystack2beta_tutorial_InMemoryEmbeddingRetriever import * diff --git a/gswikichat/api.py b/gswikichat/api.py index a05ff27..5dc6677 100644 --- a/gswikichat/api.py +++ b/gswikichat/api.py @@ -2,7 +2,6 @@ from fastapi.staticfiles import StaticFiles from fastapi import FastAPI -# from .rag import rag_pipeline from .rag import embedder, retriever, prompt_builder, llm, answer_builder from haystack import Document @@ -23,9 +22,6 @@ async def root(): @app.get("/api") async def api(q): - embedder, retriever, prompt_builder, llm, answer_builder - - # query = "How many languages are there?" 
query = Document(content=q) result = embedder.run([query]) @@ -37,18 +33,11 @@ async def api(q): scale_score=None, return_embedding=None ) - # .run( - # result['documents'][0].embedding - # ) prompt = prompt_builder.run(documents=results['documents'])['prompt'] response = llm.run(prompt=prompt, generation_kwargs=None) - # reply = response['replies'][0] - # rag_pipeline.connect("llm.replies", "answer_builder.replies") - # rag_pipeline.connect("llm.metadata", "answer_builder.meta") - # rag_pipeline.connect("retriever", "answer_builder.documents") results = answer_builder.run( query=q, diff --git a/gswikichat/vector_store_interface.py b/gswikichat/vector_store_interface.py index bc99b9d..106d050 100644 --- a/gswikichat/vector_store_interface.py +++ b/gswikichat/vector_store_interface.py @@ -1,16 +1,12 @@ import os import json -# from sentence_transformers import SentenceTransformer from tqdm import tqdm from haystack import Document # , Pipeline from haystack.components.embedders import SentenceTransformersDocumentEmbedder -# from haystack.components.embedders import SentenceTransformersTextEmbedder from haystack.document_stores.in_memory import InMemoryDocumentStore -# from haystack.components.retrievers.in_memory import InMemoryBM25Retriever from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever -# from haystack.components.writers import DocumentWriter from haystack.document_stores.types.policy import DuplicatePolicy HUGGING_FACE_HUB_TOKEN = os.environ.get('HUGGING_FACE_HUB_TOKEN') @@ -64,54 +60,9 @@ # embedding_dim=768, # duplicate_documents="overwrite" ) -# document_store.write_documents(input_documents) - -# TODO Introduce Jina.AI from HuggingFace. Establish env-variable for trust_... 
- -# basic_transformer_models = [ -# "all-MiniLM-L6-v2", -# "xlm-clm-ende-1024", -# "xlm-mlm-ende-1024", -# "bert-base-german-cased", -# "bert-base-german-dbmdz-cased", -# "bert-base-german-dbmdz-uncased", -# "distilbert-base-german-cased", -# "xlm-roberta-large-finetuned-conll03-german", -# "deutsche-telekom/gbert-large-paraphrase-cosine" -# ] - -# https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2 -# sentence_transformer_model = "all-MiniLM-L6-v2" -# 3 minutes to batch 82 - -# https://huggingface.co/deutsche-telekom/gbert-large-paraphrase-cosine -# sentence_transformer_model = 'deutsche-telekom/gbert-large-paraphrase-cosine' -# 76 minutes to batch 82 - -# https://huggingface.co/jinaai/jina-embeddings-v2-base-de -# sentence_transformer_model = 'jinaai/jina-embeddings-v2-base-de' -# Cannot find or load the embedding model -# Unknown minutes to batch 82 - -# https://huggingface.co/aari1995/German_Semantic_STS_V2 -# sentence_transformer_model = 'aari1995/German_Semantic_STS_V2' -# 75 minutes to batch 82 - -# https://huggingface.co/Sahajtomar/German-semantic -# sentence_transformer_model = 'Sahajtomar/German-semantic' -# 72 minutes to batch 82 # https://huggingface.co/svalabs/german-gpl-adapted-covid sentence_transformer_model = 'svalabs/german-gpl-adapted-covid' -# 2 minutes to batch 82 - -# https://huggingface.co/PM-AI/bi-encoder_msmarco_bert-base_german -# sentence_transformer_model = 'PM-AI/bi-encoder_msmarco_bert-base_german' -# 14 minutes to batch 82 - -# https://huggingface.co/JoBeer/german-semantic-base -# sentence_transformer_model = 'JoBeer/german-semantic-base' -# 22 minutes to batch 82 print(f'Sentence Transformer Name:{sentence_transformer_model}') @@ -122,26 +73,9 @@ # token=HUGGING_FACE_HUB_TOKEN ) -# hg_embedder = SentenceTransformer( -# "jinaai/jina-embeddings-v2-base-de", -# token=HUGGING_FACE_HUB_TOKEN -# ) - embedder.warm_up() documents_with_embeddings = embedder.run(input_documents) -# documents_with_embeddings = 
embedder.encode(input_documents) - - -# print('\n\n') -# # print(documents_with_embeddings['documents']) -# print(type(documents_with_embeddings['documents'])) -# print(len(documents_with_embeddings['documents'])) -# print(dir(documents_with_embeddings['documents'][0])) -# print('\n\n') -# print(type(embedder.model)) -# print('\n\n') -# # print(dir(hg_embedder)) document_store.write_documents( @@ -155,14 +89,3 @@ top_k=top_k ) -# writer = DocumentWriter(document_store=document_store) - -# indexing_pipeline = Pipeline() -# indexing_pipeline.add_component("embedder", embedder) -# indexing_pipeline.add_component("writer", writer) -# indexing_pipeline.connect("embedder", "writer") -# indexing_pipeline.run( -# { -# "embedder": {"documents": input_documents} -# } -# ) From dc4501a2042743cc5b6f275014a85d66de20d375 Mon Sep 17 00:00:00 2001 From: roti Date: Thu, 1 Feb 2024 22:52:59 +0000 Subject: [PATCH 06/43] fix: required sentence_transformers version was actually > 2.2.0 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7369c03..723011a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -32,7 +32,7 @@ python-dotenv==1.0.1 pytz==2023.3.post1 PyYAML==6.0.1 requests==2.31.0 -sentence-transformers==2.2.0 +sentence-transformers==2.3.1 six==1.16.0 sniffio==1.3.0 starlette==0.35.1 From 42cdcc5e5cf4847e4bfe7c4db2dbf4b7237bc1d5 Mon Sep 17 00:00:00 2001 From: roti Date: Thu, 1 Feb 2024 22:55:25 +0000 Subject: [PATCH 07/43] docs: add notes about embedding models to readme --- README.md | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/README.md b/README.md index c4dabd5..528edaf 100644 --- a/README.md +++ b/README.md @@ -49,3 +49,52 @@ A [FastAPI](https://fastapi.tiangolo.com/) server is running in the container. I ### Frontend A minimal frontend lets the user input a question and renders the response from the system. 
+ +## Sentence Transformers Statistics + +``` +basic_transformer_models = [ + "all-MiniLM-L6-v2", + "xlm-clm-ende-1024", + "xlm-mlm-ende-1024", + "bert-base-german-cased", + "bert-base-german-dbmdz-cased", + "bert-base-german-dbmdz-uncased", + "distilbert-base-german-cased", + "xlm-roberta-large-finetuned-conll03-german", + "deutsche-telekom/gbert-large-paraphrase-cosine" +] + +https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2 +sentence_transformer_model = "all-MiniLM-L6-v2" +3 minutes to batch 82 + +https://huggingface.co/deutsche-telekom/gbert-large-paraphrase-cosine +sentence_transformer_model = 'deutsche-telekom/gbert-large-paraphrase-cosine' +76 minutes to batch 82 + +https://huggingface.co/jinaai/jina-embeddings-v2-base-de +sentence_transformer_model = 'jinaai/jina-embeddings-v2-base-de' +Cannot find or load the embedding model +Unknown minutes to batch 82 + +https://huggingface.co/aari1995/German_Semantic_STS_V2 +sentence_transformer_model = 'aari1995/German_Semantic_STS_V2' +75 minutes to batch 82 + +https://huggingface.co/Sahajtomar/German-semantic +sentence_transformer_model = 'Sahajtomar/German-semantic' +72 minutes to batch 82 + +https://huggingface.co/svalabs/german-gpl-adapted-covid +ntence_transformer_model = 'svalabs/german-gpl-adapted-covid' +2 minutes to batch 82 + +https://huggingface.co/PM-AI/bi-encoder_msmarco_bert-base_german +sentence_transformer_model = 'PM-AI/bi-encoder_msmarco_bert-base_german' +14 minutes to batch 82 + +https://huggingface.co/JoBeer/german-semantic-base +sentence_transformer_model = 'JoBeer/german-semantic-base' +22 minutes to batch 82 +``` From 13bc12eb1ca457a3c6ce65cb386dd41675922bfa Mon Sep 17 00:00:00 2001 From: roti Date: Thu, 1 Feb 2024 23:02:11 +0000 Subject: [PATCH 08/43] chore: add debug output to api.py --- gswikichat/api.py | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/gswikichat/api.py b/gswikichat/api.py index 5dc6677..b6f49f9 100644 --- 
a/gswikichat/api.py +++ b/gswikichat/api.py @@ -21,40 +21,49 @@ async def root(): @app.get("/api") async def api(q): + print("query: ", q) query = Document(content=q) - result = embedder.run([query]) + queryEmbedded = embedder.run([query]) + queryEmbedding = queryEmbedded['documents'][0].embedding - results = retriever.run( - query_embedding=list(result['documents'][0].embedding), + retrieverResults = retriever.run( + query_embedding=list(queryEmbedding), filters=None, top_k=None, scale_score=None, return_embedding=None ) - prompt = prompt_builder.run(documents=results['documents'])['prompt'] + print("retriever results:") + for retrieverResult in retrieverResults: + print(retrieverResult) - response = llm.run(prompt=prompt, generation_kwargs=None) + promptBuild = prompt_builder.run(documents=retrieverResults['documents']) + prompt = promptBuild['prompt'] + + print("prompt: ", prompt) + response = llm.run(prompt=prompt, generation_kwargs=None) - results = answer_builder.run( + answerBuild = answer_builder.run( query=q, replies=response['replies'], meta=response['meta'], - documents=results['documents'], + documents=retrieverResults['documents'], pattern=None, reference_pattern=None ) + print("answerBuild", answerBuild) + + answer = answerBuild['answers'][0] + + sources= [{ "src": d.meta['src'], "content": d.content, "score": d.score } for d in answer.documents] - answer = results['answers'][0] + print("answer", answer) return { "answer": answer.data, - "sources": [{ - "src": d.meta['src'], - "content": d.content, - "score": d.score - } for d in answer.documents] + "sources": sources } From 4933a9a89facafa6dc2b0bc69ed8252a63813682 Mon Sep 17 00:00:00 2001 From: roti Date: Thu, 1 Feb 2024 23:03:10 +0000 Subject: [PATCH 09/43] fix: question in prompt --- gswikichat/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gswikichat/api.py b/gswikichat/api.py index b6f49f9..7a0d28c 100644 --- a/gswikichat/api.py +++ b/gswikichat/api.py @@ -40,7 +40,7 @@ 
async def api(q): for retrieverResult in retrieverResults: print(retrieverResult) - promptBuild = prompt_builder.run(documents=retrieverResults['documents']) + promptBuild = prompt_builder.run(question=q, documents=retrieverResults['documents']) prompt = promptBuild['prompt'] print("prompt: ", prompt) From b23833b720975b7a763cb224e84b7d0ca53e2e4d Mon Sep 17 00:00:00 2001 From: roti Date: Thu, 1 Feb 2024 23:03:20 +0000 Subject: [PATCH 10/43] chore: top_k 3 results for now --- gswikichat/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gswikichat/api.py b/gswikichat/api.py index 7a0d28c..ce2a144 100644 --- a/gswikichat/api.py +++ b/gswikichat/api.py @@ -31,7 +31,7 @@ async def api(q): retrieverResults = retriever.run( query_embedding=list(queryEmbedding), filters=None, - top_k=None, + top_k=3, scale_score=None, return_embedding=None ) From da1017b3da12bce60af1e2cc9a315ab207079f3e Mon Sep 17 00:00:00 2001 From: roti Date: Thu, 1 Feb 2024 23:04:12 +0000 Subject: [PATCH 11/43] wip: embeddings cache --- gswikichat/vector_store_interface.py | 37 ++++++++++++++++++---------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/gswikichat/vector_store_interface.py b/gswikichat/vector_store_interface.py index 106d050..d55d230 100644 --- a/gswikichat/vector_store_interface.py +++ b/gswikichat/vector_store_interface.py @@ -10,6 +10,8 @@ from haystack.document_stores.types.policy import DuplicatePolicy HUGGING_FACE_HUB_TOKEN = os.environ.get('HUGGING_FACE_HUB_TOKEN') +EMBEDDING_CACHE_FILE = '/tmp/gbnc_embeddings.json' + top_k = 5 input_documents = [] @@ -63,28 +65,37 @@ # https://huggingface.co/svalabs/german-gpl-adapted-covid sentence_transformer_model = 'svalabs/german-gpl-adapted-covid' - -print(f'Sentence Transformer Name:{sentence_transformer_model}') +print(f'Sentence Transformer Name: {sentence_transformer_model}') embedder = SentenceTransformersDocumentEmbedder( model=sentence_transformer_model, - # 
model="T-Systems-onsite/german-roberta-sentence-transformer-v2", - # model="jinaai/jina-embeddings-v2-base-de", - # token=HUGGING_FACE_HUB_TOKEN ) - embedder.warm_up() -documents_with_embeddings = embedder.run(input_documents) - -document_store.write_documents( - documents=documents_with_embeddings['documents'], - policy=DuplicatePolicy.OVERWRITE -) +# if os.path.isfile(EMBEDDING_CACHE_FILE): +# print("[INFO] Loading embeddings from cache") +# +# with open(EMBEDDING_CACHE_FILE, 'r') as f: +# documentsDict = json.load(f) +# document_store.write_documents( +# documents=[Document.from_dict(d) for d in documentsDict], +# policy=DuplicatePolicy.OVERWRITE +# ) +# +# else: +if True: + embedded = embedder.run(input_documents) + document_store.write_documents( + documents=embedded['documents'], + policy=DuplicatePolicy.OVERWRITE + ) + + with open(EMBEDDING_CACHE_FILE, 'w') as f: + documentsDict = [Document.to_dict(d) for d in embedded['documents']] + json.dump(documentsDict, f) retriever = InMemoryEmbeddingRetriever( - # embedding_model="sentence-transformers/all-MiniLM-L6-v2", document_store=document_store, top_k=top_k ) From 41ff046faa849431a8e83ed2e4caff78ae6e6522 Mon Sep 17 00:00:00 2001 From: roti Date: Thu, 1 Feb 2024 23:04:27 +0000 Subject: [PATCH 12/43] feat: document splitter --- gswikichat/vector_store_interface.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/gswikichat/vector_store_interface.py b/gswikichat/vector_store_interface.py index d55d230..a470a0a 100644 --- a/gswikichat/vector_store_interface.py +++ b/gswikichat/vector_store_interface.py @@ -8,6 +8,8 @@ from haystack.document_stores.in_memory import InMemoryDocumentStore from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever from haystack.document_stores.types.policy import DuplicatePolicy +from haystack.components.preprocessors import DocumentSplitter +from haystack.components.preprocessors import DocumentCleaner HUGGING_FACE_HUB_TOKEN = 
os.environ.get('HUGGING_FACE_HUB_TOKEN') EMBEDDING_CACHE_FILE = '/tmp/gbnc_embeddings.json' @@ -16,7 +18,8 @@ input_documents = [] json_dir = 'json_input' -json_fname = 'excellent-articles_10_paragraphs.json' +json_fname = 'excellent-articles_10.json' + json_fpath = os.path.join(json_dir, json_fname) if os.path.isfile(json_fpath): @@ -28,11 +31,11 @@ for k, v in tqdm(json_obj.items()): print(f"Loading {k}") input_documents.append(Document(content=v, meta={"src": k})) + elif isinstance(json_obj, list): for obj_ in tqdm(json_obj): url = obj_['meta'] content = obj_['content'] - input_documents.append( Document( content=content, @@ -55,7 +58,14 @@ ), ] -# Write documents to InMemoryDocumentStore +# cleaner = DocumentCleaner( +# remove_empty_lines=True, +# remove_extra_whitespaces=True, +# remove_repeated_substrings=False) +# input_documents = cleaner.run(input_documents)['documents'] + +splitter = DocumentSplitter(split_by="sentence", split_length=20, split_overlap=0) +input_documents = splitter.run(input_documents)['documents'] document_store = InMemoryDocumentStore( embedding_similarity_function="cosine", From 4e69697a74e16c965c65d13968bf1fdc7dd9c51d Mon Sep 17 00:00:00 2001 From: Jonathan Fraine Date: Sat, 3 Feb 2024 00:13:21 +0100 Subject: [PATCH 13/43] Update .dockerignore --- .dockerignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.dockerignore b/.dockerignore index b6853c9..441b043 100644 --- a/.dockerignore +++ b/.dockerignore @@ -100,4 +100,4 @@ frontend/dist !src/ !package.json !yarn.lock -!.yarnrc \ No newline at end of file +!.yarnrc From 0ee6ed5d1a55b85a22d2d75a947f30d2e336da71 Mon Sep 17 00:00:00 2001 From: Robert Timm Date: Sun, 4 Feb 2024 13:25:21 +0000 Subject: [PATCH 14/43] docs: note on how to dev locally --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index 528edaf..bb619f5 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,19 @@ Point your browser to 
http://localhost:8000/ and use the frontend. The container works on [runpod.io](https://www.runpod.io/) GPU instances. A [template is available here](https://runpod.io/gsc?template=0w8z55rf19&ref=yfvyfa0s). +### Local development +#### Backend +``` +python -m venv .venv +. ./.venv/bin/activate +pip install -r requirements.txt +``` +#### Frontend +``` +cd frontend +yarn dev +``` + ## What's in the box ### Docker container From 7a2c9553f668d60aa964f84fae0f6ecc050a271f Mon Sep 17 00:00:00 2001 From: Robert Timm Date: Sun, 4 Feb 2024 13:26:44 +0000 Subject: [PATCH 15/43] docs: add research_log.md --- README.md | 48 ------------------------------------------------ research_log.md | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 48 deletions(-) create mode 100644 research_log.md diff --git a/README.md b/README.md index bb619f5..8aa376c 100644 --- a/README.md +++ b/README.md @@ -63,51 +63,3 @@ A [FastAPI](https://fastapi.tiangolo.com/) server is running in the container. I A minimal frontend lets the user input a question and renders the response from the system. 
-## Sentence Transformers Statistics - -``` -basic_transformer_models = [ - "all-MiniLM-L6-v2", - "xlm-clm-ende-1024", - "xlm-mlm-ende-1024", - "bert-base-german-cased", - "bert-base-german-dbmdz-cased", - "bert-base-german-dbmdz-uncased", - "distilbert-base-german-cased", - "xlm-roberta-large-finetuned-conll03-german", - "deutsche-telekom/gbert-large-paraphrase-cosine" -] - -https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2 -sentence_transformer_model = "all-MiniLM-L6-v2" -3 minutes to batch 82 - -https://huggingface.co/deutsche-telekom/gbert-large-paraphrase-cosine -sentence_transformer_model = 'deutsche-telekom/gbert-large-paraphrase-cosine' -76 minutes to batch 82 - -https://huggingface.co/jinaai/jina-embeddings-v2-base-de -sentence_transformer_model = 'jinaai/jina-embeddings-v2-base-de' -Cannot find or load the embedding model -Unknown minutes to batch 82 - -https://huggingface.co/aari1995/German_Semantic_STS_V2 -sentence_transformer_model = 'aari1995/German_Semantic_STS_V2' -75 minutes to batch 82 - -https://huggingface.co/Sahajtomar/German-semantic -sentence_transformer_model = 'Sahajtomar/German-semantic' -72 minutes to batch 82 - -https://huggingface.co/svalabs/german-gpl-adapted-covid -ntence_transformer_model = 'svalabs/german-gpl-adapted-covid' -2 minutes to batch 82 - -https://huggingface.co/PM-AI/bi-encoder_msmarco_bert-base_german -sentence_transformer_model = 'PM-AI/bi-encoder_msmarco_bert-base_german' -14 minutes to batch 82 - -https://huggingface.co/JoBeer/german-semantic-base -sentence_transformer_model = 'JoBeer/german-semantic-base' -22 minutes to batch 82 -``` diff --git a/research_log.md b/research_log.md new file mode 100644 index 0000000..a6d31ed --- /dev/null +++ b/research_log.md @@ -0,0 +1,48 @@ +## Sentence Transformers Statistics + +``` +basic_transformer_models = [ + "all-MiniLM-L6-v2", + "xlm-clm-ende-1024", + "xlm-mlm-ende-1024", + "bert-base-german-cased", + "bert-base-german-dbmdz-cased", + 
"bert-base-german-dbmdz-uncased", + "distilbert-base-german-cased", + "xlm-roberta-large-finetuned-conll03-german", + "deutsche-telekom/gbert-large-paraphrase-cosine" +] + +https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2 +sentence_transformer_model = "all-MiniLM-L6-v2" +3 minutes to batch 82 + +https://huggingface.co/deutsche-telekom/gbert-large-paraphrase-cosine +sentence_transformer_model = 'deutsche-telekom/gbert-large-paraphrase-cosine' +76 minutes to batch 82 + +https://huggingface.co/jinaai/jina-embeddings-v2-base-de +sentence_transformer_model = 'jinaai/jina-embeddings-v2-base-de' +Cannot find or load the embedding model +Unknown minutes to batch 82 + +https://huggingface.co/aari1995/German_Semantic_STS_V2 +sentence_transformer_model = 'aari1995/German_Semantic_STS_V2' +75 minutes to batch 82 + +https://huggingface.co/Sahajtomar/German-semantic +sentence_transformer_model = 'Sahajtomar/German-semantic' +72 minutes to batch 82 + +https://huggingface.co/svalabs/german-gpl-adapted-covid +ntence_transformer_model = 'svalabs/german-gpl-adapted-covid' +2 minutes to batch 82 + +https://huggingface.co/PM-AI/bi-encoder_msmarco_bert-base_german +sentence_transformer_model = 'PM-AI/bi-encoder_msmarco_bert-base_german' +14 minutes to batch 82 + +https://huggingface.co/JoBeer/german-semantic-base +sentence_transformer_model = 'JoBeer/german-semantic-base' +22 minutes to batch 82 +``` From 0a5e2be984558f494fed0b82f597cc73be0e5b62 Mon Sep 17 00:00:00 2001 From: Robert Timm Date: Mon, 5 Feb 2024 08:14:41 +0000 Subject: [PATCH 16/43] feat: set top_k via api --- gswikichat/api.py | 4 ++-- gswikichat/vector_store_interface.py | 5 +---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/gswikichat/api.py b/gswikichat/api.py index ce2a144..8ea3097 100644 --- a/gswikichat/api.py +++ b/gswikichat/api.py @@ -20,7 +20,7 @@ async def root(): @app.get("/api") -async def api(q): +async def api(q, top_k = 3): print("query: ", q) query = Document(content=q) @@ 
-31,7 +31,7 @@ async def api(q): retrieverResults = retriever.run( query_embedding=list(queryEmbedding), filters=None, - top_k=3, + top_k=top_k, scale_score=None, return_embedding=None ) diff --git a/gswikichat/vector_store_interface.py b/gswikichat/vector_store_interface.py index a470a0a..8cb1b28 100644 --- a/gswikichat/vector_store_interface.py +++ b/gswikichat/vector_store_interface.py @@ -105,8 +105,5 @@ documentsDict = [Document.to_dict(d) for d in embedded['documents']] json.dump(documentsDict, f) -retriever = InMemoryEmbeddingRetriever( - document_store=document_store, - top_k=top_k -) +retriever = InMemoryEmbeddingRetriever(document_store=document_store) From 332e3dc17b3a64506f4de883d0180de126dd8421 Mon Sep 17 00:00:00 2001 From: Robert Timm Date: Mon, 5 Feb 2024 08:22:58 +0000 Subject: [PATCH 17/43] feat: support en and de on the api to switch prompts --- gswikichat/api.py | 14 ++++++++++---- gswikichat/prompt.py | 23 +++++++++++++++++++++-- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/gswikichat/api.py b/gswikichat/api.py index 8ea3097..a68f829 100644 --- a/gswikichat/api.py +++ b/gswikichat/api.py @@ -20,8 +20,13 @@ async def root(): @app.get("/api") -async def api(q, top_k = 3): - print("query: ", q) +async def api(q, top_k = 3, lang = 'en'): + if not lang in ['en', 'de']: + raise Exception("language must be 'en' or 'de'") + + print(f"{q=}") + print(f"{top_k=}") + print(f"{lang=}") query = Document(content=q) @@ -40,10 +45,11 @@ async def api(q, top_k = 3): for retrieverResult in retrieverResults: print(retrieverResult) - promptBuild = prompt_builder.run(question=q, documents=retrieverResults['documents']) + promptBuilder = prompt_builder[lang] + promptBuild = promptBuilder.run(question=q, documents=retrieverResults['documents']) prompt = promptBuild['prompt'] - print("prompt: ", prompt) + print(f"{prompt=}") response = llm.run(prompt=prompt, generation_kwargs=None) diff --git a/gswikichat/prompt.py b/gswikichat/prompt.py index 
36a6ebb..a9ea71b 100644 --- a/gswikichat/prompt.py +++ b/gswikichat/prompt.py @@ -9,7 +9,7 @@ # {% endfor %} # """ -prompt_template = """ +prompt_template_en = """ <|system|> You are a helpful assistant. You answer questions based on the given documents. Answer based on the documents only. If the information is not in the documents, @@ -25,6 +25,22 @@ <|assistant|> """ +prompt_template_de = """ +<|system|> +Du bist ein hilfreicher Assistent. Du beantwortest Fragen basierend auf den vorliegenden Dokumenten. +Beantworte basierend auf den Dokumenten nur. Wenn die Information nicht in den Dokumenten ist, +sage, dass du sie nicht finden kannst. +<|endoftext|> +<|user|> +Dokumente: +{% for doc in documents %} + {{ doc.content }} +{% endfor %} +Mit diesen Dokumenten, beantworte die folgende Frage: {{question}} +<|endoftext|> +<|assistant|> +""" + # prompt_template = """ # Given these documents, answer the question. Answer in a full sentence. Give the response only, no explanation. Don't mention the documents. 
# Documents: @@ -33,4 +49,7 @@ # {% endfor %} # """ -prompt_builder = PromptBuilder(template=prompt_template) +prompt_builder = { + 'en': PromptBuilder(template=prompt_template_en), + 'de': PromptBuilder(template=prompt_template_de), +} From 6225fccfcc975f3ec2fa51b72bf914202c4488e4 Mon Sep 17 00:00:00 2001 From: Robert Timm Date: Mon, 5 Feb 2024 08:24:03 +0000 Subject: [PATCH 18/43] feat: cache embedding model during docker build --- Dockerfile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Dockerfile b/Dockerfile index 796ccc4..9efb49c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -51,6 +51,11 @@ COPY --chmod=755 requirements.txt requirements.txt RUN pip install -r requirements.txt +# Load sentence-transformers model once in order to cache it in the image +# TODO: ARG / ENV for embedder model +RUN echo "from haystack.components.embedders import SentenceTransformersDocumentEmbedder\nSentenceTransformersDocumentEmbedder(model='svalabs/german-gpl-adapted-covid').warm_up()" | python3 + + # Install frontend dependencies COPY --chmod=755 frontend/package.json frontend/package.json COPY --chmod=755 frontend/yarn.lock frontend/yarn.lock From 4877807ef0212dc0ca531e6f0f7bdb5a88a7e82a Mon Sep 17 00:00:00 2001 From: Robert Timm Date: Mon, 5 Feb 2024 08:24:18 +0000 Subject: [PATCH 19/43] wip: smaller chunk size, 5 sentences for now --- gswikichat/vector_store_interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gswikichat/vector_store_interface.py b/gswikichat/vector_store_interface.py index 8cb1b28..3b1f4b0 100644 --- a/gswikichat/vector_store_interface.py +++ b/gswikichat/vector_store_interface.py @@ -64,7 +64,7 @@ # remove_repeated_substrings=False) # input_documents = cleaner.run(input_documents)['documents'] -splitter = DocumentSplitter(split_by="sentence", split_length=20, split_overlap=0) +splitter = DocumentSplitter(split_by="sentence", split_length=5, split_overlap=0) input_documents = splitter.run(input_documents)['documents'] 
document_store = InMemoryDocumentStore( From da9859dc2fd5db1e0d11be0d611dc32f697853bb Mon Sep 17 00:00:00 2001 From: Robert Timm Date: Mon, 5 Feb 2024 08:24:35 +0000 Subject: [PATCH 20/43] chore: remove comment --- gswikichat/llm_config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gswikichat/llm_config.py b/gswikichat/llm_config.py index 10ded24..1e86174 100644 --- a/gswikichat/llm_config.py +++ b/gswikichat/llm_config.py @@ -1,7 +1,6 @@ import os from haystack_integrations.components.generators.ollama import OllamaGenerator -# TODO: discolm prompt https://huggingface.co/DiscoResearch/DiscoLM_German_7b_v1 print(f"Setting up ollama with {os.getenv('MODEL')}") llm = OllamaGenerator( model=os.getenv("MODEL"), From 291aaaf703167a6ef3e14acdab8c4b8995b62522 Mon Sep 17 00:00:00 2001 From: Robert Timm Date: Fri, 9 Feb 2024 09:33:06 +0000 Subject: [PATCH 21/43] feat: enable embeddings cache (for developmnet) --- gswikichat/vector_store_interface.py | 34 ++++++++++++++++------------ 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/gswikichat/vector_store_interface.py b/gswikichat/vector_store_interface.py index 3b1f4b0..e348047 100644 --- a/gswikichat/vector_store_interface.py +++ b/gswikichat/vector_store_interface.py @@ -12,6 +12,8 @@ from haystack.components.preprocessors import DocumentCleaner HUGGING_FACE_HUB_TOKEN = os.environ.get('HUGGING_FACE_HUB_TOKEN') + +# disable this line to disable the embedding cache EMBEDDING_CACHE_FILE = '/tmp/gbnc_embeddings.json' top_k = 5 @@ -83,27 +85,29 @@ embedder.warm_up() -# if os.path.isfile(EMBEDDING_CACHE_FILE): -# print("[INFO] Loading embeddings from cache") -# -# with open(EMBEDDING_CACHE_FILE, 'r') as f: -# documentsDict = json.load(f) -# document_store.write_documents( -# documents=[Document.from_dict(d) for d in documentsDict], -# policy=DuplicatePolicy.OVERWRITE -# ) -# -# else: -if True: +if EMBEDDING_CACHE_FILE and os.path.isfile(EMBEDDING_CACHE_FILE): + print("[INFO] Loading embeddings from cache") + 
+ with open(EMBEDDING_CACHE_FILE, 'r') as f: + documentsDict = json.load(f) + document_store.write_documents( + documents=[Document.from_dict(d) for d in documentsDict], + policy=DuplicatePolicy.OVERWRITE + ) + +else: + print("[INFO] Generating embeddings") + embedded = embedder.run(input_documents) document_store.write_documents( documents=embedded['documents'], policy=DuplicatePolicy.OVERWRITE ) - with open(EMBEDDING_CACHE_FILE, 'w') as f: - documentsDict = [Document.to_dict(d) for d in embedded['documents']] - json.dump(documentsDict, f) + if EMBEDDING_CACHE_FILE: + with open(EMBEDDING_CACHE_FILE, 'w') as f: + documentsDict = [Document.to_dict(d) for d in embedded['documents']] + json.dump(documentsDict, f) retriever = InMemoryEmbeddingRetriever(document_store=document_store) From 936d83ebe802349f5bdde67ae80180b733e52ae7 Mon Sep 17 00:00:00 2001 From: Robert Timm Date: Fri, 9 Feb 2024 09:33:19 +0000 Subject: [PATCH 22/43] feat: add document cleaner --- gswikichat/vector_store_interface.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/gswikichat/vector_store_interface.py b/gswikichat/vector_store_interface.py index e348047..36f0760 100644 --- a/gswikichat/vector_store_interface.py +++ b/gswikichat/vector_store_interface.py @@ -60,15 +60,16 @@ ), ] -# cleaner = DocumentCleaner( -# remove_empty_lines=True, -# remove_extra_whitespaces=True, -# remove_repeated_substrings=False) -# input_documents = cleaner.run(input_documents)['documents'] - splitter = DocumentSplitter(split_by="sentence", split_length=5, split_overlap=0) input_documents = splitter.run(input_documents)['documents'] +cleaner = DocumentCleaner( + remove_empty_lines=True, + remove_extra_whitespaces=True, + remove_repeated_substrings=False) +input_documents = cleaner.run(input_documents)['documents'] + + document_store = InMemoryDocumentStore( embedding_similarity_function="cosine", # embedding_dim=768, From 3e0b8f4957072a26c71dde12d05cb5dccd4cd418 Mon Sep 17 00:00:00 
2001 From: Robert Timm Date: Fri, 9 Feb 2024 09:36:21 +0000 Subject: [PATCH 23/43] docs: long docker run options --- README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 8aa376c..d30cc74 100644 --- a/README.md +++ b/README.md @@ -10,11 +10,13 @@ To build and run the container locally with hot reload on python files do: ``` DOCKER_BUILDKIT=1 docker build . -t gbnc docker run \ - -v "$(pwd)/gswikichat":/workspace/gswikichat \ - -v "$(pwd)/cache":/root/.cache \ - -e HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN - -p 8000:8000 \ - --rm -it \ + --env HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN \ + --volume "$(pwd)/gswikichat":/workspace/gswikichat \ + --volume "$(pwd)/cache":/root/.cache \ + --publish 8000:8000 \ + --rm \ + --interactive \ + --tty \ --name gbnc \ gbnc ``` From edf5eb279b6747ecf6efc7a3120fdc16f54e1fc1 Mon Sep 17 00:00:00 2001 From: Robert Timm Date: Fri, 9 Feb 2024 09:40:34 +0000 Subject: [PATCH 24/43] fix: access mode --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 9efb49c..0b6cf1c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -67,7 +67,7 @@ COPY --chmod=755 json_input json_input # Copy backend for production -COPY --chmod=644 gswikichat gswikichat +COPY --chmod=755 gswikichat gswikichat # Copy and build frontend for production (into the frontend/dist folder) From 63baf2b09d2c135ec8283f66b28cd0c9f683fd29 Mon Sep 17 00:00:00 2001 From: Robert Timm Date: Fri, 9 Feb 2024 10:47:46 +0000 Subject: [PATCH 25/43] fix: redraw loading animation on subsequent searches --- frontend/src/views/ChatView.vue | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/src/views/ChatView.vue b/frontend/src/views/ChatView.vue index 518a88f..9d79ddb 100644 --- a/frontend/src/views/ChatView.vue +++ b/frontend/src/views/ChatView.vue @@ -95,6 +95,7 @@ const inputFocused = ref(false) // } function search() { + response.value = undefined; 
displayResponse.value = true fetch(`/api?q=${inputText.value}`) .then((response) => response.json()) From 56a7b8c1d747515c1ae546a5022283b27c1ad5ca Mon Sep 17 00:00:00 2001 From: Robert Timm Date: Fri, 9 Feb 2024 10:49:56 +0000 Subject: [PATCH 26/43] wip: workaround for runpod.io http port forwarding --- gswikichat/api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gswikichat/api.py b/gswikichat/api.py index a68f829..be2e4f9 100644 --- a/gswikichat/api.py +++ b/gswikichat/api.py @@ -16,7 +16,8 @@ @app.get("/") async def root(): - return RedirectResponse(url="/frontend/dist", status_code=302) + # return RedirectResponse(url="/frontend/dist", status_code=308) + return {} @app.get("/api") From 8e05473e3aeb82795baa6db2d9f315f58860b54d Mon Sep 17 00:00:00 2001 From: Robert Timm Date: Fri, 9 Feb 2024 10:51:39 +0000 Subject: [PATCH 27/43] feat: switch to openchat 7b model --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 6202b20..b180bf9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,7 +38,7 @@ ENV PATH="/usr/local/ollama/bin:${PATH}" # Pull a language model (see LICENSE_STABLELM2.txt) -ARG MODEL=stablelm2:1.6b-zephyr +ARG MODEL=openchat ENV MODEL=${MODEL} RUN ollama serve & while ! 
curl http://localhost:11434; do sleep 1; done; ollama pull $MODEL From 22b04d0ad0d2f77281063e317e2e6b1bbd6eea66 Mon Sep 17 00:00:00 2001 From: Jonathan Fraine Date: Fri, 9 Feb 2024 17:41:44 +0100 Subject: [PATCH 28/43] added logging via logger with Handler to api.py; PEP8 formatted api.py --- gswikichat/api.py | 97 ++++++++++++++++++++++++++++------------ gswikichat/llm_config.py | 3 ++ 2 files changed, 72 insertions(+), 28 deletions(-) diff --git a/gswikichat/api.py b/gswikichat/api.py index be2e4f9..d7f84ce 100644 --- a/gswikichat/api.py +++ b/gswikichat/api.py @@ -5,70 +5,111 @@ from .rag import embedder, retriever, prompt_builder, llm, answer_builder from haystack import Document +# TODO: Test if this can be included in the `__init__.py` file +import logging + +logging.basicConfig( + filename='gbnc.log', + encoding='utf-8', + level=logging.DEBUG +) + +logger = logging.getLogger() +logger.setLevel(logging.DEBUG) + +handler = logging.StreamHandler(sys.stdout) +handler.setLevel(logging.DEBUG) +formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) +handler.setFormatter(formatter) +logger.addHandler(handler) + +# End Logging Handler Formatting + +homepage = "/frontend/dist" app = FastAPI() app.mount( - "/frontend/dist", - StaticFiles(directory="frontend/dist", html=True), + homepage, + StaticFiles( + directory=homepage, + html=True + ), name="frontend" ) @app.get("/") async def root(): - # return RedirectResponse(url="/frontend/dist", status_code=308) - return {} + return RedirectResponse( + url=homepage, + status_code=308 + ) + # return {} @app.get("/api") -async def api(q, top_k = 3, lang = 'en'): +async def api(query, top_k=3, lang='en'): if not lang in ['en', 'de']: - raise Exception("language must be 'en' or 'de'") + raise Exception("language must be 'en' or 'de'") - print(f"{q=}") - print(f"{top_k=}") - print(f"{lang=}") + logger.debug(f'{query=}') # Assuming we change the input name + logger.debug(f'{top_k=}') + 
logger.debug(f'{top_k=}') - query = Document(content=q) + query = Document(content=query) - queryEmbedded = embedder.run([query]) - queryEmbedding = queryEmbedded['documents'][0].embedding + query_embedded = embedder.run([query]) + query_embedding = query_embedded['documents'][0].embedding - retrieverResults = retriever.run( - query_embedding=list(queryEmbedding), + retreiver_results = retriever.run( + query_embedding=list(query_embedding), filters=None, top_k=top_k, scale_score=None, return_embedding=None ) - print("retriever results:") - for retrieverResult in retrieverResults: - print(retrieverResult) + logger.debug('retriever results:') + for retriever_result in retriever_results: + logger.debug(retriever_result_) + + prompt_builder = prompt_builders[lang] - promptBuilder = prompt_builder[lang] - promptBuild = promptBuilder.run(question=q, documents=retrieverResults['documents']) - prompt = promptBuild['prompt'] + prompt_build = prompt_builder.run( + question=query.content, # As a Document instance, .content returns a string + documents=retriever_results['documents'] + ) + + prompt = prompt_build['prompt'] - print(f"{prompt=}") + logger.debug(f'{prompt=}') response = llm.run(prompt=prompt, generation_kwargs=None) - answerBuild = answer_builder.run( - query=q, + answer_build = answer_builder.run( + query=query.content, # As a Document class, .content returns the string replies=response['replies'], meta=response['meta'], - documents=retrieverResults['documents'], + documents=retriever_results['documents'], pattern=None, reference_pattern=None ) - print("answerBuild", answerBuild) - answer = answerBuild['answers'][0] + logger.debug(f'{answer_build=}') + + answer = answer_build['answers'][0] - sources= [{ "src": d.meta['src'], "content": d.content, "score": d.score } for d in answer.documents] + sources = [ + { + "src": d_.meta['src'], + "content": d_.content, + "score": d_.score + } for d_ in answer.documents + ] - print("answer", answer) + 
logger.debug(f'{answer=}') return { "answer": answer.data, diff --git a/gswikichat/llm_config.py b/gswikichat/llm_config.py index 1e86174..6f1a732 100644 --- a/gswikichat/llm_config.py +++ b/gswikichat/llm_config.py @@ -1,6 +1,9 @@ import os from haystack_integrations.components.generators.ollama import OllamaGenerator +# import logging +# logger = logging.getLogger() + print(f"Setting up ollama with {os.getenv('MODEL')}") llm = OllamaGenerator( model=os.getenv("MODEL"), From 10f6b2191f6bc2d0ac660023eb83f66519ba599e Mon Sep 17 00:00:00 2001 From: Jonathan Fraine Date: Fri, 9 Feb 2024 17:45:17 +0100 Subject: [PATCH 29/43] debugging use of homepage instead of hard coded endpoint values --- gswikichat/api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gswikichat/api.py b/gswikichat/api.py index d7f84ce..bb27a22 100644 --- a/gswikichat/api.py +++ b/gswikichat/api.py @@ -7,6 +7,7 @@ # TODO: Test if this can be included in the `__init__.py` file import logging +import sys logging.basicConfig( filename='gbnc.log', From bfbd245f1db885c08187a7cbf76355ece17ff223 Mon Sep 17 00:00:00 2001 From: Jonathan Fraine Date: Fri, 9 Feb 2024 18:11:05 +0100 Subject: [PATCH 30/43] returning to previous to restart without errors --- gswikichat/api.py | 32 +++----------------------------- 1 file changed, 3 insertions(+), 29 deletions(-) diff --git a/gswikichat/api.py b/gswikichat/api.py index bb27a22..44ff6b3 100644 --- a/gswikichat/api.py +++ b/gswikichat/api.py @@ -5,38 +5,12 @@ from .rag import embedder, retriever, prompt_builder, llm, answer_builder from haystack import Document -# TODO: Test if this can be included in the `__init__.py` file -import logging -import sys - -logging.basicConfig( - filename='gbnc.log', - encoding='utf-8', - level=logging.DEBUG -) - -logger = logging.getLogger() -logger.setLevel(logging.DEBUG) - -handler = logging.StreamHandler(sys.stdout) -handler.setLevel(logging.DEBUG) -formatter = logging.Formatter( - '%(asctime)s - %(name)s - %(levelname)s - 
%(message)s' -) -handler.setFormatter(formatter) -logger.addHandler(handler) - -# End Logging Handler Formatting - homepage = "/frontend/dist" app = FastAPI() app.mount( - homepage, - StaticFiles( - directory=homepage, - html=True - ), + "/frontend/dist", + StaticFiles(directory="frontend/dist", html=True), name="frontend" ) @@ -44,7 +18,7 @@ @app.get("/") async def root(): return RedirectResponse( - url=homepage, + url="/frontend/dist", status_code=308 ) # return {} From 7b6ba0a29b6ea725b37b11af88620d8d995699ec Mon Sep 17 00:00:00 2001 From: Jonathan Fraine Date: Fri, 9 Feb 2024 18:57:10 +0100 Subject: [PATCH 31/43] renewed app.mount; bug fixed PEP8 changes in api.py; reformatted rag.py inputs; tests timed out --- frontend/src/views/ChatView.vue | 2 +- gswikichat/api.py | 26 ++++++++++++++++++++++---- gswikichat/prompt.py | 6 +++--- gswikichat/rag.py | 4 ---- 4 files changed, 26 insertions(+), 12 deletions(-) diff --git a/frontend/src/views/ChatView.vue b/frontend/src/views/ChatView.vue index 9d79ddb..981d2d7 100644 --- a/frontend/src/views/ChatView.vue +++ b/frontend/src/views/ChatView.vue @@ -97,7 +97,7 @@ const inputFocused = ref(false) function search() { response.value = undefined; displayResponse.value = true - fetch(`/api?q=${inputText.value}`) + fetch(`/api?query=${inputText.value}`) .then((response) => response.json()) .then((data) => { response.value = data diff --git a/gswikichat/api.py b/gswikichat/api.py index 44ff6b3..d820b75 100644 --- a/gswikichat/api.py +++ b/gswikichat/api.py @@ -2,10 +2,28 @@ from fastapi.staticfiles import StaticFiles from fastapi import FastAPI -from .rag import embedder, retriever, prompt_builder, llm, answer_builder +from .rag import answer_builder +from .llm_config import llm +from .prompt import prompt_builders +from .vector_store_interface import embedder, retriever, input_documents + from haystack import Document -homepage = "/frontend/dist" +import logging +import sys + +logger = logging.getLogger() 
+logger.setLevel(logging.DEBUG) + +handler = logging.StreamHandler(sys.stdout) +handler.setLevel(logging.DEBUG) +formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s') +handler.setFormatter(formatter) +logger.addHandler(handler) + +static_dir = 'frontend/dist' +homepage = f'/{static_dir}' app = FastAPI() app.mount( @@ -38,7 +56,7 @@ async def api(query, top_k=3, lang='en'): query_embedded = embedder.run([query]) query_embedding = query_embedded['documents'][0].embedding - retreiver_results = retriever.run( + retriever_results = retriever.run( query_embedding=list(query_embedding), filters=None, top_k=top_k, @@ -47,7 +65,7 @@ async def api(query, top_k=3, lang='en'): ) logger.debug('retriever results:') - for retriever_result in retriever_results: + for retriever_result_ in retriever_results: logger.debug(retriever_result_) prompt_builder = prompt_builders[lang] diff --git a/gswikichat/prompt.py b/gswikichat/prompt.py index a9ea71b..a2d82af 100644 --- a/gswikichat/prompt.py +++ b/gswikichat/prompt.py @@ -49,7 +49,7 @@ # {% endfor %} # """ -prompt_builder = { - 'en': PromptBuilder(template=prompt_template_en), - 'de': PromptBuilder(template=prompt_template_de), +prompt_builders = { + 'en': PromptBuilder(template=prompt_template_en), + 'de': PromptBuilder(template=prompt_template_de), } diff --git a/gswikichat/rag.py b/gswikichat/rag.py index 8e198f0..05d7c2b 100644 --- a/gswikichat/rag.py +++ b/gswikichat/rag.py @@ -2,10 +2,6 @@ from haystack import Pipeline from haystack.components.builders.answer_builder import AnswerBuilder -from .llm_config import llm -from .prompt import prompt_builder -from .vector_store_interface import embedder, retriever, input_documents - answer_builder = AnswerBuilder() # rag_pipeline = Pipeline() From 0428f871c16c6a042f5dda41b18a098eeab34e4e Mon Sep 17 00:00:00 2001 From: Jonathan Fraine Date: Fri, 9 Feb 2024 19:10:03 +0100 Subject: [PATCH 32/43] returned to stablelm2 model for testing purposes. 
PEP8 upgrades in api.py included; logger in api.py functional --- Dockerfile | 3 ++- gswikichat/api.py | 18 ++++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 86fc2ac..6d4fa8e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,7 +38,8 @@ ENV PATH="/usr/local/ollama/bin:${PATH}" # Pull a language model (see LICENSE_STABLELM2.txt) -ARG MODEL=openchat +# ARG MODEL=openchat +ARG MODEL=stablelm2:1.6b-zephyr ENV MODEL=${MODEL} RUN ollama serve & while ! curl http://localhost:11434; do sleep 1; done; ollama pull $MODEL diff --git a/gswikichat/api.py b/gswikichat/api.py index d820b75..1eaf348 100644 --- a/gswikichat/api.py +++ b/gswikichat/api.py @@ -12,15 +12,29 @@ import logging import sys -logger = logging.getLogger() +# TODO: Test if this can be added to the `__init__.py` file +# TODO: Add volume to Dockerfile for `gbnc_api.log` file +# Source: https://docs.python.org/3/howto/logging.html +logging.basicConfig( + filename='gbnc_api.log', + encoding='utf-8', + level=logging.DEBUG +) + +# Source: https://stackoverflow.com/questions/14058453/ +# making-python-loggers-output-all-messages-to-stdout-in-addition-to-log-file +logger = logging.getLogger('gswikicat api') logger.setLevel(logging.DEBUG) handler = logging.StreamHandler(sys.stdout) handler.setLevel(logging.DEBUG) formatter = logging.Formatter( - '%(asctime)s - %(name)s - %(levelname)s - %(message)s') + '%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) handler.setFormatter(formatter) logger.addHandler(handler) +# End of logging logger configuration + static_dir = 'frontend/dist' homepage = f'/{static_dir}' From 8104dde885b26cc249481c717894e07c99bda895 Mon Sep 17 00:00:00 2001 From: Jonathan Fraine Date: Fri, 9 Feb 2024 19:52:46 +0100 Subject: [PATCH 33/43] added OLLAMA_MODEL_NAME and OLLAMA_URL as environment variables; called them in llm_config.py --- Dockerfile | 13 ++++++++----- gswikichat/api.py | 10 +++++----- gswikichat/llm_config.py | 20 
+++++++++++++++----- gswikichat/rag.py | 3 +-- 4 files changed, 29 insertions(+), 17 deletions(-) diff --git a/Dockerfile b/Dockerfile index 6d4fa8e..438ac1e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,12 +36,15 @@ RUN npm install -g yarn COPY --from=ollama /usr/bin/ollama /usr/local/ollama/bin/ollama ENV PATH="/usr/local/ollama/bin:${PATH}" - # Pull a language model (see LICENSE_STABLELM2.txt) -# ARG MODEL=openchat -ARG MODEL=stablelm2:1.6b-zephyr -ENV MODEL=${MODEL} -RUN ollama serve & while ! curl http://localhost:11434; do sleep 1; done; ollama pull $MODEL +# ARG OLLAMA_MODEL_NAME=openchat +ARG OLLAMA_MODEL_NAME=stablelm2:1.6b-zephyr +ARG OLLAMA_URL=http://localhost:11434 + +ENV OLLAMA_MODEL_NAME=${OLLAMA_MODEL_NAME} +ENV OLLAMA_URL=${OLLAMA_URL} + +RUN ollama serve & while ! curl ${OLLAMA_URL}; do sleep 1; done; ollama pull $OLLAMA_MODEL_NAME # Setup the custom API and frontend diff --git a/gswikichat/api.py b/gswikichat/api.py index 1eaf348..f2f6e50 100644 --- a/gswikichat/api.py +++ b/gswikichat/api.py @@ -36,13 +36,13 @@ # End of logging logger configuration -static_dir = 'frontend/dist' -homepage = f'/{static_dir}' +STATIC_DIR = 'frontend/dist' +LANDING_PAGE = f'/{STATIC_DIR}' app = FastAPI() app.mount( - "/frontend/dist", - StaticFiles(directory="frontend/dist", html=True), + LANDING_PAGE, + StaticFiles(directory=STATIC_DIR, html=True), name="frontend" ) @@ -50,7 +50,7 @@ @app.get("/") async def root(): return RedirectResponse( - url="/frontend/dist", + url=LANDING_PAGE, status_code=308 ) # return {} diff --git a/gswikichat/llm_config.py b/gswikichat/llm_config.py index 6f1a732..0179bc1 100644 --- a/gswikichat/llm_config.py +++ b/gswikichat/llm_config.py @@ -1,11 +1,21 @@ import os from haystack_integrations.components.generators.ollama import OllamaGenerator -# import logging -# logger = logging.getLogger() +import logging +logger = logging.getLogger() -print(f"Setting up ollama with {os.getenv('MODEL')}") +OLLAMA_MODEL_NAME = 
os.environ.get("OLLAMA_MODEL_NAME") +OLLAMA_URL = os.environ.get("OLLAMA_URL") +OLLAMA_GENERATE_URL = f"{OLLAMA_URL}/api/generate" + +logger.info(f'Using {OLLAMA_MODEL_NAME=}') +logger.info(f'Endpoint: {OLLAMA_URL=}') +logger.info(f'Generate: {OLLAMA_GENERATE_URL=}') + +logger.debug(f'I AM HERE') + +print(f"Setting up ollama with {OLLAMA_MODEL_NAME}") llm = OllamaGenerator( - model=os.getenv("MODEL"), - url="http://localhost:11434/api/generate" + model=OLLAMA_MODEL_NAME, + url=OLLAMA_GENERATE_URL ) diff --git a/gswikichat/rag.py b/gswikichat/rag.py index 05d7c2b..b9bb392 100644 --- a/gswikichat/rag.py +++ b/gswikichat/rag.py @@ -1,5 +1,4 @@ - -from haystack import Pipeline +# from haystack import Pipeline from haystack.components.builders.answer_builder import AnswerBuilder answer_builder = AnswerBuilder() From fbc45916cddbfc54241d94bf5967048d48570880 Mon Sep 17 00:00:00 2001 From: Jonathan Fraine Date: Fri, 9 Feb 2024 20:05:09 +0100 Subject: [PATCH 34/43] created logger.py to serve get_logger to all modules --- gswikichat/__init__.py | 1 + gswikichat/api.py | 29 +++-------------------------- gswikichat/llm_config.py | 10 +++++++--- gswikichat/logger.py | 30 ++++++++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 29 deletions(-) create mode 100644 gswikichat/logger.py diff --git a/gswikichat/__init__.py b/gswikichat/__init__.py index 0a0e47b..a127f79 100644 --- a/gswikichat/__init__.py +++ b/gswikichat/__init__.py @@ -1 +1,2 @@ +# from .logger import logger from .api import * diff --git a/gswikichat/api.py b/gswikichat/api.py index f2f6e50..882ce1a 100644 --- a/gswikichat/api.py +++ b/gswikichat/api.py @@ -8,33 +8,10 @@ from .vector_store_interface import embedder, retriever, input_documents from haystack import Document +from .logger import get_logger -import logging -import sys - -# TODO: Test if this can be added to the `__init__.py` file -# TODO: Add volume to Dockerfile for `gbnc_api.log` file -# Source: https://docs.python.org/3/howto/logging.html 
-logging.basicConfig( - filename='gbnc_api.log', - encoding='utf-8', - level=logging.DEBUG -) - -# Source: https://stackoverflow.com/questions/14058453/ -# making-python-loggers-output-all-messages-to-stdout-in-addition-to-log-file -logger = logging.getLogger('gswikicat api') -logger.setLevel(logging.DEBUG) - -handler = logging.StreamHandler(sys.stdout) -handler.setLevel(logging.DEBUG) -formatter = logging.Formatter( - '%(asctime)s - %(name)s - %(levelname)s - %(message)s' -) -handler.setFormatter(formatter) -logger.addHandler(handler) -# End of logging logger configuration - +# Create logger instance from base logger config in `logger.py` +logger = get_logger(__name__) STATIC_DIR = 'frontend/dist' LANDING_PAGE = f'/{STATIC_DIR}' diff --git a/gswikichat/llm_config.py b/gswikichat/llm_config.py index 0179bc1..fbcdf91 100644 --- a/gswikichat/llm_config.py +++ b/gswikichat/llm_config.py @@ -1,8 +1,11 @@ import os from haystack_integrations.components.generators.ollama import OllamaGenerator -import logging -logger = logging.getLogger() +from .logger import get_logger + +# Create logger instance from base logger config in `logger.py` +logger = get_logger(__name__) + OLLAMA_MODEL_NAME = os.environ.get("OLLAMA_MODEL_NAME") OLLAMA_URL = os.environ.get("OLLAMA_URL") @@ -14,7 +17,8 @@ logger.debug(f'I AM HERE') -print(f"Setting up ollama with {OLLAMA_MODEL_NAME}") +logger.info(f"Setting up ollama with {OLLAMA_MODEL_NAME}") + llm = OllamaGenerator( model=OLLAMA_MODEL_NAME, url=OLLAMA_GENERATE_URL diff --git a/gswikichat/logger.py b/gswikichat/logger.py new file mode 100644 index 0000000..5d89447 --- /dev/null +++ b/gswikichat/logger.py @@ -0,0 +1,30 @@ +import logging +import sys + + +def get_logger(name): + # Create a logger + # Source: https://docs.python.org/3/howto/logging.html + logging.basicConfig( + filename='gbnc_api.log', + encoding='utf-8', + level=logging.DEBUG + ) + + logger = logging.getLogger(name) + logger.setLevel(logging.DEBUG) # Set the logging level + + # 
Source: stackoverflow.com/questions/14058453/ + # making-python-loggers-output-all-messages- + # to-stdout-in-addition-to-log-file + + # Create console handler and set level to debug + handler = logging.StreamHandler(sys.stdout) + handler.setLevel(logging.DEBUG) + formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + ) + handler.setFormatter(formatter) + logger.addHandler(handler) + + return logger From caecfd19a33be1ed94b423a5f8d8e3e73a3ac032 Mon Sep 17 00:00:00 2001 From: Jonathan Fraine Date: Fri, 9 Feb 2024 20:17:08 +0100 Subject: [PATCH 35/43] created a rag_pipeline in the rag.py based on the usage in api.py; removed rag_piipeline from api.py; introduced rag_pipeline from rag.py into api.py --- gswikichat/__init__.py | 1 - gswikichat/api.py | 50 +++---------------------- gswikichat/rag.py | 83 ++++++++++++++++++++++++++++++------------ 3 files changed, 65 insertions(+), 69 deletions(-) diff --git a/gswikichat/__init__.py b/gswikichat/__init__.py index a127f79..0a0e47b 100644 --- a/gswikichat/__init__.py +++ b/gswikichat/__init__.py @@ -1,2 +1 @@ -# from .logger import logger from .api import * diff --git a/gswikichat/api.py b/gswikichat/api.py index 882ce1a..e965841 100644 --- a/gswikichat/api.py +++ b/gswikichat/api.py @@ -2,10 +2,7 @@ from fastapi.staticfiles import StaticFiles from fastapi import FastAPI -from .rag import answer_builder -from .llm_config import llm -from .prompt import prompt_builders -from .vector_store_interface import embedder, retriever, input_documents +from .rag import rag_pipeline from haystack import Document from .logger import get_logger @@ -40,51 +37,14 @@ async def api(query, top_k=3, lang='en'): logger.debug(f'{query=}') # Assuming we change the input name logger.debug(f'{top_k=}') - logger.debug(f'{top_k=}') - - query = Document(content=query) + logger.debug(f'{lang=}') - query_embedded = embedder.run([query]) - query_embedding = query_embedded['documents'][0].embedding - - retriever_results 
= retriever.run( - query_embedding=list(query_embedding), - filters=None, + answer = rag_pipeline( + query=query, top_k=top_k, - scale_score=None, - return_embedding=None - ) - - logger.debug('retriever results:') - for retriever_result_ in retriever_results: - logger.debug(retriever_result_) - - prompt_builder = prompt_builders[lang] - - prompt_build = prompt_builder.run( - question=query.content, # As a Document instance, .content returns a string - documents=retriever_results['documents'] + lang=lang ) - prompt = prompt_build['prompt'] - - logger.debug(f'{prompt=}') - - response = llm.run(prompt=prompt, generation_kwargs=None) - - answer_build = answer_builder.run( - query=query.content, # As a Document class, .content returns the string - replies=response['replies'], - meta=response['meta'], - documents=retriever_results['documents'], - pattern=None, - reference_pattern=None - ) - - logger.debug(f'{answer_build=}') - - answer = answer_build['answers'][0] - sources = [ { "src": d_.meta['src'], diff --git a/gswikichat/rag.py b/gswikichat/rag.py index b9bb392..44c7e2b 100644 --- a/gswikichat/rag.py +++ b/gswikichat/rag.py @@ -1,26 +1,63 @@ # from haystack import Pipeline +from haystack import Document from haystack.components.builders.answer_builder import AnswerBuilder -answer_builder = AnswerBuilder() - -# rag_pipeline = Pipeline() -# rag_pipeline.add_component("text_embedder", embedder) -# rag_pipeline.add_component("retriever", retriever) -# # rag_pipeline.add_component("writer", writer) -# rag_pipeline.add_component("prompt_builder", prompt_builder) -# rag_pipeline.add_component("llm", llm) -# rag_pipeline.add_component("answer_builder", answer_builder) - -# # rag_pipeline.connect("embedder", "writer") -# rag_pipeline.connect("retriever.documents", "text_embedder") -# rag_pipeline.connect("retriever", "prompt_builder.documents") -# rag_pipeline.connect("prompt_builder", "llm") -# rag_pipeline.connect("llm.replies", "answer_builder.replies") -# 
rag_pipeline.connect("llm.metadata", "answer_builder.meta") -# rag_pipeline.connect("retriever", "answer_builder.documents") - -# rag_pipeline.run( -# { -# "text_embedder": {"documents": input_documents} -# } -# ) +from .llm_config import llm +from .logger import get_logger +from .prompt import prompt_builders +from .vector_store_interface import embedder, retriever, input_documents + +# Create logger instance from base logger config in `logger.py` +logger = get_logger(__name__) + + +def rag_pipeline(query: str = None, top_k: int = 3, lang: str = 'de'): + + assert (query is not None) + + if isinstance(query, str): + query = Document(content=query) + + assert (isinstance(query, Document)) + + query_embedded = embedder.run([query]) + query_embedding = query_embedded['documents'][0].embedding + + retriever_results = retriever.run( + query_embedding=list(query_embedding), + filters=None, + top_k=top_k, + scale_score=None, + return_embedding=None + ) + + logger.debug('retriever results:') + for retriever_result_ in retriever_results: + logger.debug(retriever_result_) + + prompt_builder = prompt_builders[lang] + + prompt_build = prompt_builder.run( + question=query.content, # As a Document instance, .content returns a string + documents=retriever_results['documents'] + ) + + prompt = prompt_build['prompt'] + + logger.debug(f'{prompt=}') + + response = llm.run(prompt=prompt, generation_kwargs=None) + + answer_builder = AnswerBuilder() + answer_build = answer_builder.run( + query=query.content, # As a Document class, .content returns the string + replies=response['replies'], + meta=response['meta'], + documents=retriever_results['documents'], + pattern=None, + reference_pattern=None + ) + + logger.debug(f'{answer_build=}') + + return answer_build['answers'][0] From 5c0b4d0de1a9a610922a457d97fa928024775e41 Mon Sep 17 00:00:00 2001 From: Jonathan Fraine Date: Fri, 9 Feb 2024 20:55:35 +0100 Subject: [PATCH 36/43] Updated with PEP8 formatting in vector_store_interface.py --- 
gswikichat/llm_config.py | 1 - gswikichat/prompt.py | 25 ++-------- gswikichat/vector_store_interface.py | 69 +++++++++++++++++----------- 3 files changed, 46 insertions(+), 49 deletions(-) diff --git a/gswikichat/llm_config.py b/gswikichat/llm_config.py index fbcdf91..0ada3ce 100644 --- a/gswikichat/llm_config.py +++ b/gswikichat/llm_config.py @@ -6,7 +6,6 @@ # Create logger instance from base logger config in `logger.py` logger = get_logger(__name__) - OLLAMA_MODEL_NAME = os.environ.get("OLLAMA_MODEL_NAME") OLLAMA_URL = os.environ.get("OLLAMA_URL") OLLAMA_GENERATE_URL = f"{OLLAMA_URL}/api/generate" diff --git a/gswikichat/prompt.py b/gswikichat/prompt.py index a2d82af..d74d3e4 100644 --- a/gswikichat/prompt.py +++ b/gswikichat/prompt.py @@ -1,14 +1,5 @@ from haystack.components.builders.prompt_builder import PromptBuilder -# prompt_template = """ -# Given these documents, answer the question. Answer in a full sentence. Give the response only, no explanation. Don't mention the documents. -# Documents: -# {% for doc in documents %} -# If {{ doc.content }} answers the Question: {{question}} -# Then return {{ doc.meta["src"] }} -# {% endfor %} -# """ - prompt_template_en = """ <|system|> You are a helpful assistant. You answer questions based on the given documents. @@ -17,8 +8,8 @@ <|endoftext|> <|user|> Documents: -{% for doc in documents %} - {{ doc.content }} +{% for doc_ in documents %} + {{ doc_.content }} {% endfor %} With this documents, answer the following question: {{question}} <|endoftext|> @@ -33,22 +24,14 @@ <|endoftext|> <|user|> Dokumente: -{% for doc in documents %} - {{ doc.content }} +{% for doc_ in documents %} + {{ doc_.content }} {% endfor %} Mit diesen Dokumenten, beantworte die folgende Frage: {{question}} <|endoftext|> <|assistant|> """ -# prompt_template = """ -# Given these documents, answer the question. Answer in a full sentence. Give the response only, no explanation. Don't mention the documents. 
-# Documents: -# If {{ doc.content }} answers the Question: {{question}} -# Then only return {{ doc.meta["src"] }} and nothing at all. -# {% endfor %} -# """ - prompt_builders = { 'en': PromptBuilder(template=prompt_template_en), 'de': PromptBuilder(template=prompt_template_de), diff --git a/gswikichat/vector_store_interface.py b/gswikichat/vector_store_interface.py index 36f0760..1aab187 100644 --- a/gswikichat/vector_store_interface.py +++ b/gswikichat/vector_store_interface.py @@ -11,6 +11,12 @@ from haystack.components.preprocessors import DocumentSplitter from haystack.components.preprocessors import DocumentCleaner + +from .logger import get_logger + +# Create logger instance from base logger config in `logger.py` +logger = get_logger(__name__) + HUGGING_FACE_HUB_TOKEN = os.environ.get('HUGGING_FACE_HUB_TOKEN') # disable this line to disable the embedding cache @@ -19,31 +25,33 @@ top_k = 5 input_documents = [] +# TODO: Add the json strings as env variables json_dir = 'json_input' json_fname = 'excellent-articles_10.json' json_fpath = os.path.join(json_dir, json_fname) if os.path.isfile(json_fpath): - print(f'[INFO] Loading data from {json_fpath}') + logger.info(f'Loading data from {json_fpath}') with open(json_fpath, 'r') as finn: json_obj = json.load(finn) if isinstance(json_obj, dict): - for k, v in tqdm(json_obj.items()): - print(f"Loading {k}") - input_documents.append(Document(content=v, meta={"src": k})) - + input_documents = [ + Document( + content=content_, + meta={"src": url_} + ) + for url_, content_ in tqdm(json_obj.items()) + ] elif isinstance(json_obj, list): - for obj_ in tqdm(json_obj): - url = obj_['meta'] - content = obj_['content'] - input_documents.append( - Document( - content=content, - meta={'src': url} - ) + input_documents = [ + Document( + content=obj_['content'], + meta={'src': obj_['meta']} ) + for obj_ in tqdm(json_obj) + ] else: input_documents = [ Document( @@ -60,13 +68,18 @@ ), ] -splitter = 
DocumentSplitter(split_by="sentence", split_length=5, split_overlap=0) +splitter = DocumentSplitter( + split_by="sentence", + split_length=5, + split_overlap=0 +) input_documents = splitter.run(input_documents)['documents'] cleaner = DocumentCleaner( - remove_empty_lines=True, - remove_extra_whitespaces=True, - remove_repeated_substrings=False) + remove_empty_lines=True, + remove_extra_whitespaces=True, + remove_repeated_substrings=False +) input_documents = cleaner.run(input_documents)['documents'] @@ -78,7 +91,7 @@ # https://huggingface.co/svalabs/german-gpl-adapted-covid sentence_transformer_model = 'svalabs/german-gpl-adapted-covid' -print(f'Sentence Transformer Name: {sentence_transformer_model}') +logger.info(f'Sentence Transformer Name: {sentence_transformer_model}') embedder = SentenceTransformersDocumentEmbedder( model=sentence_transformer_model, @@ -87,17 +100,17 @@ if EMBEDDING_CACHE_FILE and os.path.isfile(EMBEDDING_CACHE_FILE): - print("[INFO] Loading embeddings from cache") + logger.info('Loading embeddings from cache') - with open(EMBEDDING_CACHE_FILE, 'r') as f: - documentsDict = json.load(f) + with open(EMBEDDING_CACHE_FILE, 'r') as f_in: + documents_dict = json.load(f_in) document_store.write_documents( - documents=[Document.from_dict(d) for d in documentsDict], + documents=[Document.from_dict(d_) for d_ in documents_dict], policy=DuplicatePolicy.OVERWRITE ) else: - print("[INFO] Generating embeddings") + logger.debug("Generating embeddings") embedded = embedder.run(input_documents) document_store.write_documents( @@ -106,9 +119,11 @@ ) if EMBEDDING_CACHE_FILE: - with open(EMBEDDING_CACHE_FILE, 'w') as f: - documentsDict = [Document.to_dict(d) for d in embedded['documents']] - json.dump(documentsDict, f) + with open(EMBEDDING_CACHE_FILE, 'w') as f_out: + documents_dict = [ + Document.to_dict(d_) + for d_ in embedded['documents'] + ] + json.dump(documents_dict, f_out) retriever = InMemoryEmbeddingRetriever(document_store=document_store) - From 
8833af79e2e771b6578f0b34ae8a74d56edc0e8a Mon Sep 17 00:00:00 2001 From: roti Date: Mon, 12 Feb 2024 07:58:03 +0000 Subject: [PATCH 37/43] chore(Dockerfile): install python deps early To prevent huge redownloads on llm change. --- Dockerfile | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index 438ac1e..dc8d862 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,6 +36,15 @@ RUN npm install -g yarn COPY --from=ollama /usr/bin/ollama /usr/local/ollama/bin/ollama ENV PATH="/usr/local/ollama/bin:${PATH}" + +# Setup the app in workspace +WORKDIR /workspace + +# Install backend dependencies +COPY --chmod=755 requirements.txt requirements.txt +RUN pip install -r requirements.txt + + # Pull a language model (see LICENSE_STABLELM2.txt) # ARG OLLAMA_MODEL_NAME=openchat ARG OLLAMA_MODEL_NAME=stablelm2:1.6b-zephyr @@ -47,14 +56,6 @@ ENV OLLAMA_URL=${OLLAMA_URL} RUN ollama serve & while ! curl ${OLLAMA_URL}; do sleep 1; done; ollama pull $OLLAMA_MODEL_NAME -# Setup the custom API and frontend -WORKDIR /workspace - -# Install backend dependencies -COPY --chmod=755 requirements.txt requirements.txt -RUN pip install -r requirements.txt - - # Load sentence-transformers model once in order to cache it in the image # TODO: ARG / ENV for embedder model RUN echo "from haystack.components.embedders import SentenceTransformersDocumentEmbedder\nSentenceTransformersDocumentEmbedder(model='svalabs/german-gpl-adapted-covid').warm_up()" | python3 From 9ee8a32b3f417e04c1c1fd51bee97e3ee8997cc6 Mon Sep 17 00:00:00 2001 From: roti Date: Mon, 12 Feb 2024 12:55:12 +0000 Subject: [PATCH 38/43] fix(sentence-transformers): use cuda if available --- gswikichat/vector_store_interface.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/gswikichat/vector_store_interface.py b/gswikichat/vector_store_interface.py index 1aab187..5cb5e66 100644 --- a/gswikichat/vector_store_interface.py +++ b/gswikichat/vector_store_interface.py @@ -11,6 +11,7 
@@ from haystack.components.preprocessors import DocumentSplitter from haystack.components.preprocessors import DocumentCleaner +import torch from .logger import get_logger @@ -25,6 +26,13 @@ top_k = 5 input_documents = [] +device = "cpu" + +if torch.cuda.is_available(): + logger.info('GPU is available.') + device = "cuda" + + # TODO: Add the json strings as env variables json_dir = 'json_input' json_fname = 'excellent-articles_10.json' @@ -95,6 +103,7 @@ embedder = SentenceTransformersDocumentEmbedder( model=sentence_transformer_model, + device=device ) embedder.warm_up() From b2357e3f0bd8611115975ec82d7138e1b555f4a8 Mon Sep 17 00:00:00 2001 From: roti Date: Mon, 12 Feb 2024 16:00:07 +0000 Subject: [PATCH 39/43] fix(frontend): run from webserver root --- frontend/vite.config.ts | 2 +- gswikichat/api.py | 22 ++++++++++------------ 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts index c29f416..0f4d1b0 100644 --- a/frontend/vite.config.ts +++ b/frontend/vite.config.ts @@ -5,7 +5,7 @@ import vue from '@vitejs/plugin-vue' // https://vitejs.dev/config/ export default defineConfig({ - base: '/frontend/dist', + base: '/', plugins: [ vue(), ], diff --git a/gswikichat/api.py b/gswikichat/api.py index e965841..749ffa7 100644 --- a/gswikichat/api.py +++ b/gswikichat/api.py @@ -1,4 +1,4 @@ -from fastapi.responses import RedirectResponse +from fastapi.responses import FileResponse from fastapi.staticfiles import StaticFiles from fastapi import FastAPI @@ -10,25 +10,23 @@ # Create logger instance from base logger config in `logger.py` logger = get_logger(__name__) -STATIC_DIR = 'frontend/dist' -LANDING_PAGE = f'/{STATIC_DIR}' +FRONTEND_STATIC_DIR = './frontend/dist' app = FastAPI() + app.mount( - LANDING_PAGE, - StaticFiles(directory=STATIC_DIR, html=True), - name="frontend" + "/assets", + StaticFiles(directory=f"{FRONTEND_STATIC_DIR}/assets"), + name="frontend-assets" ) - @app.get("/") async def root(): - return 
RedirectResponse( - url=LANDING_PAGE, - status_code=308 - ) - # return {} + return FileResponse(f"{FRONTEND_STATIC_DIR}/index.html") +@app.get("/favicon.ico") +async def favicon(): + return FileResponse(f"{FRONTEND_STATIC_DIR}/favicon.ico") @app.get("/api") async def api(query, top_k=3, lang='en'): From b518abf0813aea96db3663309b4bdc0023923847 Mon Sep 17 00:00:00 2001 From: roti Date: Mon, 12 Feb 2024 16:33:18 +0000 Subject: [PATCH 40/43] feat: store embedding cache in volume --- README.md | 2 +- gswikichat/vector_store_interface.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d30cc74..d46be0d 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ DOCKER_BUILDKIT=1 docker build . -t gbnc docker run \ --env HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN \ --volume "$(pwd)/gswikichat":/workspace/gswikichat \ - --volume "$(pwd)/cache":/root/.cache \ + --volume gbnc_cache:/root/.cache --publish 8000:8000 \ --rm \ --interactive \ diff --git a/gswikichat/vector_store_interface.py b/gswikichat/vector_store_interface.py index 5cb5e66..95d52db 100644 --- a/gswikichat/vector_store_interface.py +++ b/gswikichat/vector_store_interface.py @@ -21,7 +21,7 @@ HUGGING_FACE_HUB_TOKEN = os.environ.get('HUGGING_FACE_HUB_TOKEN') # disable this line to disable the embedding cache -EMBEDDING_CACHE_FILE = '/tmp/gbnc_embeddings.json' +EMBEDDING_CACHE_FILE = '/root/.cache/gbnc_embeddings.json' top_k = 5 input_documents = [] From 69800b079af08ec9c11187e519b201d8b36d2d56 Mon Sep 17 00:00:00 2001 From: roti Date: Mon, 12 Feb 2024 22:29:39 +0000 Subject: [PATCH 41/43] feat(start.sh): pull llm using ollama (if not built into container) closes #35 --- start.sh | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/start.sh b/start.sh index cf21953..b7a27f8 100644 --- a/start.sh +++ b/start.sh @@ -1,11 +1,13 @@ #!/bin/bash +set -e + if [[ $PUBLIC_KEY ]] then mkdir -p ~/.ssh chmod 700 ~/.ssh cd ~/.ssh - echo $PUBLIC_KEY 
>> authorized_keys + echo "$PUBLIC_KEY" >> authorized_keys chmod 700 -R ~/.ssh cd / service ssh start @@ -16,10 +18,17 @@ fi echo "Starting ollama" ollama serve & +while ! curl "$OLLAMA_URL"; do + sleep 1 +done + +echo "Pulling $OLLAMA_MODEL_NAME from ollama library" +ollama pull "$OLLAMA_MODEL_NAME" + cd /workspace echo "Starting api" uvicorn gswikichat:app --reload --host 0.0.0.0 --port 8000 & -echo "Sleeping..." +echo "Ready" sleep infinity From 7803649bb5c718bcfd9cfc795d0d400bc13f8ac9 Mon Sep 17 00:00:00 2001 From: roti Date: Mon, 12 Feb 2024 22:34:10 +0000 Subject: [PATCH 42/43] feat(ollama): use chat api to leverage prompt templates closes #34 --- gswikichat/api.py | 2 +- gswikichat/llm_config.py | 13 ++++++------- gswikichat/prompt.py | 24 +++++++----------------- gswikichat/rag.py | 40 +++++++++++++++++++++++----------------- 4 files changed, 37 insertions(+), 42 deletions(-) diff --git a/gswikichat/api.py b/gswikichat/api.py index 749ffa7..c97ff2a 100644 --- a/gswikichat/api.py +++ b/gswikichat/api.py @@ -54,6 +54,6 @@ async def api(query, top_k=3, lang='en'): logger.debug(f'{answer=}') return { - "answer": answer.data, + "answer": answer.data.content, "sources": sources } diff --git a/gswikichat/llm_config.py b/gswikichat/llm_config.py index 0ada3ce..5af6c1c 100644 --- a/gswikichat/llm_config.py +++ b/gswikichat/llm_config.py @@ -1,5 +1,5 @@ import os -from haystack_integrations.components.generators.ollama import OllamaGenerator +from haystack_integrations.components.generators.ollama import OllamaChatGenerator from .logger import get_logger @@ -8,17 +8,16 @@ OLLAMA_MODEL_NAME = os.environ.get("OLLAMA_MODEL_NAME") OLLAMA_URL = os.environ.get("OLLAMA_URL") -OLLAMA_GENERATE_URL = f"{OLLAMA_URL}/api/generate" +OLLAMA_CHAT_URL = f"{OLLAMA_URL}/api/chat" logger.info(f'Using {OLLAMA_MODEL_NAME=}') logger.info(f'Endpoint: {OLLAMA_URL=}') -logger.info(f'Generate: {OLLAMA_GENERATE_URL=}') - -logger.debug(f'I AM HERE') +logger.info(f'Generate: {OLLAMA_CHAT_URL=}') 
logger.info(f"Setting up ollama with {OLLAMA_MODEL_NAME}") -llm = OllamaGenerator( +llm = OllamaChatGenerator( model=OLLAMA_MODEL_NAME, - url=OLLAMA_GENERATE_URL + url=OLLAMA_CHAT_URL, + timeout=120 ) diff --git a/gswikichat/prompt.py b/gswikichat/prompt.py index d74d3e4..d3306fc 100644 --- a/gswikichat/prompt.py +++ b/gswikichat/prompt.py @@ -1,38 +1,28 @@ from haystack.components.builders.prompt_builder import PromptBuilder prompt_template_en = """ -<|system|> -You are a helpful assistant. You answer questions based on the given documents. -Answer based on the documents only. If the information is not in the documents, -say that you cannot find the information. -<|endoftext|> -<|user|> Documents: {% for doc_ in documents %} {{ doc_.content }} {% endfor %} With this documents, answer the following question: {{question}} -<|endoftext|> -<|assistant|> """ prompt_template_de = """ -<|system|> -Du bist ein hilfreicher Assistent. Du beantwortest Fragen basierend auf den vorliegenden Dokumenten. -Beantworte basierend auf den Dokumenten nur. Wenn die Information nicht in den Dokumenten ist, -sage, dass du sie nicht finden kannst. -<|endoftext|> -<|user|> Dokumente: {% for doc_ in documents %} {{ doc_.content }} {% endfor %} Mit diesen Dokumenten, beantworte die folgende Frage: {{question}} -<|endoftext|> -<|assistant|> """ -prompt_builders = { +system_prompts = { + 'en': 'You are a helpful assistant. You answer questions based on the given documents. Answer based on the documents only. If the information is not in the documents, say that you cannot find the information.', + 'de': 'Du bist ein hilfreicher Assistent. Du beantwortest Fragen basierend auf den vorliegenden Dokumenten. Beantworte basierend auf den Dokumenten nur. 
Wenn die Information nicht in den Dokumenten ist, sage, dass du sie nicht finden kannst.', +} + +user_prompt_builders = { 'en': PromptBuilder(template=prompt_template_en), 'de': PromptBuilder(template=prompt_template_de), } + diff --git a/gswikichat/rag.py b/gswikichat/rag.py index 44c7e2b..b916686 100644 --- a/gswikichat/rag.py +++ b/gswikichat/rag.py @@ -1,26 +1,21 @@ # from haystack import Pipeline from haystack import Document from haystack.components.builders.answer_builder import AnswerBuilder +from haystack.dataclasses import ChatMessage from .llm_config import llm from .logger import get_logger -from .prompt import prompt_builders +from .prompt import user_prompt_builders, system_prompts from .vector_store_interface import embedder, retriever, input_documents # Create logger instance from base logger config in `logger.py` logger = get_logger(__name__) -def rag_pipeline(query: str = None, top_k: int = 3, lang: str = 'de'): +def rag_pipeline(query: str, top_k: int = 3, lang: str = 'de'): - assert (query is not None) - - if isinstance(query, str): - query = Document(content=query) - - assert (isinstance(query, Document)) - - query_embedded = embedder.run([query]) + query_document = Document(content=query) + query_embedded = embedder.run([query_document]) query_embedding = query_embedded['documents'][0].embedding retriever_results = retriever.run( @@ -35,24 +30,35 @@ def rag_pipeline(query: str = None, top_k: int = 3, lang: str = 'de'): for retriever_result_ in retriever_results: logger.debug(retriever_result_) - prompt_builder = prompt_builders[lang] + system_prompt = system_prompts[lang] + user_prompt_builder = user_prompt_builders[lang] - prompt_build = prompt_builder.run( - question=query.content, # As a Document instance, .content returns a string + user_prompt_build = user_prompt_builder.run( + question=query_document.content, documents=retriever_results['documents'] ) - prompt = prompt_build['prompt'] + prompt = user_prompt_build['prompt'] 
logger.debug(f'{prompt=}') - response = llm.run(prompt=prompt, generation_kwargs=None) + messages = [ + ChatMessage.from_system(system_prompt), + ChatMessage.from_user(prompt), + ] + + response = llm.run( + messages, + # generation_kwargs={"temperature": 0.2} + ) + + logger.debug(response) answer_builder = AnswerBuilder() answer_build = answer_builder.run( - query=query.content, # As a Document class, .content returns the string + query=query_document.content, replies=response['replies'], - meta=response['meta'], + meta=[r.meta for r in response['replies']], documents=retriever_results['documents'], pattern=None, reference_pattern=None From ff1fcab50294382484db5ce33a7f5a144ad76f1b Mon Sep 17 00:00:00 2001 From: Robert Timm Date: Mon, 19 Feb 2024 21:29:38 +0000 Subject: [PATCH 43/43] docs: fix run cmd --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d46be0d..6f04ed3 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ DOCKER_BUILDKIT=1 docker build . -t gbnc docker run \ --env HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN \ --volume "$(pwd)/gswikichat":/workspace/gswikichat \ - --volume gbnc_cache:/root/.cache + --volume gbnc_cache:/root/.cache \ --publish 8000:8000 \ --rm \ --interactive \