diff --git a/Dockerfile b/Dockerfile index 6202b20..dc8d862 100644 --- a/Dockerfile +++ b/Dockerfile @@ -37,20 +37,7 @@ COPY --from=ollama /usr/bin/ollama /usr/local/ollama/bin/ollama ENV PATH="/usr/local/ollama/bin:${PATH}" -# Pull a language model (see LICENSE_STABLELM2.txt) -ARG MODEL=stablelm2:1.6b-zephyr -ENV MODEL=${MODEL} -RUN ollama serve & while ! curl http://localhost:11434; do sleep 1; done; ollama pull $MODEL - -# Build a language model -# ARG MODEL=discolm -# ENV MODEL=${MODEL} -# WORKDIR /tmp/model -# COPY --chmod=644 Modelfile Modelfile -# RUN curl --location https://huggingface.co/TheBloke/DiscoLM_German_7b_v1-GGUF/resolve/main/discolm_german_7b_v1.Q5_K_S.gguf?download=true --output discolm_german_7b_v1.Q5_K_S.gguf; ollama serve & while ! curl http://localhost:11434; do sleep 1; done; ollama create ${MODEL} -f Modelfile && rm -rf /tmp/model - - -# Setup the custom API and frontend +# Setup the app in workspace WORKDIR /workspace # Install backend dependencies @@ -58,6 +45,22 @@ COPY --chmod=755 requirements.txt requirements.txt RUN pip install -r requirements.txt +# Pull a language model (see LICENSE_STABLELM2.txt) +# ARG OLLAMA_MODEL_NAME=openchat +ARG OLLAMA_MODEL_NAME=stablelm2:1.6b-zephyr +ARG OLLAMA_URL=http://localhost:11434 + +ENV OLLAMA_MODEL_NAME=${OLLAMA_MODEL_NAME} +ENV OLLAMA_URL=${OLLAMA_URL} + +RUN ollama serve & while ! curl ${OLLAMA_URL}; do sleep 1; done; ollama pull $OLLAMA_MODEL_NAME + + +# Load sentence-transformers model once in order to cache it in the image +# TODO: ARG / ENV for embedder model +RUN echo "from haystack.components.embedders import SentenceTransformersDocumentEmbedder\nSentenceTransformersDocumentEmbedder(model='svalabs/german-gpl-adapted-covid').warm_up()" | python3 + + # Install frontend dependencies COPY --chmod=755 frontend/package.json frontend/package.json COPY --chmod=755 frontend/yarn.lock frontend/yarn.lock @@ -69,7 +72,7 @@ COPY --chmod=755 json_input json_input # Copy backend for production -COPY --chmod=644 gswikichat gswikichat +COPY --chmod=755 gswikichat gswikichat # Copy and build frontend for production (into the frontend/dist folder) diff --git a/Modelfile b/Modelfile deleted file mode 100644 index e0c49cb..0000000 --- a/Modelfile +++ /dev/null @@ -1,2 +0,0 @@ -FROM ./discolm_german_7b_v1.Q5_K_S.gguf - diff --git a/README.md b/README.md index 2e2a009..6f04ed3 100644 --- a/README.md +++ b/README.md @@ -9,9 +9,16 @@ To build and run the container locally with hot reload on python files do: ``` DOCKER_BUILDKIT=1 docker build . -t gbnc -docker run -v "$(pwd)/gswikichat":/workspace/gswikichat \ - -p 8000:8000 --rm --name gbnc -it gbnc \ - -e HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN +docker run \ + --env HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN \ + --volume "$(pwd)/gswikichat":/workspace/gswikichat \ + --volume gbnc_cache:/root/.cache \ + --publish 8000:8000 \ + --rm \ + --interactive \ + --tty \ + --name gbnc \ + gbnc ``` Point your browser to http://localhost:8000/ and use the frontend. @@ -19,6 +26,19 @@ Point your browser to http://localhost:8000/ and use the frontend. The container works on [runpod.io](https://www.runpod.io/) GPU instances. A [template is available here](https://runpod.io/gsc?template=0w8z55rf19&ref=yfvyfa0s). +### Local development +#### Backend +``` +python -m venv .venv +. 
./.venv/bin/activate +pip install -r requirements.txt +``` +#### Frontend +``` +cd frontend +yarn dev +``` + ## What's in the box ### Docker container @@ -44,3 +64,4 @@ A [FastAPI](https://fastapi.tiangolo.com/) server is running in the container. I ### Frontend A minimal frontend lets the user input a question and renders the response from the system. + diff --git a/cache/.keep b/cache/.keep new file mode 100644 index 0000000..e69de29 diff --git a/frontend/src/components/field/FieldAnswer.vue b/frontend/src/components/field/FieldAnswer.vue index 5c78d55..99afac7 100644 --- a/frontend/src/components/field/FieldAnswer.vue +++ b/frontend/src/components/field/FieldAnswer.vue @@ -12,7 +12,7 @@
-    [one template line removed — markup not preserved in this copy of the diff]
+    [one template line added — markup not preserved in this copy of the diff]
diff --git a/frontend/src/views/ChatView.vue b/frontend/src/views/ChatView.vue index 518a88f..981d2d7 100644 --- a/frontend/src/views/ChatView.vue +++ b/frontend/src/views/ChatView.vue @@ -95,8 +95,9 @@ const inputFocused = ref(false) // } function search() { + response.value = undefined; displayResponse.value = true - fetch(`/api?q=${inputText.value}`) + fetch(`/api?query=${inputText.value}`) .then((response) => response.json()) .then((data) => { response.value = data diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts index c29f416..0f4d1b0 100644 --- a/frontend/vite.config.ts +++ b/frontend/vite.config.ts @@ -5,7 +5,7 @@ import vue from '@vitejs/plugin-vue' // https://vitejs.dev/config/ export default defineConfig({ - base: '/frontend/dist', + base: '/', plugins: [ vue(), ], diff --git a/gswikichat/__init__.py b/gswikichat/__init__.py index eab6613..0a0e47b 100644 --- a/gswikichat/__init__.py +++ b/gswikichat/__init__.py @@ -1,2 +1 @@ from .api import * -# from .haystack2beta_tutorial_InMemoryEmbeddingRetriever import * diff --git a/gswikichat/api.py b/gswikichat/api.py index a05ff27..c97ff2a 100644 --- a/gswikichat/api.py +++ b/gswikichat/api.py @@ -1,71 +1,59 @@ -from fastapi.responses import RedirectResponse +from fastapi.responses import FileResponse from fastapi.staticfiles import StaticFiles from fastapi import FastAPI -# from .rag import rag_pipeline -from .rag import embedder, retriever, prompt_builder, llm, answer_builder +from .rag import rag_pipeline + from haystack import Document +from .logger import get_logger + +# Create logger instance from base logger config in `logger.py` +logger = get_logger(__name__) +FRONTEND_STATIC_DIR = './frontend/dist' app = FastAPI() + app.mount( - "/frontend/dist", - StaticFiles(directory="frontend/dist", html=True), - name="frontend" + "/assets", + StaticFiles(directory=f"{FRONTEND_STATIC_DIR}/assets"), + name="frontend-assets" ) - @app.get("/") async def root(): - return RedirectResponse(url="/frontend/dist", status_code=302) + return FileResponse(f"{FRONTEND_STATIC_DIR}/index.html") +@app.get("/favicon.ico") +async def favicon(): + return FileResponse(f"{FRONTEND_STATIC_DIR}/favicon.ico") @app.get("/api") -async def api(q): - - embedder, retriever, prompt_builder, llm, answer_builder - - # query = "How many languages are there?" 
- query = Document(content=q) - - result = embedder.run([query]) - - results = retriever.run( - query_embedding=list(result['documents'][0].embedding), - filters=None, - top_k=None, - scale_score=None, - return_embedding=None +async def api(query, top_k=3, lang='en'): + if not lang in ['en', 'de']: + raise Exception("language must be 'en' or 'de'") + + logger.debug(f'{query=}') # Assuming we change the input name + logger.debug(f'{top_k=}') + logger.debug(f'{lang=}') + + answer = rag_pipeline( + query=query, + top_k=top_k, + lang=lang ) - # .run( - # result['documents'][0].embedding - # ) - - prompt = prompt_builder.run(documents=results['documents'])['prompt'] - response = llm.run(prompt=prompt, generation_kwargs=None) - # reply = response['replies'][0] - - # rag_pipeline.connect("llm.replies", "answer_builder.replies") - # rag_pipeline.connect("llm.metadata", "answer_builder.meta") - # rag_pipeline.connect("retriever", "answer_builder.documents") - - results = answer_builder.run( - query=q, - replies=response['replies'], - meta=response['meta'], - documents=results['documents'], - pattern=None, - reference_pattern=None - ) + sources = [ + { + "src": d_.meta['src'], + "content": d_.content, + "score": d_.score + } for d_ in answer.documents + ] - answer = results['answers'][0] + logger.debug(f'{answer=}') return { - "answer": answer.data, - "sources": [{ - "src": d.meta['src'], - "content": d.content, - "score": d.score - } for d in answer.documents] + "answer": answer.data.content, + "sources": sources } diff --git a/gswikichat/llm_config.py b/gswikichat/llm_config.py index 10ded24..5af6c1c 100644 --- a/gswikichat/llm_config.py +++ b/gswikichat/llm_config.py @@ -1,9 +1,23 @@ import os -from haystack_integrations.components.generators.ollama import OllamaGenerator +from haystack_integrations.components.generators.ollama import OllamaChatGenerator -# TODO: discolm prompt https://huggingface.co/DiscoResearch/DiscoLM_German_7b_v1 -print(f"Setting up ollama with {os.getenv('MODEL')}") -llm = OllamaGenerator( - model=os.getenv("MODEL"), - url="http://localhost:11434/api/generate" +from .logger import get_logger + +# Create logger instance from base logger config in `logger.py` +logger = get_logger(__name__) + +OLLAMA_MODEL_NAME = os.environ.get("OLLAMA_MODEL_NAME") +OLLAMA_URL = os.environ.get("OLLAMA_URL") +OLLAMA_CHAT_URL = f"{OLLAMA_URL}/api/chat" + +logger.info(f'Using {OLLAMA_MODEL_NAME=}') +logger.info(f'Endpoint: {OLLAMA_URL=}') +logger.info(f'Generate: {OLLAMA_CHAT_URL=}') + +logger.info(f"Setting up ollama with {OLLAMA_MODEL_NAME}") + +llm = OllamaChatGenerator( + model=OLLAMA_MODEL_NAME, + url=OLLAMA_CHAT_URL, + timeout=120 ) diff --git a/gswikichat/logger.py b/gswikichat/logger.py new file mode 100644 index 0000000..5d89447 --- /dev/null +++ b/gswikichat/logger.py @@ -0,0 +1,30 @@ +import logging +import sys + + +def get_logger(name): + # Create a logger + # Source: https://docs.python.org/3/howto/logging.html + logging.basicConfig( + filename='gbnc_api.log', + encoding='utf-8', + level=logging.DEBUG + ) + + logger = logging.getLogger(name) + logger.setLevel(logging.DEBUG) # Set the logging level + + # Source: stackoverflow.com/questions/14058453/ + # making-python-loggers-output-all-messages- + # to-stdout-in-addition-to-log-file + + # Create console handler and set level to debug + handler = logging.StreamHandler(sys.stdout) + handler.setLevel(logging.DEBUG) + formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + ) + 
handler.setFormatter(formatter) + logger.addHandler(handler) + + return logger diff --git a/gswikichat/prompt.py b/gswikichat/prompt.py index 36a6ebb..d3306fc 100644 --- a/gswikichat/prompt.py +++ b/gswikichat/prompt.py @@ -1,36 +1,28 @@ from haystack.components.builders.prompt_builder import PromptBuilder -# prompt_template = """ -# Given these documents, answer the question. Answer in a full sentence. Give the response only, no explanation. Don't mention the documents. -# Documents: -# {% for doc in documents %} -# If {{ doc.content }} answers the Question: {{question}} -# Then return {{ doc.meta["src"] }} -# {% endfor %} -# """ - -prompt_template = """ -<|system|> -You are a helpful assistant. You answer questions based on the given documents. -Answer based on the documents only. If the information is not in the documents, -say that you cannot find the information. -<|endoftext|> -<|user|> +prompt_template_en = """ Documents: -{% for doc in documents %} - {{ doc.content }} +{% for doc_ in documents %} + {{ doc_.content }} {% endfor %} With this documents, answer the following question: {{question}} -<|endoftext|> -<|assistant|> """ -# prompt_template = """ -# Given these documents, answer the question. Answer in a full sentence. Give the response only, no explanation. Don't mention the documents. -# Documents: -# If {{ doc.content }} answers the Question: {{question}} -# Then only return {{ doc.meta["src"] }} and nothing at all. -# {% endfor %} -# """ +prompt_template_de = """ +Dokumente: +{% for doc_ in documents %} + {{ doc_.content }} +{% endfor %} +Mit diesen Dokumenten, beantworte die folgende Frage: {{question}} +""" + +system_prompts = { + 'en': 'You are a helpful assistant. You answer questions based on the given documents. Answer based on the documents only. If the information is not in the documents, say that you cannot find the information.', + 'de': 'Du bist ein hilfreicher Assistent. Du beantwortest Fragen basierend auf den vorliegenden Dokumenten. Beantworte basierend auf den Dokumenten nur. 
Wenn die Information nicht in den Dokumenten ist, sage, dass du sie nicht finden kannst.', +} + +user_prompt_builders = { + 'en': PromptBuilder(template=prompt_template_en), + 'de': PromptBuilder(template=prompt_template_de), +} -prompt_builder = PromptBuilder(template=prompt_template) diff --git a/gswikichat/rag.py b/gswikichat/rag.py index 8e198f0..b916686 100644 --- a/gswikichat/rag.py +++ b/gswikichat/rag.py @@ -1,31 +1,69 @@ - -from haystack import Pipeline +# from haystack import Pipeline +from haystack import Document from haystack.components.builders.answer_builder import AnswerBuilder +from haystack.dataclasses import ChatMessage from .llm_config import llm -from .prompt import prompt_builder +from .logger import get_logger +from .prompt import user_prompt_builders, system_prompts from .vector_store_interface import embedder, retriever, input_documents -answer_builder = AnswerBuilder() - -# rag_pipeline = Pipeline() -# rag_pipeline.add_component("text_embedder", embedder) -# rag_pipeline.add_component("retriever", retriever) -# # rag_pipeline.add_component("writer", writer) -# rag_pipeline.add_component("prompt_builder", prompt_builder) -# rag_pipeline.add_component("llm", llm) -# rag_pipeline.add_component("answer_builder", answer_builder) - -# # rag_pipeline.connect("embedder", "writer") -# rag_pipeline.connect("retriever.documents", "text_embedder") -# rag_pipeline.connect("retriever", "prompt_builder.documents") -# rag_pipeline.connect("prompt_builder", "llm") -# rag_pipeline.connect("llm.replies", "answer_builder.replies") -# rag_pipeline.connect("llm.metadata", "answer_builder.meta") -# rag_pipeline.connect("retriever", "answer_builder.documents") - -# rag_pipeline.run( -# { -# "text_embedder": {"documents": input_documents} -# } -# ) +# Create logger instance from base logger config in `logger.py` +logger = get_logger(__name__) + + +def rag_pipeline(query: str, top_k: int = 3, lang: str = 'de'): + + query_document = Document(content=query) + query_embedded = embedder.run([query_document]) + query_embedding = query_embedded['documents'][0].embedding + + retriever_results = retriever.run( + query_embedding=list(query_embedding), + filters=None, + top_k=top_k, + scale_score=None, + return_embedding=None + ) + + logger.debug('retriever results:') + for retriever_result_ in retriever_results: + logger.debug(retriever_result_) + + system_prompt = system_prompts[lang] + user_prompt_builder = user_prompt_builders[lang] + + user_prompt_build = user_prompt_builder.run( + question=query_document.content, + documents=retriever_results['documents'] + ) + + prompt = user_prompt_build['prompt'] + + logger.debug(f'{prompt=}') + + messages = [ + ChatMessage.from_system(system_prompt), + ChatMessage.from_user(prompt), + ] + + response = llm.run( + messages, + # generation_kwargs={"temperature": 0.2} + ) + + logger.debug(response) + + answer_builder = AnswerBuilder() + answer_build = answer_builder.run( + query=query_document.content, + replies=response['replies'], + meta=[r.meta for r in response['replies']], + documents=retriever_results['documents'], + pattern=None, + reference_pattern=None + ) + + logger.debug(f'{answer_build=}') + + return answer_build['answers'][0] diff --git a/gswikichat/vector_store_interface.py b/gswikichat/vector_store_interface.py index bc99b9d..95d52db 100644 --- a/gswikichat/vector_store_interface.py +++ b/gswikichat/vector_store_interface.py @@ -1,46 +1,65 @@ import os import json -# from sentence_transformers import SentenceTransformer from tqdm import tqdm 
from haystack import Document # , Pipeline from haystack.components.embedders import SentenceTransformersDocumentEmbedder -# from haystack.components.embedders import SentenceTransformersTextEmbedder from haystack.document_stores.in_memory import InMemoryDocumentStore -# from haystack.components.retrievers.in_memory import InMemoryBM25Retriever from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever -# from haystack.components.writers import DocumentWriter from haystack.document_stores.types.policy import DuplicatePolicy +from haystack.components.preprocessors import DocumentSplitter +from haystack.components.preprocessors import DocumentCleaner + +import torch + +from .logger import get_logger + +# Create logger instance from base logger config in `logger.py` +logger = get_logger(__name__) HUGGING_FACE_HUB_TOKEN = os.environ.get('HUGGING_FACE_HUB_TOKEN') + +# disable this line to disable the embedding cache +EMBEDDING_CACHE_FILE = '/root/.cache/gbnc_embeddings.json' + top_k = 5 input_documents = [] +device = "cpu" + +if torch.cuda.is_available(): + logger.info('GPU is available.') + device = "cuda" + + +# TODO: Add the json strings as env variables json_dir = 'json_input' -json_fname = 'excellent-articles_10_paragraphs.json' +json_fname = 'excellent-articles_10.json' + json_fpath = os.path.join(json_dir, json_fname) if os.path.isfile(json_fpath): - print(f'[INFO] Loading data from {json_fpath}') + logger.info(f'Loading data from {json_fpath}') with open(json_fpath, 'r') as finn: json_obj = json.load(finn) if isinstance(json_obj, dict): - for k, v in tqdm(json_obj.items()): - print(f"Loading {k}") - input_documents.append(Document(content=v, meta={"src": k})) + input_documents = [ + Document( + content=content_, + meta={"src": url_} + ) + for url_, content_ in tqdm(json_obj.items()) + ] elif isinstance(json_obj, list): - for obj_ in tqdm(json_obj): - url = obj_['meta'] - content = obj_['content'] - - input_documents.append( - Document( - content=content, - meta={'src': url} - ) + input_documents = [ + Document( + content=obj_['content'], + meta={'src': obj_['meta']} ) + for obj_ in tqdm(json_obj) + ] else: input_documents = [ Document( @@ -57,112 +76,63 @@ ), ] -# Write documents to InMemoryDocumentStore +splitter = DocumentSplitter( + split_by="sentence", + split_length=5, + split_overlap=0 +) +input_documents = splitter.run(input_documents)['documents'] + +cleaner = DocumentCleaner( + remove_empty_lines=True, + remove_extra_whitespaces=True, + remove_repeated_substrings=False +) +input_documents = cleaner.run(input_documents)['documents'] + document_store = InMemoryDocumentStore( embedding_similarity_function="cosine", # embedding_dim=768, # duplicate_documents="overwrite" ) -# document_store.write_documents(input_documents) - -# TODO Introduce Jina.AI from HuggingFace. Establish env-variable for trust_... 
- -# basic_transformer_models = [ -# "all-MiniLM-L6-v2", -# "xlm-clm-ende-1024", -# "xlm-mlm-ende-1024", -# "bert-base-german-cased", -# "bert-base-german-dbmdz-cased", -# "bert-base-german-dbmdz-uncased", -# "distilbert-base-german-cased", -# "xlm-roberta-large-finetuned-conll03-german", -# "deutsche-telekom/gbert-large-paraphrase-cosine" -# ] - -# https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2 -# sentence_transformer_model = "all-MiniLM-L6-v2" -# 3 minutes to batch 82 - -# https://huggingface.co/deutsche-telekom/gbert-large-paraphrase-cosine -# sentence_transformer_model = 'deutsche-telekom/gbert-large-paraphrase-cosine' -# 76 minutes to batch 82 - -# https://huggingface.co/jinaai/jina-embeddings-v2-base-de -# sentence_transformer_model = 'jinaai/jina-embeddings-v2-base-de' -# Cannot find or load the embedding model -# Unknown minutes to batch 82 - -# https://huggingface.co/aari1995/German_Semantic_STS_V2 -# sentence_transformer_model = 'aari1995/German_Semantic_STS_V2' -# 75 minutes to batch 82 - -# https://huggingface.co/Sahajtomar/German-semantic -# sentence_transformer_model = 'Sahajtomar/German-semantic' -# 72 minutes to batch 82 # https://huggingface.co/svalabs/german-gpl-adapted-covid sentence_transformer_model = 'svalabs/german-gpl-adapted-covid' -# 2 minutes to batch 82 - -# https://huggingface.co/PM-AI/bi-encoder_msmarco_bert-base_german -# sentence_transformer_model = 'PM-AI/bi-encoder_msmarco_bert-base_german' -# 14 minutes to batch 82 - -# https://huggingface.co/JoBeer/german-semantic-base -# sentence_transformer_model = 'JoBeer/german-semantic-base' -# 22 minutes to batch 82 - -print(f'Sentence Transformer Name:{sentence_transformer_model}') +logger.info(f'Sentence Transformer Name: {sentence_transformer_model}') embedder = SentenceTransformersDocumentEmbedder( model=sentence_transformer_model, - # model="T-Systems-onsite/german-roberta-sentence-transformer-v2", - # model="jinaai/jina-embeddings-v2-base-de", - # token=HUGGING_FACE_HUB_TOKEN + device=device ) - -# hg_embedder = SentenceTransformer( -# "jinaai/jina-embeddings-v2-base-de", -# token=HUGGING_FACE_HUB_TOKEN -# ) - embedder.warm_up() -documents_with_embeddings = embedder.run(input_documents) -# documents_with_embeddings = embedder.encode(input_documents) +if EMBEDDING_CACHE_FILE and os.path.isfile(EMBEDDING_CACHE_FILE): + logger.info('Loading embeddings from cache') -# print('\n\n') -# # print(documents_with_embeddings['documents']) -# print(type(documents_with_embeddings['documents'])) -# print(len(documents_with_embeddings['documents'])) -# print(dir(documents_with_embeddings['documents'][0])) -# print('\n\n') -# print(type(embedder.model)) -# print('\n\n') -# # print(dir(hg_embedder)) + with open(EMBEDDING_CACHE_FILE, 'r') as f_in: + documents_dict = json.load(f_in) + document_store.write_documents( + documents=[Document.from_dict(d_) for d_ in documents_dict], + policy=DuplicatePolicy.OVERWRITE + ) - -document_store.write_documents( - documents=documents_with_embeddings['documents'], - policy=DuplicatePolicy.OVERWRITE -) - -retriever = InMemoryEmbeddingRetriever( - # embedding_model="sentence-transformers/all-MiniLM-L6-v2", - document_store=document_store, - top_k=top_k -) - -# writer = DocumentWriter(document_store=document_store) - -# indexing_pipeline = Pipeline() -# indexing_pipeline.add_component("embedder", embedder) -# indexing_pipeline.add_component("writer", writer) -# indexing_pipeline.connect("embedder", "writer") -# indexing_pipeline.run( -# { -# "embedder": {"documents": 
input_documents} -# } -# ) +else: + logger.debug("Generating embeddings") + + embedded = embedder.run(input_documents) + document_store.write_documents( + documents=embedded['documents'], + policy=DuplicatePolicy.OVERWRITE + ) + + if EMBEDDING_CACHE_FILE: + with open(EMBEDDING_CACHE_FILE, 'w') as f_out: + documents_dict = [ + Document.to_dict(d_) + for d_ in embedded['documents'] + ] + json.dump(documents_dict, f_out) + +retriever = InMemoryEmbeddingRetriever(document_store=document_store) diff --git a/requirements.txt b/requirements.txt index b03f924..723011a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -32,7 +32,7 @@ python-dotenv==1.0.1 pytz==2023.3.post1 PyYAML==6.0.1 requests==2.31.0 -sentence-transformers>=2.2.0 +sentence-transformers==2.3.1 six==1.16.0 sniffio==1.3.0 starlette==0.35.1 diff --git a/research_log.md b/research_log.md new file mode 100644 index 0000000..a6d31ed --- /dev/null +++ b/research_log.md @@ -0,0 +1,48 @@ +## Sentence Transformers Statistics + +``` +basic_transformer_models = [ + "all-MiniLM-L6-v2", + "xlm-clm-ende-1024", + "xlm-mlm-ende-1024", + "bert-base-german-cased", + "bert-base-german-dbmdz-cased", + "bert-base-german-dbmdz-uncased", + "distilbert-base-german-cased", + "xlm-roberta-large-finetuned-conll03-german", + "deutsche-telekom/gbert-large-paraphrase-cosine" +] + +https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2 +sentence_transformer_model = "all-MiniLM-L6-v2" +3 minutes to batch 82 + +https://huggingface.co/deutsche-telekom/gbert-large-paraphrase-cosine +sentence_transformer_model = 'deutsche-telekom/gbert-large-paraphrase-cosine' +76 minutes to batch 82 + +https://huggingface.co/jinaai/jina-embeddings-v2-base-de +sentence_transformer_model = 'jinaai/jina-embeddings-v2-base-de' +Cannot find or load the embedding model +Unknown minutes to batch 82 + +https://huggingface.co/aari1995/German_Semantic_STS_V2 +sentence_transformer_model = 'aari1995/German_Semantic_STS_V2' +75 minutes to batch 82 + +https://huggingface.co/Sahajtomar/German-semantic +sentence_transformer_model = 'Sahajtomar/German-semantic' +72 minutes to batch 82 + +https://huggingface.co/svalabs/german-gpl-adapted-covid +ntence_transformer_model = 'svalabs/german-gpl-adapted-covid' +2 minutes to batch 82 + +https://huggingface.co/PM-AI/bi-encoder_msmarco_bert-base_german +sentence_transformer_model = 'PM-AI/bi-encoder_msmarco_bert-base_german' +14 minutes to batch 82 + +https://huggingface.co/JoBeer/german-semantic-base +sentence_transformer_model = 'JoBeer/german-semantic-base' +22 minutes to batch 82 +``` diff --git a/start.sh b/start.sh index cf21953..b7a27f8 100644 --- a/start.sh +++ b/start.sh @@ -1,11 +1,13 @@ #!/bin/bash +set -e + if [[ $PUBLIC_KEY ]] then mkdir -p ~/.ssh chmod 700 ~/.ssh cd ~/.ssh - echo $PUBLIC_KEY >> authorized_keys + echo "$PUBLIC_KEY" >> authorized_keys chmod 700 -R ~/.ssh cd / service ssh start @@ -16,10 +18,17 @@ fi echo "Starting ollama" ollama serve & +while ! curl "$OLLAMA_URL"; do + sleep 1 +done + +echo "Pulling $OLLAMA_MODEL_NAME from ollama library" +ollama pull "$OLLAMA_MODEL_NAME" + cd /workspace echo "Starting api" uvicorn gswikichat:app --reload --host 0.0.0.0 --port 8000 & -echo "Sleeping..." +echo "Ready" sleep infinity
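
For reviewers, a minimal smoke test of the reworked `/api` endpoint, assuming the container is running as in the README (published on `localhost:8000`). The parameter names (`query`, `top_k`, `lang`) and the response shape are taken from `gswikichat/api.py` in this diff; the sample question is purely illustrative.

```
# Assumes the gbnc container from the README is up and listening on localhost:8000.
# --get + --data-urlencode builds the query string and URL-encodes the values.
curl --get 'http://localhost:8000/api' \
  --data-urlencode 'query=Wer war Alan Turing?' \
  --data-urlencode 'top_k=3' \
  --data-urlencode 'lang=de'

# Expected response shape (values depend on the loaded documents):
# {"answer": "...", "sources": [{"src": "...", "content": "...", "score": 0.87}, ...]}
```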