precommit

codefromthecrypt · codefromthecrypt · commit cf0ec3e1cc27 · 2024-12-19T14:38:03.000+08:00
Signed-off-by: Adrian Cole <adrian.cole@elastic.co>
diff --git a/example-apps/chatbot-rag-app/api/chat.py b/example-apps/chatbot-rag-app/api/chat.py
@@ -3,7 +3,11 @@
 
 from elasticsearch import Elasticsearch
 from flask import current_app, render_template, stream_with_context
-from langchain_elasticsearch import ElasticsearchChatMessageHistory, ElasticsearchStore, SparseVectorStrategy
+from langchain_elasticsearch import (
+    ElasticsearchChatMessageHistory,
+    ElasticsearchStore,
+    SparseVectorStrategy,
+)
 from langchain_openai import ChatOpenAI
 from llm_integrations import get_llm
 from elasticsearch_client import (
@@ -12,7 +16,9 @@
 )
 
 INDEX = os.getenv("ES_INDEX", "workplace-app-docs")
-INDEX_CHAT_HISTORY = os.getenv("ES_INDEX_CHAT_HISTORY", "workplace-app-docs-chat-history")
+INDEX_CHAT_HISTORY = os.getenv(
+    "ES_INDEX_CHAT_HISTORY", "workplace-app-docs-chat-history"
+)
 ELSER_MODEL = os.getenv("ELSER_MODEL", ".elser_model_2")
 SESSION_ID_TAG = "[SESSION_ID]"
 SOURCE_TAG = "[SOURCE]"
@@ -26,12 +32,15 @@
 
 llm = get_llm()
 
+
 @stream_with_context
 def ask_question(question, session_id):
     yield f"data: {SESSION_ID_TAG} {session_id}\n\n"
     current_app.logger.debug("Chat session ID: %s", session_id)
 
-    chat_history = get_elasticsearch_chat_message_history(INDEX_CHAT_HISTORY, session_id)
+    chat_history = get_elasticsearch_chat_message_history(
+        INDEX_CHAT_HISTORY, session_id
+    )
 
     if len(chat_history.messages) > 0:
         # create a condensed question
@@ -50,7 +59,9 @@ def ask_question(question, session_id):
     docs = store.as_retriever().invoke(condensed_question)
     for doc in docs:
         doc_source = {**doc.metadata, "page_content": doc.page_content}
-        current_app.logger.debug("Retrieved document passage from: %s", doc.metadata["name"])
+        current_app.logger.debug(
+            "Retrieved document passage from: %s", doc.metadata["name"]
+        )
         yield f"data: {SOURCE_TAG} {json.dumps(doc_source)}\n\n"
 
     qa_prompt = render_template(
@@ -62,7 +73,9 @@ def ask_question(question, session_id):
 
     answer = ""
     for chunk in llm.stream(qa_prompt):
-        content = chunk.content.replace("\n", " ")  # the stream can get messed up with newlines
+        content = chunk.content.replace(
+            "\n", " "
+        )  # the stream can get messed up with newlines
         yield f"data: {content}\n\n"
         answer += chunk.content
 
diff --git a/example-apps/chatbot-rag-app/api/elasticsearch_client.py b/example-apps/chatbot-rag-app/api/elasticsearch_client.py
@@ -12,7 +12,7 @@
 if ELASTICSEARCH_URL:
     elasticsearch_client = Elasticsearch(
         hosts=[ELASTICSEARCH_URL],
-        basic_auth=(ELASTICSEARCH_USER, ELASTICSEARCH_PASSWORD)
+        basic_auth=(ELASTICSEARCH_USER, ELASTICSEARCH_PASSWORD),
     )
 elif ELASTIC_CLOUD_ID:
     elasticsearch_client = Elasticsearch(
diff --git a/example-apps/chatbot-rag-app/data/index_data.py b/example-apps/chatbot-rag-app/data/index_data.py
@@ -21,7 +21,7 @@
 if ELASTICSEARCH_URL:
     elasticsearch_client = Elasticsearch(
         hosts=[ELASTICSEARCH_URL],
-        basic_auth=(ELASTICSEARCH_USER, ELASTICSEARCH_PASSWORD)
+        basic_auth=(ELASTICSEARCH_USER, ELASTICSEARCH_PASSWORD),
     )
 elif ELASTIC_CLOUD_ID:
     elasticsearch_client = Elasticsearch(
@@ -39,16 +39,22 @@ def install_elser():
         print(f'"{ELSER_MODEL}" model is available')
     except NotFoundError:
         print(f'"{ELSER_MODEL}" model not available, downloading it now')
-        elasticsearch_client.ml.put_trained_model(model_id=ELSER_MODEL, input={"field_names": ["text_field"]})
+        elasticsearch_client.ml.put_trained_model(
+            model_id=ELSER_MODEL, input={"field_names": ["text_field"]}
+        )
         while True:
-            status = elasticsearch_client.ml.get_trained_models(model_id=ELSER_MODEL, include="definition_status")
+            status = elasticsearch_client.ml.get_trained_models(
+                model_id=ELSER_MODEL, include="definition_status"
+            )
             if status["trained_model_configs"][0]["fully_defined"]:
                 # model is ready
                 break
             time.sleep(1)
 
         print("Model downloaded, starting deployment")
-        elasticsearch_client.ml.start_trained_model_deployment(model_id=ELSER_MODEL, wait_for="fully_allocated")
+        elasticsearch_client.ml.start_trained_model_deployment(
+            model_id=ELSER_MODEL, wait_for="fully_allocated"
+        )
 
 
 def main():
@@ -69,7 +75,9 @@ def main():
 
     print(f"Loaded {len(workplace_docs)} documents")
 
-    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=512, chunk_overlap=256)
+    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
+        chunk_size=512, chunk_overlap=256
+    )
 
     docs = text_splitter.transform_documents(workplace_docs)
 

Original file line number	Diff line number	Diff line change
`@@ -12,7 +12,7 @@`
`12`	`12`	`if ELASTICSEARCH_URL:`
`13`	`13`	`elasticsearch_client = Elasticsearch(`
`14`	`14`	`hosts=[ELASTICSEARCH_URL],`
`15`		`- basic_auth=(ELASTICSEARCH_USER, ELASTICSEARCH_PASSWORD)`
	`15`	`+ basic_auth=(ELASTICSEARCH_USER, ELASTICSEARCH_PASSWORD),`
`16`	`16`	`)`
`17`	`17`	`elif ELASTIC_CLOUD_ID:`
`18`	`18`	`elasticsearch_client = Elasticsearch(`