
Commit 7776f1a

Merge pull request #62 from marklogic/feature/langchain-example
Example integration with langchain
2 parents 7080987 + 3205914 commit 7776f1a


14 files changed: +501 -0 lines changed


examples/.gitignore

Lines changed: 1 addition & 0 deletions
@@ -1 +1,2 @@
 .ipynb_checkpoints
+.env

examples/langchain/.gitignore

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
docker
.gradle
build

examples/langchain/README.md

Lines changed: 70 additions & 0 deletions
@@ -0,0 +1,70 @@
# Example langchain retriever

This project demonstrates one approach for implementing a
[langchain retriever](https://python.langchain.com/docs/modules/data_connection/)
that allows for
[Retrieval Augmented Generation (RAG)](https://python.langchain.com/docs/use_cases/question_answering/)
to be supported via MarkLogic and the MarkLogic Python Client. This example uses the same data as in
[the langchain RAG quickstart guide](https://python.langchain.com/docs/use_cases/question_answering/quickstart),
but with the data having first been loaded into MarkLogic.

**This is only intended as an example** of how easily a langchain retriever can be developed
using the MarkLogic Python Client. The queries in this example are simple and naturally
do not have any knowledge of how your data is modeled in MarkLogic. You are encouraged to use
this as an example for developing your own retriever, where you can build a query based on a
question submitted to langchain that fully leverages the indexes and data models in your MarkLogic
application. Additionally, please see the
[langchain documentation on splitting text](https://python.langchain.com/docs/modules/data_connection/document_transformers/). You may need to restructure your data so that you have a larger number of
smaller documents in your database so that you do not exceed the limit that langchain imposes on how
much data a retriever can return.

# Setup

To try out this project, use [docker-compose](https://docs.docker.com/compose/) to instantiate a new MarkLogic
instance with port 8003 available (you can use your own MarkLogic instance too, just be sure that port 8003
is available):

    docker-compose up -d --build

Then deploy a small REST API application to MarkLogic, which includes a basic non-admin MarkLogic user
named `langchain-user`:

    ./gradlew -i mlDeploy

Next, create a new Python virtual environment - [pyenv](https://github.com/pyenv/pyenv) is recommended for this -
and install the
[langchain example dependencies](https://python.langchain.com/docs/use_cases/question_answering/quickstart#dependencies),
along with the MarkLogic Python Client:

    pip install -U langchain langchain_openai langchain-community langchainhub openai chromadb bs4 marklogic_python_client

Then run the following Python program to load text data from the langchain quickstart guide
into two different collections in the `langchain-test-content` database:

    python load_data.py

Create a ".env" file to hold your OpenAI API key:

    echo "OPENAI_API_KEY=<your key here>" > .env

# Testing the retriever

You are now ready to test the example retriever. Run the following to ask a question with the
results augmented via the `marklogic_retriever.py` module in this project; the OpenAI API key
is read from the `.env` file you created above:

    python ask.py "What is task decomposition?" posts

The retriever uses a [cts.similarQuery](https://docs.marklogic.com/cts.similarQuery) to select from the documents
loaded via `load_data.py`. It defaults to a page length of 10. You can change this by providing a command line
argument - e.g.:

    python ask.py "What is task decomposition?" posts 15

Example of a question for the "sotu" (State of the Union speech) collection:

    python ask.py "What are economic sanctions?" sotu 20

To use a word query instead of a similar query, along with a set of drop words, specify "word" as the 4th argument:

    python ask.py "What are economic sanctions?" sotu 20 word
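
The `load_data.py` program referenced in the README above is part of this commit but is not shown in this view. Below is a rough, hypothetical sketch of the kind of loading it performs for the "posts" collection, assuming the MarkLogic Python Client can be used like a requests Session against the MarkLogic REST API; the document URIs, chunk sizes, and loader choices here are illustrative and not taken from the actual file:

# Hypothetical loader sketch; the committed load_data.py may differ.
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from marklogic import Client

client = Client("http://localhost:8003", digest=("langchain-user", "password"))

# Fetch the blog post used by the langchain RAG quickstart guide.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))
    ),
)
docs = loader.load()

# Split the post into small chunks so that a page of retriever results stays
# well under the amount of data langchain allows a retriever to return.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = splitter.split_documents(docs)

# Write each chunk as a small text document in the "posts" collection.
# Assumes langchain-user is allowed to insert documents; add permission
# parameters if your user requires them.
for i, split in enumerate(splits):
    client.put(
        "/v1/documents",
        params={"uri": f"/post/chunk-{i}.txt", "collection": "posts"},
        headers={"Content-Type": "text/plain"},
        data=split.page_content.encode("utf-8"),
    )

Splitting into roughly 1000-character chunks follows the README's advice about keeping documents small; the actual project also loads a second "sotu" collection, which is omitted from this sketch.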

examples/langchain/ask.py

Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
# Based on example at
# https://python.langchain.com/docs/use_cases/question_answering/quickstart .

import sys
from dotenv import load_dotenv
from langchain import hub
from langchain_openai import ChatOpenAI
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from marklogic import Client
from marklogic_retriever import MarkLogicRetriever


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


question = sys.argv[1]

retriever = MarkLogicRetriever.create(
    Client("http://localhost:8003", digest=("langchain-user", "password"))
)
retriever.collections = [sys.argv[2]]
retriever.max_results = int(sys.argv[3]) if len(sys.argv) > 3 else 10
if len(sys.argv) > 4:
    retriever.query_type = sys.argv[4]

load_dotenv()

prompt = hub.pull("rlm/rag-prompt")
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt | llm | StrOutputParser()
)
print(rag_chain.invoke(question))
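
The `MarkLogicRetriever` imported above lives in `marklogic_retriever.py`, which is included in this commit but not rendered in this view. Below is a rough, hypothetical sketch of how such a retriever can be built on the MarkLogic Python Client. Two assumptions to note: it issues a plain word search via the REST `/v1/search` endpoint rather than the `cts.similarQuery` the README describes, and it treats the Client as a requests Session; the class name and field defaults are illustrative:

# Hypothetical retriever sketch; the committed marklogic_retriever.py may differ.
from typing import List

from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from marklogic import Client


class SketchMarkLogicRetriever(BaseRetriever):
    client: Client
    collections: List[str] = []
    max_results: int = 10
    query_type: str = "similar"  # kept for parity with ask.py; unused in this sketch

    class Config:
        arbitrary_types_allowed = True

    @classmethod
    def create(cls, client: Client):
        return cls(client=client)

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        # Find matching documents in the configured collections via a word search.
        response = self.client.get(
            "/v1/search",
            params={
                "q": query,
                "collection": self.collections,
                "pageLength": self.max_results,
                "format": "json",
            },
        )
        uris = [result["uri"] for result in response.json().get("results", [])]

        # Read each matching document and hand its text to langchain.
        docs = []
        for uri in uris:
            content = self.client.get("/v1/documents", params={"uri": uri}).text
            docs.append(Document(page_content=content, metadata={"uri": uri}))
        return docs

Because a langchain retriever is a Runnable, an instance of this class can be piped directly into the `rag_chain` above via `retriever | format_docs`.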

examples/langchain/build.gradle

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
plugins {
    id "net.saliman.properties" version "1.5.2"
    id "com.marklogic.ml-gradle" version "4.6.0"
}

examples/langchain/docker-compose.yml

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
version: '3.8'
name: marklogic_langchain

services:

  marklogic:
    image: "marklogicdb/marklogic-db:11.1.0-centos-1.1.0"
    platform: linux/amd64
    environment:
      - MARKLOGIC_INIT=true
      - MARKLOGIC_ADMIN_USERNAME=admin
      - MARKLOGIC_ADMIN_PASSWORD=admin
    volumes:
      - ./docker/marklogic/logs:/var/opt/MarkLogic/Logs
    ports:
      - "8000-8003:8000-8003"

examples/langchain/gradle.properties

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
mlAppName=langchain-test
mlRestPort=8003
mlUsername=admin
mlPassword=admin

examples/langchain/gradle/wrapper/gradle-wrapper.jar

59.3 KB
Binary file not shown.

examples/langchain/gradle/wrapper/gradle-wrapper.properties

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
#Tue Mar 22 14:27:38 EDT 2016
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-bin.zip

examples/langchain/gradlew

Lines changed: 160 additions & 0 deletions
Some generated files are not rendered by default.
