Commit ea9e94b

[tests]: add NeMo Guardrails with retrieval rail to test matrix (#328)

* Add NeMo Guardrails to test matrix

1 parent: fd7df61

File tree: 9 files changed, +203 −31 lines

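At a glance: the commit adds a nemo_guardrails case to the LangChain compatibility suite, backed by a new test module that registers a retrieval ("rag") action as a NeMo Guardrails rail; it reshapes the llm fixtures so each one declares whether it can drive NeMo; and it pins the nemoguardrails dependency in the e2e pyproject files. A sketch of the resulting matrix (axis values are from the diff below):

# Parametrized axes after this commit; pytest crosses all of them.
test_cases = ["rag_custom_chain", "conversational_rag", "trulens", "nemo_guardrails"]
vector_stores = ["astra_db", "cassandra"]
# Each combination is further crossed with every (embedding, llm) fixture pair;
# the nemo_guardrails case only runs where the llm fixture carries a non-None
# "nemo_config" (in this commit, gpt-4 via the openai engine).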

.github/workflows/_run_e2e_tests.yml

Lines changed: 1 addition & 0 deletions
@@ -108,6 +108,7 @@ jobs:
       ASTRA_DB_ID: "${{ steps.astra-db.outputs.db_id }}"
       ASTRA_DB_ENV: "${{ inputs.astradb-env }}"
       OPEN_AI_KEY: "${{ secrets.E2E_TESTS_OPEN_AI_KEY }}"
+      OPENAI_API_KEY: "${{ secrets.E2E_TESTS_OPEN_AI_KEY }}"
       AZURE_OPEN_AI_KEY: "${{ secrets.E2E_TESTS_AZURE_OPEN_AI_KEY }}"
       AZURE_OPEN_AI_ENDPOINT: "${{ secrets.E2E_TESTS_AZURE_OPEN_AI_ENDPOINT }}"
       AZURE_BLOB_STORAGE_CONNECTION_STRING: "${{ secrets.E2E_TESTS_AZURE_BLOB_STORAGE_CONNECTION_STRING }}"
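
The same secret is now exported under both names. A likely reason (inferred; the commit does not say): the harness reads OPEN_AI_KEY explicitly, while the OpenAI client that NeMo Guardrails constructs internally picks its key up from the standard OPENAI_API_KEY variable. A local-run sketch under that assumption:

import os

# Assumption: LLMRails builds the OpenAI LLM itself, and that client only
# looks at OPENAI_API_KEY, so the key must also exist under the standard name.
os.environ.setdefault("OPENAI_API_KEY", os.environ.get("OPEN_AI_KEY", ""))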

ragstack-e2e-tests/e2e_tests/langchain/nemo_guardrails.py

Lines changed: 117 additions & 0 deletions
@@ -0,0 +1,117 @@
+from e2e_tests.langchain.rag_application import (
+    BASIC_QA_PROMPT,
+    SAMPLE_DATA,
+)
+
+from langchain.schema.vectorstore import VectorStore
+from langchain.schema.output_parser import StrOutputParser
+from langchain.schema.retriever import BaseRetriever
+from langchain.prompts import PromptTemplate
+from langchain.llms.base import BaseLLM
+
+
+from nemoguardrails import RailsConfig, LLMRails
+from nemoguardrails.actions.actions import ActionResult
+
+
+def _config(engine, model) -> str:
+    return f"""
+    models:
+      - type: main
+        engine: {engine}
+        model: {model}
+    """
+
+
+def _colang() -> str:
+    return """
+    define user express greeting
+      "Hi, how are you?"
+
+    define user ask about product
+      "What was MyFakeProductForTesting?"
+      "When was MyFakeProductForTesting first released?"
+      "What capabilities does MyFakeProductForTesting have?"
+      "What is MyFakeProductForTesting's best feature?"
+
+    define bot express greeting
+      "Hello! I hope to answer all your questions!"
+
+    define flow greeting
+      user express greeting
+      bot express greeting
+
+    define flow answer product question
+      user ask about product
+      $answer = execute rag()
+      bot $answer
+    """
+
+
+class NeMoRag:
+    def __init__(self, retriever):
+        self.retriever = retriever
+
+    async def rag_using_lc(self, context: dict, llm: BaseLLM) -> ActionResult:
+        """
+        Defines the custom rag action
+        """
+        user_message = context.get("last_user_message")
+        context_updates = {}
+
+        # Use your pre-defined AstraDB Vector Store as the retriever
+        relevant_documents = await self.retriever.aget_relevant_documents(user_message)
+        relevant_chunks = "\n".join(
+            [chunk.page_content for chunk in relevant_documents]
+        )
+
+        # Use a custom prompt template
+        prompt_template = PromptTemplate.from_template(BASIC_QA_PROMPT)
+        input_variables = {"question": user_message, "context": relevant_chunks}
+
+        # Create LCEL chain
+        chain = prompt_template | llm | StrOutputParser()
+        answer = await chain.ainvoke(input_variables)
+
+        return ActionResult(return_value=answer, context_updates=context_updates)
+
+    def init(self, app: LLMRails):
+        app.register_action(self.rag_using_lc, "rag")
+
+
+def _try_runnable_rails(config: RailsConfig, retriever: BaseRetriever) -> None:
+    # LLM is created internally to rails using the provided config
+    rails = LLMRails(config)
+    processor = NeMoRag(retriever)
+    processor.init(rails)
+
+    response = rails.generate(
+        messages=[
+            {
+                "role": "user",
+                "content": "Hi, how are you?",
+            }
+        ]
+    )
+    assert "Hello! I hope to answer all your questions" in response["content"]
+
+    response = rails.generate(
+        messages=[
+            {
+                "role": "user",
+                "content": "When was MyFakeProductForTesting first released?",
+            }
+        ]
+    )
+    assert "2020" in response["content"]
+
+
+def run_nemo_guardrails(vector_store: VectorStore, config: dict[str, str]) -> None:
+    vector_store.add_texts(SAMPLE_DATA)
+    retriever = vector_store.as_retriever()
+
+    model_config = _config(config["engine"], config["model"])
+    rails_config = RailsConfig.from_content(
+        yaml_content=model_config, colang_content=_colang()
+    )
+    _try_runnable_rails(config=rails_config, retriever=retriever)
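
A minimal sketch of driving this module outside the CI harness. The FAISS store and OpenAI embeddings below are illustrative assumptions; the suite itself wires in Astra DB or Cassandra stores via fixtures:

from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

from e2e_tests.langchain.nemo_guardrails import run_nemo_guardrails

# run_nemo_guardrails ingests SAMPLE_DATA itself; the store only has to exist.
store = FAISS.from_texts(["placeholder"], OpenAIEmbeddings())
# {"engine": "openai", "model": "gpt-4"} is the only pairing the commit enables.
run_nemo_guardrails(vector_store=store, config={"engine": "openai", "model": "gpt-4"})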

ragstack-e2e-tests/e2e_tests/langchain/rag_application.py

Lines changed: 1 addition & 1 deletion
@@ -31,7 +31,7 @@
 
 
 BASIC_QA_PROMPT = """
-Answer the question based only on the supplied context. If you don't know the answer, say you don't know the answer.
+Answer the question based only on the supplied context. If you don't know the answer, say the following: "I don't know the answer".
 Context: {context}
 Question: {question}
 Your answer:
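
The reworded fallback pins an exact phrase, presumably so assertions can match it deterministically (intent inferred, not stated in the commit). For reference, the new rag action consumes this prompt via PromptTemplate.from_template; a quick formatting sketch with illustrative values:

from langchain.prompts import PromptTemplate

from e2e_tests.langchain.rag_application import BASIC_QA_PROMPT

prompt = PromptTemplate.from_template(BASIC_QA_PROMPT)
print(
    prompt.format(
        context="MyFakeProductForTesting was first released in 2020.",
        question="When was MyFakeProductForTesting first released?",
    )
)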

ragstack-e2e-tests/e2e_tests/langchain/test_compatibility_rag.py

Lines changed: 75 additions & 28 deletions
@@ -15,6 +15,7 @@
 )
 from e2e_tests.langchain.trulens import run_trulens_evaluation
 from e2e_tests.test_utils import get_local_resource_path
+from e2e_tests.langchain.nemo_guardrails import run_nemo_guardrails
 
 from langchain.chat_models import ChatOpenAI, AzureChatOpenAI, ChatVertexAI, BedrockChat
 from langchain.embeddings import (
@@ -59,17 +60,27 @@ def _chat_openai(**kwargs) -> ChatOpenAI:
 
 @pytest.fixture
 def openai_gpt35turbo_llm():
-    return _chat_openai(model="gpt-3.5-turbo", streaming=False)
+    # NeMo guardrails fails for this model with the given prompts.
+    model = "gpt-3.5-turbo"
+    return {"llm": _chat_openai(model=model, streaming=False), "nemo_config": None}
 
 
 @pytest.fixture
 def openai_gpt4_llm():
-    return _chat_openai(model="gpt-4", streaming=False)
+    model = "gpt-4"
+    return {
+        "llm": _chat_openai(model=model, streaming=False),
+        "nemo_config": {"engine": "openai", "model": model},
+    }
 
 
 @pytest.fixture
 def openai_gpt4_llm_streaming():
-    return _chat_openai(model="gpt-4", streaming=True)
+    model = "gpt-4"
+    return {
+        "llm": _chat_openai(model=model, streaming=True),
+        "nemo_config": {"engine": "openai", "model": model},
+    }
 
 
 def _openai_embeddings(**kwargs) -> OpenAIEmbeddings:
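
The hunks above and below change every llm fixture to the same dict shape; spelled out as a sketch (gpt-4 on the openai engine is the only pairing enabled here, all other fixtures set nemo_config to None):

# Contract each llm fixture now satisfies:
resolved_llm = {
    "llm": None,  # stand-in for the ChatOpenAI / BedrockChat / ... instance
    "nemo_config": {"engine": "openai", "model": "gpt-4"},  # or None -> case skipped
}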
@@ -95,13 +106,16 @@ def openai_3large_embedding():
 def azure_openai_gpt35turbo_llm():
     # model is configurable because it can be different from the deployment
     # but the targeting model must be gpt-35-turbo
-    return AzureChatOpenAI(
-        azure_deployment=get_required_env("AZURE_OPEN_AI_CHAT_MODEL_DEPLOYMENT"),
-        openai_api_base=get_required_env("AZURE_OPEN_AI_ENDPOINT"),
-        openai_api_key=get_required_env("AZURE_OPEN_AI_KEY"),
-        openai_api_type="azure",
-        openai_api_version="2023-07-01-preview",
-    )
+    return {
+        "llm": AzureChatOpenAI(
+            azure_deployment=get_required_env("AZURE_OPEN_AI_CHAT_MODEL_DEPLOYMENT"),
+            openai_api_base=get_required_env("AZURE_OPEN_AI_ENDPOINT"),
+            openai_api_key=get_required_env("AZURE_OPEN_AI_KEY"),
+            openai_api_type="azure",
+            openai_api_version="2023-07-01-preview",
+        ),
+        "nemo_config": None,
+    }
 
 
 @pytest.fixture
@@ -123,7 +137,7 @@ def azure_openai_ada002_embedding():
 
 @pytest.fixture
 def vertex_bison_llm():
-    return ChatVertexAI(model_name="chat-bison")
+    return {"llm": ChatVertexAI(model_name="chat-bison"), "nemo_config": None}
 
 
 @pytest.fixture
@@ -137,21 +151,30 @@ def _bedrock_chat(**kwargs) -> BedrockChat:
 
 @pytest.fixture
 def bedrock_anthropic_claudev2_llm():
-    return _bedrock_chat(
-        model_id="anthropic.claude-v2",
-    )
+    return {
+        "llm": _bedrock_chat(
+            model_id="anthropic.claude-v2",
+        ),
+        "nemo_config": None,
+    }
 
 
 @pytest.fixture
 def bedrock_mistral_mistral7b_llm():
-    return _bedrock_chat(
-        model_id="mistral.mistral-7b-instruct-v0:2",
-    )
+    return {
+        "llm": _bedrock_chat(
+            model_id="mistral.mistral-7b-instruct-v0:2",
+        ),
+        "nemo_config": None,
+    }
 
 
 @pytest.fixture
 def bedrock_meta_llama2_llm():
-    return _bedrock_chat(model_id="meta.llama2-13b-chat-v1")
+    return {
+        "llm": _bedrock_chat(model_id="meta.llama2-13b-chat-v1"),
+        "nemo_config": None,
+    }
 
 
 @pytest.fixture
@@ -172,11 +195,14 @@ def bedrock_cohere_embedding():
 
 @pytest.fixture
 def huggingface_hub_flant5xxl_llm():
-    return HuggingFaceHub(
-        repo_id="google/flan-t5-xxl",
-        huggingfacehub_api_token=get_required_env("HUGGINGFACE_HUB_KEY"),
-        model_kwargs={"temperature": 1, "max_length": 256},
-    )
+    return {
+        "llm": HuggingFaceHub(
+            repo_id="google/flan-t5-xxl",
+            huggingfacehub_api_token=get_required_env("HUGGINGFACE_HUB_KEY"),
+            model_kwargs={"temperature": 1, "max_length": 256},
+        ),
+        "nemo_config": None,
+    }
 
 
 @pytest.fixture
@@ -190,7 +216,7 @@ def huggingface_hub_minilml6v2_embedding():
 @pytest.fixture
 def nvidia_aifoundation_nvolveqa40k_embedding():
     get_required_env("NVIDIA_API_KEY")
-    from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
+    from langchain_nvidia_ai_endpoints.embeddings import NVIDIAEmbeddings
 
     return NVIDIAEmbeddings(model="playground_nvolveqa_40k")
 
@@ -200,14 +226,17 @@ def nvidia_aifoundation_mixtral8x7b_llm():
     get_required_env("NVIDIA_API_KEY")
     from langchain_nvidia_ai_endpoints import ChatNVIDIA
 
-    return ChatNVIDIA(model="playground_mixtral_8x7b")
+    return {"llm": ChatNVIDIA(model="playground_mixtral_8x7b"), "nemo_config": None}
 
 
 @pytest.mark.parametrize(
     "test_case",
-    ["rag_custom_chain", "conversational_rag", "trulens"],
+    ["rag_custom_chain", "conversational_rag", "trulens", "nemo_guardrails"],
+)
+@pytest.mark.parametrize(
+    "vector_store",
+    ["astra_db", "cassandra"],
 )
-@pytest.mark.parametrize("vector_store", ["astra_db", "cassandra"])
 @pytest.mark.parametrize(
     "embedding,llm",
     [
@@ -243,8 +272,15 @@ def test_rag(test_case, vector_store, embedding, llm, request, record_property):
     )
 
 
-def _run_test(test_case: str, vector_store_context, embedding, llm, record_property):
+def _run_test(
+    test_case: str,
+    vector_store_context,
+    embedding,
+    resolved_llm,
+    record_property,
+):
     vector_store = vector_store_context.new_langchain_vector_store(embedding=embedding)
+    llm = resolved_llm["llm"]
     if test_case == "rag_custom_chain":
         run_rag_custom_chain(
             vector_store=vector_store, llm=llm, record_property=record_property
@@ -256,8 +292,19 @@ def _run_test(test_case: str, vector_store_context, embedding, llm, record_property):
             chat_memory=vector_store_context.new_langchain_chat_memory(),
             record_property=record_property,
         )
+    # TODO: Add record property
     elif test_case == "trulens":
         run_trulens_evaluation(vector_store=vector_store, llm=llm)
+    elif test_case == "nemo_guardrails":
+        config = resolved_llm["nemo_config"]
+        if config:
+            # NeMo creates the LLM internally using the config
+            run_nemo_guardrails(
+                vector_store=vector_store,
+                config=config,
+            )
+        else:
+            pytest.skip("Skipping NeMo test for this configuration")
     else:
         raise ValueError(f"Unknown test case: {test_case}")
 
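With that dispatch in place, opting another provider into the NeMo case is just a fixture change. A hypothetical example (nothing below is part of this commit; the engine string must be one NeMo Guardrails supports):

@pytest.fixture
def openai_gpt35turbo_16k_llm():  # hypothetical fixture, for illustration only
    model = "gpt-3.5-turbo-16k"
    return {
        "llm": _chat_openai(model=model, streaming=False),
        "nemo_config": {"engine": "openai", "model": model},
    }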
ragstack-e2e-tests/e2e_tests/test_utils/astradb_vector_store_handler.py

Lines changed: 1 addition & 1 deletion
@@ -60,7 +60,7 @@ def await_ongoing_deletions_completed(self):
         Blocks until all ongoing deletions are completed.
         """
         while self.semaphore._value != self.max_workers:
-            logging.info(
+            logging.debug(
                 f"{self.max_workers - self.semaphore._value} deletions still running, waiting to complete"
             )
             time.sleep(1)

ragstack-e2e-tests/pyproject.langchain.toml

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@ pillow = "^10.2.0"
 testcontainers = "^3.7.1"
 python-dotenv = "^1.0.1"
 trulens-eval = "^0.21.0"
+nemoguardrails = "^0.8.0"
 
 # From LangChain optional deps, needed by WebBaseLoader
 beautifulsoup4 = "^4"

ragstack-e2e-tests/pyproject.llamaindex.toml

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@ pillow = "^10.2.0"
 testcontainers = "^3.7.1"
 python-dotenv = "^1.0.1"
 trulens-eval = "^0.21.0"
+nemoguardrails = "^0.7.1"
 
 # From LangChain optional deps, needed by WebBaseLoader
 beautifulsoup4 = "^4"

ragstack-e2e-tests/pyproject.ragstack-ai.toml

Lines changed: 1 addition & 0 deletions
@@ -22,6 +22,7 @@ pillow = "^10.2.0"
 testcontainers = "^3.7.1"
 python-dotenv = "^1.0.1"
 trulens-eval = "^0.21.0"
+nemoguardrails = "^0.8.0"
 
 # From LangChain optional deps, needed by WebBaseLoader
 beautifulsoup4 = "^4"

ragstack-e2e-tests/pyproject.toml

Lines changed: 5 additions & 1 deletion
@@ -20,11 +20,15 @@ pillow = "^10.2.0"
 testcontainers = "^3.7.1"
 python-dotenv = "^1.0.1"
 trulens-eval = "^0.21.0"
+nemoguardrails = "^0.8.0"
 
 # From LangChain optional deps, needed by WebBaseLoader
 beautifulsoup4 = "^4"
 
-ragstack-ai = { path = "../", develop = false, extras = ["langchain-google", "langchain-nvidia"]}
+ragstack-ai = { path = "../", develop = false, extras = [
+    "langchain-google",
+    "langchain-nvidia",
+] }
 # we need this specific feature from cassio: https://github.com/CassioML/cassio/pull/128
 cassio = "~0.1.4"
 