Skip to content

Commit 2d62b17

Browse files
committed
updated sample app used by AI PODs workshop
1 parent db0175a commit 2d62b17

File tree

4 files changed

+72
-32
lines changed

4 files changed

+72
-32
lines changed

content/en/ninja-workshops/14-cisco-ai-pods/8-deploy-vector-db.md

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -192,15 +192,24 @@ following example:
192192

193193
## Populate the Vector Database
194194

195-
Now that Weaviate is up and running, and we're capturing metrics from it
196-
to ensure it's healthy, let's add some data to it that we'll use in the next part
195+
Now that Weaviate is up and running, and we're capturing metrics from it,
196+
let's add some data to it that we'll use in the next part
197197
of the workshop with a custom application.
198198

199199
The application used to do this is based on
200200
[LangChain Playbook for NeMo Retriever Text Embedding NIM](https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/playbook.html#generate-embeddings-with-text-embedding-nim).
201201

202+
Per the configuration in `./load-embeddings/k8s-job.yaml`, we're going to load
203+
a [datasheet for the NVIDIA H200 Tensor Core GPU](https://nvdam.widen.net/content/udc6mzrk7a/original/hpc-datasheet-sc23-h200-datasheet-3002446.pdf)
204+
into our vector database.
205+
206+
This document includes information about NVIDIA's H200 GPUs that our large language model
207+
isn't trained on. In the next part of the workshop, we'll build an application that
208+
uses an LLM to answer questions using the context from this document, which will be loaded
209+
into the vector database.
210+
202211
We'll deploy a Kubernetes Job to our OpenShift cluster to load the embeddings.
203-
A job is used rather than a pod to ensure that this process runs only once:
212+
A Kubernetes Job is used rather than a Pod to ensure that this process runs only once:
204213

205214
``` bash
206215
oc create namespace llm-app

content/en/ninja-workshops/14-cisco-ai-pods/9-deploy-llm-app.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,4 +59,15 @@ The NVIDIA H200 graphics card has 5536 MB of GDDR6 memory.
5959
```
6060
6161
{{% /tab %}}
62-
{{< /tabs >}}
62+
{{< /tabs >}}
63+
64+
## View Trace Data in Splunk Observability Cloud
65+
66+
In Splunk Observability Cloud, navigate to `APM` and then select `Service Map`.
67+
Ensure the `llm-app` environment is selected. You should see a service map
68+
that looks like the following:
69+
70+
Click on `Traces` on the right-hand side menu. Then select one of the slower-running
71+
traces.
72+
73+

workshop/cisco-ai-pods/llm-app/app.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import os
22
import weaviate
33
import openlit
4+
import logging
45

56
from flask import Flask, request
67
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
@@ -56,8 +57,8 @@ def ask_question():
5657
vector_store = WeaviateVectorStore(
5758
client=weaviate_client,
5859
embedding=embeddings_model,
59-
index_name=None,
60-
text_key="text"
60+
index_name="CustomDocs",
61+
text_key="page_content"
6162
)
6263

6364
chain = (
@@ -70,8 +71,15 @@ def ask_question():
7071
| StrOutputParser()
7172
)
7273

74+
# Get the schema which contains all collections
75+
schema = weaviate_client.collections.list_all()
76+
77+
logger.info("Available collections in Weaviate:")
78+
for collection_name, collection_config in schema.items():
79+
print(f"- {collection_name}")
80+
7381
response = chain.invoke(question)
74-
print(response)
82+
logger.info(response)
7583

7684
weaviate_client.close()
7785

Lines changed: 37 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
import weaviate
3+
import logging
34

45
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
56
from langchain_community.document_loaders import PyPDFLoader
@@ -10,30 +11,41 @@
1011
DOCUMENT_URL = os.getenv('DOCUMENT_URL') # i.e. https://nvdam.widen.net/content/udc6mzrk7a/original/hpc-datasheet-sc23-h200-datasheet-3002446.pdf
1112
EMBEDDINGS_MODEL_URL = os.getenv('EMBEDDINGS_MODEL_URL') # i.e. http://localhost:8001/v1
1213

13-
# Load the specified PDF document
14-
loader = PyPDFLoader(
15-
DOCUMENT_URL
16-
)
17-
18-
documents = loader.load()
19-
20-
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
21-
document_chunks = text_splitter.split_documents(documents)
22-
23-
# Initialize and connect to a NeMo Retriever Text Embedding NIM (nvidia/llama-3.2-nv-embedqa-1b-v2)
24-
embeddings_model = NVIDIAEmbeddings(model="nvidia/llama-3.2-nv-embedqa-1b-v2",
25-
base_url=EMBEDDINGS_MODEL_URL)
26-
27-
weaviate_client = weaviate.connect_to_custom(
28-
# url is: http://weaviate.weaviate.svc.cluster.local:80
29-
http_host=os.getenv('WEAVIATE_HTTP_HOST'),
30-
http_port=os.getenv('WEAVIATE_HTTP_PORT'),
31-
http_secure=False,
32-
grpc_host=os.getenv('WEAVIATE_GRPC_HOST'),
33-
grpc_port=os.getenv('WEAVIATE_GRPC_PORT'),
34-
grpc_secure=False
35-
)
36-
37-
db = WeaviateVectorStore.from_documents(document_chunks, embeddings_model, client=weaviate_client)
14+
logger.info(f"Loading data from {DOCUMENT_URL}")
15+
try:
16+
# Load the specified PDF document
17+
loader = PyPDFLoader(
18+
DOCUMENT_URL
19+
)
20+
21+
documents = loader.load()
22+
23+
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
24+
document_chunks = text_splitter.split_documents(documents)
25+
26+
# Initialize and connect to a NeMo Retriever Text Embedding NIM (nvidia/llama-3.2-nv-embedqa-1b-v2)
27+
embeddings_model = NVIDIAEmbeddings(model="nvidia/llama-3.2-nv-embedqa-1b-v2",
28+
base_url=EMBEDDINGS_MODEL_URL)
29+
30+
weaviate_client = weaviate.connect_to_custom(
31+
# url is: http://weaviate.weaviate.svc.cluster.local:80
32+
http_host=os.getenv('WEAVIATE_HTTP_HOST'),
33+
http_port=os.getenv('WEAVIATE_HTTP_PORT'),
34+
http_secure=False,
35+
grpc_host=os.getenv('WEAVIATE_GRPC_HOST'),
36+
grpc_port=os.getenv('WEAVIATE_GRPC_PORT'),
37+
grpc_secure=False
38+
)
39+
40+
db = WeaviateVectorStore.from_documents(
41+
documents=document_chunks,
42+
embedding=embeddings_model,
43+
client=weaviate_client,
44+
index_name="CustomDocs",
45+
text_key="page_content"
46+
)
47+
48+
except Exception as e:
49+
logger.error(f"Error loading data into Weaviate: {e}")
3850

3951
weaviate_client.close()

0 commit comments

Comments
 (0)