
Commit e9ba10a

final draft of the AI PODs workshop
1 parent 2d62b17 commit e9ba10a

File tree

12 files changed: +102 −49 lines

content/en/ninja-workshops/14-cisco-ai-pods/9-deploy-llm-app.md

Lines changed: 38 additions & 4 deletions
@@ -15,8 +15,7 @@ Let's deploy an application to our OpenShift cluster that answers questions
 using the context that we loaded into the Weaviate vector database earlier.
 
 ``` bash
-cd workshop/cisco-ai-pods/llm-app
-oc apply -f k8s-manifest.yaml
+oc apply -f ./llm-app/k8s-manifest.yaml
 ```
 
 > Note: to build a Docker image for this Python application, we executed the following commands:
@@ -55,7 +54,7 @@ curl -X "POST" \
 {{% tab title="Example Output" %}}
 
 ``` bash
-The NVIDIA H200 graphics card has 5536 MB of GDDR6 memory.
+The NVIDIA H200 has 141GB of HBM3e memory, which is twice the capacity of the NVIDIA H100 Tensor Core GPU with 1.4X more memory bandwidth.
 ```
 
 {{% /tab %}}
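
The same question can also be posed from Python rather than curl; a minimal sketch using the `requests` library, assuming the llm-app service has been port-forwarded to localhost:8080 (the actual host and port depend on how the service is exposed in your cluster):

```python
import requests

# Hypothetical endpoint: adjust the host/port to match your route or port-forward.
URL = "http://localhost:8080/askquestion"

resp = requests.post(URL, json={"question": "How much memory does the NVIDIA H200 have?"})
resp.raise_for_status()  # surface HTTP errors early
print(resp.text)         # the answer, grounded in the Weaviate context
```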
@@ -67,7 +66,42 @@ In Splunk Observability Cloud, navigate to `APM` and then select `Service Map`.
 Ensure the `llm-app` environment is selected. You should see a service map
 that looks like the following:
 
+![Service Map](../images/ServiceMap.png)
+
 Click on `Traces` on the right-hand side menu. Then select one of the slower running
-traces.
+traces. It should look like the following example:
+
+![Trace](../images/Trace.png)
+
+The trace shows all the interactions that our application executed to return an answer
+to the user's question (i.e. "How much memory does the NVIDIA H200 have?").
+
+For example, we can see where our application performed a similarity search to look
+for documents related to the question at hand in the Weaviate vector database:
+
+![Document Retrieval](../images/DocumentRetrieval.png)
+
+We can also see how the application created a prompt to send to the LLM, including the
+context that was retrieved from the vector database:
+
+![Prompt Template](../images/PromptTemplate.png)
+
+Finally, we can see the response from the LLM, the time it took, and the number of
+input and output tokens utilized:
+
+![LLM Response](../images/LLMResponse.png)
+
+## Wrap-Up
 
+We hope you enjoyed this workshop, which provided hands-on experience deploying and working
+with several of the technologies that are used to monitor Cisco AI PODs with
+Splunk Observability Cloud. Specifically, you had the opportunity to:
 
+* Deploy a Red Hat OpenShift cluster with GPU-based worker nodes.
+* Deploy the NVIDIA NIM Operator and NVIDIA GPU Operator.
+* Deploy Large Language Models (LLMs) using NVIDIA NIM to the cluster.
+* Deploy the OpenTelemetry Collector in the Red Hat OpenShift cluster.
+* Add Prometheus receivers to the collector to ingest infrastructure metrics.
+* Deploy the Weaviate vector database to the cluster.
+* Instrument Python services that interact with Large Language Models (LLMs) with OpenTelemetry.
+* Understand which details OpenTelemetry captures in traces from applications that interact with LLMs.
6 binary files changed (555 KB, 487 KB, 612 KB, 301 KB, 453 KB, 49 Bytes); previews not shown.

workshop/cisco-ai-pods/llm-app/Dockerfile

Lines changed: 4 additions & 0 deletions
@@ -13,6 +13,10 @@ RUN pip install -r requirements.txt
 # Add additional OpenTelemetry instrumentation packages
 RUN opentelemetry-bootstrap --action=install
 
+# Remove unwanted instrumentation to ensure we get a clean trace
+RUN pip uninstall -y opentelemetry-instrumentation-httpx
+RUN pip uninstall -y opentelemetry-instrumentation-requests
+
 # Copy the application code
 COPY . /app
 
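Context for the uninstalls above: `opentelemetry-bootstrap` installs an instrumentation package for every supported library it detects, and the `httpx` and `requests` instrumentations would add spans for HTTP calls that the LLM and Weaviate client libraries make internally, cluttering the trace. A quick way to check what remains in the image, as a sketch (assumes Python 3.10+, and that the `opentelemetry_instrumentor` entry-point group is what the auto-instrumentation agent uses for discovery):

```python
# Sketch: list the OpenTelemetry auto-instrumentations installed in the image.
from importlib.metadata import entry_points

for ep in entry_points(group="opentelemetry_instrumentor"):
    print(ep.name)  # "httpx" and "requests" should be absent after the uninstalls
```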

workshop/cisco-ai-pods/llm-app/app.py

Lines changed: 54 additions & 45 deletions
@@ -11,9 +11,12 @@
 from langchain_core.output_parsers import StrOutputParser
 from langchain_weaviate import WeaviateVectorStore
 
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
 app = Flask(__name__)
 
-openlit.init()
+openlit.init(environment="llm-app")
 
 # Read environment variables
 INSTRUCT_MODEL_URL = os.getenv('INSTRUCT_MODEL_URL') # i.e. http://localhost:8000/v1
@@ -40,47 +43,53 @@
 @app.route("/askquestion", methods=['POST'])
 def ask_question():
 
-    data = request.json
-    question = data.get('question')
-
-    weaviate_client = weaviate.connect_to_custom(
-        # url is: http://weaviate.weaviate.svc.cluster.local:80
-        http_host=os.getenv('WEAVIATE_HTTP_HOST'),
-        http_port=os.getenv('WEAVIATE_HTTP_PORT'),
-        http_secure=False,
-        grpc_host=os.getenv('WEAVIATE_GRPC_HOST'),
-        grpc_port=os.getenv('WEAVIATE_GRPC_PORT'),
-        grpc_secure=False
-    )
-
-    # connect with the vector store that was populated earlier
-    vector_store = WeaviateVectorStore(
-        client=weaviate_client,
-        embedding=embeddings_model,
-        index_name="CustomDocs",
-        text_key="page_content"
-    )
-
-    chain = (
-        {
-            "context": vector_store.as_retriever(),
-            "question": RunnablePassthrough()
-        }
-        | prompt
-        | llm
-        | StrOutputParser()
-    )
-
-    # Get the schema which contains all collections
-    schema = weaviate_client.collections.list_all()
-
-    logger.info("Available collections in Weaviate:")
-    for collection_name, collection_config in schema.items():
-        print(f"- {collection_name}")
-
-    response = chain.invoke(question)
-    logger.info(response)
-
-    weaviate_client.close()
-
-    return response
+    logger.info("Responding to question")
+    try:
+        data = request.json
+        question = data.get('question')
+
+        weaviate_client = weaviate.connect_to_custom(
+            # url is: http://weaviate.weaviate.svc.cluster.local:80
+            http_host=os.getenv('WEAVIATE_HTTP_HOST'),
+            http_port=os.getenv('WEAVIATE_HTTP_PORT'),
+            http_secure=False,
+            grpc_host=os.getenv('WEAVIATE_GRPC_HOST'),
+            grpc_port=os.getenv('WEAVIATE_GRPC_PORT'),
+            grpc_secure=False
+        )
+
+        # connect with the vector store that was populated earlier
+        vector_store = WeaviateVectorStore(
+            client=weaviate_client,
+            embedding=embeddings_model,
+            index_name="CustomDocs",
+            text_key="page_content"
+        )
+
+        chain = (
+            {
+                "context": vector_store.as_retriever(),
+                "question": RunnablePassthrough()
+            }
+            | prompt
+            | llm
+            | StrOutputParser()
+        )
+
+        # Get the schema, which contains all collections
+        schema = weaviate_client.collections.list_all()
+
+        logger.info("Available collections in Weaviate:")
+        for collection_name, collection_config in schema.items():
+            logger.info(f"- {collection_name}")
+
+        response = chain.invoke(question)
+        logger.info(response)
+
+        weaviate_client.close()
+
+        return response
+
+    except Exception as e:
+        logger.error(f"Error responding to question: {e}")
+        return "An error occurred while responding to the question", 500  # Flask rejects a bare None response
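
For local experimentation, the route can be exercised without a cluster using Flask's built-in test client; a minimal sketch, assuming the `WEAVIATE_*` and model URL environment variables above point at reachable services:

```python
# Hypothetical smoke test for the /askquestion route; run from the llm-app directory.
from app import app

with app.test_client() as client:
    resp = client.post("/askquestion",
                       json={"question": "How much memory does the NVIDIA H200 have?"})
    print(resp.status_code, resp.get_data(as_text=True))
```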

workshop/cisco-ai-pods/llm-app/k8s-manifest.yaml

Lines changed: 2 additions & 0 deletions
@@ -39,6 +39,8 @@ spec:
         # filter out health check requests to the root URL
         - name: OTEL_PYTHON_EXCLUDED_URLS
           value: "^(https?://)?[^/]+(/)?$"
+        - name: OTEL_PYTHON_DISABLED_INSTRUMENTATIONS
+          value: "httpx,requests"
         - name: SPLUNK_PROFILER_ENABLED
           value: "true"
         - name: INSTRUCT_MODEL_URL
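
The new `OTEL_PYTHON_DISABLED_INSTRUMENTATIONS` entry complements the Dockerfile uninstalls above, disabling the same two instrumentations at the agent level. The `OTEL_PYTHON_EXCLUDED_URLS` pattern, meanwhile, keeps health-check requests to the root URL out of the trace data while leaving real endpoints instrumented; a quick sanity check of the regex in Python (the service host and port here are hypothetical):

```python
import re

# The pattern from OTEL_PYTHON_EXCLUDED_URLS: matches a bare host with an
# optional scheme and trailing slash, but nothing that has a path.
pattern = re.compile(r"^(https?://)?[^/]+(/)?$")

print(bool(pattern.match("http://llm-app:8080/")))             # True  -> excluded (health check)
print(bool(pattern.match("http://llm-app:8080/askquestion")))  # False -> still traced
```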
