Skip to content

Commit 2d62b17

Browse files
committed
updated sample app used by AI PODs workshop
1 parent db0175a commit 2d62b17

File tree

4 files changed

+72
-32
lines changed

4 files changed

+72
-32
lines changed

content/en/ninja-workshops/14-cisco-ai-pods/8-deploy-vector-db.md

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -192,15 +192,24 @@ following example:
192192

193193
## Populate the Vector Database
194194

195-
Now that Weaviate is up and running, and we're capturing metrics from it
196-
to ensure it's healthy, let's add some data to it that we'll use in the next part
195+
Now that Weaviate is up and running, and we're capturing metrics from it,
196+
let's add some data to it that we'll use in the next part
197197
of the workshop with a custom application.
198198

199199
The application used to do this is based on
200200
[LangChain Playbook for NeMo Retriever Text Embedding NIM](https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/playbook.html#generate-embeddings-with-text-embedding-nim).
201201

202+
Per the configuration in `./load-embeddings/k8s-job.yaml`, we're going to load
203+
a [datasheet for the NVIDIA H200 Tensor Core GPU](https://nvdam.widen.net/content/udc6mzrk7a/original/hpc-datasheet-sc23-h200-datasheet-3002446.pdf)
204+
into our vector database.
205+
206+
This document includes information about NVIDIA's H200 GPUs that our large language model
207+
isn't trained on. In the next part of the workshop, we'll build an application that
208+
uses an LLM to answer questions using the context from this document, which will be loaded
209+
into the vector database.
210+
202211
We'll deploy a Kubernetes Job to our OpenShift cluster to load the embeddings.
203-
A job is used rather than a pod to ensure that this process runs only once:
212+
A Kubernetes Job is used rather than a Pod to ensure that this process runs only once:
204213

205214
``` bash
206215
oc create namespace llm-app

content/en/ninja-workshops/14-cisco-ai-pods/9-deploy-llm-app.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,4 +59,15 @@ The NVIDIA H200 graphics card has 5536 MB of GDDR6 memory.
5959
```
6060
6161
{{% /tab %}}
62-
{{< /tabs >}}
62+
{{< /tabs >}}
63+
64+
## View Trace Data in Splunk Observability Cloud
65+
66+
In Splunk Observability Cloud, navigate to `APM` and then select `Service Map`.
67+
Ensure the `llm-app` environment is selected. You should see a service map
68+
that looks like the following:
69+
70+
Click on `Traces` on the right-hand side menu. Then select one of the slower-running
71+
traces.
72+
73+

workshop/cisco-ai-pods/llm-app/app.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import os
22
import weaviate
33
import openlit
4+
import logging
45

56
from flask import Flask, request
67
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
@@ -56,8 +57,8 @@ def ask_question():
5657
vector_store = WeaviateVectorStore(
5758
client=weaviate_client,
5859
embedding=embeddings_model,
59-
index_name=None,
60-
text_key="text"
60+
index_name="CustomDocs",
61+
text_key="page_content"
6162
)
6263

6364
chain = (
@@ -70,8 +71,15 @@ def ask_question():
7071
| StrOutputParser()
7172
)
7273

74+
# Get the schema which contains all collections
75+
schema = weaviate_client.collections.list_all()
76+
77+
logger.info("Available collections in Weaviate:")
78+
for collection_name, collection_config in schema.items():
79+
print(f"- {collection_name}")
80+
7381
response = chain.invoke(question)
74-
print(response)
82+
logger.info(response)
7583

7684
weaviate_client.close()
7785

Lines changed: 37 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
import weaviate
3+
import logging
34

45
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
56
from langchain_community.document_loaders import PyPDFLoader
@@ -10,30 +11,41 @@
1011
DOCUMENT_URL = os.getenv('DOCUMENT_URL') # i.e. https://nvdam.widen.net/content/udc6mzrk7a/original/hpc-datasheet-sc23-h200-datasheet-3002446.pdf
1112
EMBEDDINGS_MODEL_URL = os.getenv('EMBEDDINGS_MODEL_URL') # i.e. http://localhost:8001/v1
1213

13-
# Load the specified PDF document
14-
loader = PyPDFLoader(
15-
DOCUMENT_URL
16-
)
17-
18-
documents = loader.load()
19-
20-
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
21-
document_chunks = text_splitter.split_documents(documents)
22-
23-
# Initialize and connect to a NeMo Retriever Text Embedding NIM (nvidia/llama-3.2-nv-embedqa-1b-v2)
24-
embeddings_model = NVIDIAEmbeddings(model="nvidia/llama-3.2-nv-embedqa-1b-v2",
25-
base_url=EMBEDDINGS_MODEL_URL)
26-
27-
weaviate_client = weaviate.connect_to_custom(
28-
# url is: http://weaviate.weaviate.svc.cluster.local:80
29-
http_host=os.getenv('WEAVIATE_HTTP_HOST'),
30-
http_port=os.getenv('WEAVIATE_HTTP_PORT'),
31-
http_secure=False,
32-
grpc_host=os.getenv('WEAVIATE_GRPC_HOST'),
33-
grpc_port=os.getenv('WEAVIATE_GRPC_PORT'),
34-
grpc_secure=False
35-
)
36-
37-
db = WeaviateVectorStore.from_documents(document_chunks, embeddings_model, client=weaviate_client)
14+
logger.info(f"Loading data from {DOCUMENT_URL}")
15+
try:
16+
# Load the specified PDF document
17+
loader = PyPDFLoader(
18+
DOCUMENT_URL
19+
)
20+
21+
documents = loader.load()
22+
23+
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
24+
document_chunks = text_splitter.split_documents(documents)
25+
26+
# Initialize and connect to a NeMo Retriever Text Embedding NIM (nvidia/llama-3.2-nv-embedqa-1b-v2)
27+
embeddings_model = NVIDIAEmbeddings(model="nvidia/llama-3.2-nv-embedqa-1b-v2",
28+
base_url=EMBEDDINGS_MODEL_URL)
29+
30+
weaviate_client = weaviate.connect_to_custom(
31+
# url is: http://weaviate.weaviate.svc.cluster.local:80
32+
http_host=os.getenv('WEAVIATE_HTTP_HOST'),
33+
http_port=os.getenv('WEAVIATE_HTTP_PORT'),
34+
http_secure=False,
35+
grpc_host=os.getenv('WEAVIATE_GRPC_HOST'),
36+
grpc_port=os.getenv('WEAVIATE_GRPC_PORT'),
37+
grpc_secure=False
38+
)
39+
40+
db = WeaviateVectorStore.from_documents(
41+
documents=document_chunks,
42+
embedding=embeddings_model,
43+
client=weaviate_client,
44+
index_name="CustomDocs",
45+
text_key="page_content"
46+
)
47+
48+
except Exception as e:
49+
logger.error(f"Error loading data into Weaviate: {e}")
3850

3951
weaviate_client.close()

0 commit comments

Comments
 (0)