Skip to content

Commit 25a3ab5

Browse files
authored
Merge pull request #997 from oracle-devrel/rag-marketing-update
Marketing updates
2 parents 27afd3b + b671e8e commit 25a3ab5

File tree

8 files changed

+730
-266
lines changed

8 files changed

+730
-266
lines changed

cloud-infrastructure/ai-infra-gpu/AI Infrastructure/nim-gpu-oke/README.md

Lines changed: 163 additions & 154 deletions
Large diffs are not rendered by default.

cloud-infrastructure/ai-infra-gpu/AI Infrastructure/rag-langchain-vllm-mistral/README.md

Lines changed: 182 additions & 56 deletions
Large diffs are not rendered by default.
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
from llama_index.core import VectorStoreIndex, StorageContext, Settings
2+
from llama_index.vector_stores.qdrant import QdrantVectorStore
3+
from llama_index.readers.web import SitemapReader
4+
from qdrant_client import QdrantClient
5+
from langchain_community.embeddings import SentenceTransformerEmbeddings
6+
from langchain_community.llms import VLLM, VLLMOpenAI
7+
8+
from fastapi import HTTPException
9+
from pydantic import BaseModel
10+
11+
12+
DEFAULT_SITEMAP_URL = (
    'https://objectstorage.eu-frankfurt-1.oraclecloud.com'
    '/n/frpj5kvxryk1/b/thisIsThePlace/o/latest.xml'
)


def create_query_engine(sitemap_url=DEFAULT_SITEMAP_URL):
    """Build a RAG query engine over the pages listed in a sitemap.

    The pages referenced by ``sitemap_url`` are loaded as text, embedded with
    a SentenceTransformer model, stored in an in-memory Qdrant collection and
    exposed through a LlamaIndex query engine backed by a Mistral 7B Instruct
    model run through vLLM.

    Args:
        sitemap_url: URL of the ``sitemap.xml`` whose pages are indexed.
            Defaults to the sample sitemap used by this example.

    Returns:
        The query engine produced by ``VectorStoreIndex.as_query_engine``.
    """
    # Reads pages from the web based on their sitemap.xml.
    # Other data connectors are available.
    loader = SitemapReader(html_to_text=True)
    documents = loader.load_data(sitemap_url=sitemap_url)

    # In-memory Qdrant instance: nothing is persisted between runs.
    client = QdrantClient(
        location=":memory:"
    )
    embeddings = SentenceTransformerEmbeddings(
        model_name="all-MiniLM-L6-v2"
    )

    # Local instance of Mistral 7B Instruct v0.2 run through vLLM.
    # The model is downloaded from HuggingFace (no account needed).
    llm = VLLM(
        model="mistralai/Mistral-7B-Instruct-v0.2",
        # NOTE(review): also set in vllm_kwargs below; confirm which one the
        # wrapper actually forwards before removing either.
        gpu_memory_utilization=0.95,
        tensor_parallel_size=1,  # number of GPUs inference is distributed over
        trust_remote_code=True,  # mandatory for hf model
        max_new_tokens=128,
        top_k=10,
        top_p=0.95,
        temperature=0.8,
        vllm_kwargs={
            "swap_space": 1,
            "gpu_memory_utilization": 0.95,
            "max_model_len": 16384,  # limitation due to insufficient RAM
            "enforce_eager": True,
        },
    )

    # Implicit string concatenation keeps source indentation out of the
    # prompt (backslash continuations would embed it) and the quoted fallback
    # answer is now properly closed.
    system_prompt = (
        "As a support engineer, your role is to leverage the information "
        "in the context provided. Your task is to respond to queries based strictly "
        "on the information available in the provided context. Do not create new "
        "information under any circumstances. Refrain from repeating yourself. "
        "Extract your response solely from the context mentioned above. "
        "If the context does not contain relevant information for the question, "
        "respond with 'How can I assist you with questions related to the document?'"
    )

    # Global LlamaIndex settings used when chunking, embedding and answering.
    Settings.llm = llm
    Settings.embed_model = embeddings
    Settings.chunk_size = 1000
    Settings.chunk_overlap = 100
    Settings.num_output = 256
    Settings.system_prompt = system_prompt

    vector_store = QdrantVectorStore(
        client=client,
        collection_name="ansh"
    )

    storage_context = StorageContext.from_defaults(
        vector_store=vector_store
    )

    # Chunks and embeds the documents, then stores them in the collection.
    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context
    )

    query_engine = index.as_query_engine(llm=llm)

    return query_engine
81+
82+
def get_query_response(query: str, query_engine):
    """Run ``query`` against ``query_engine`` and package the answer.

    Args:
        query: Question to send to the engine.
        query_engine: Object exposing ``query(str)``; the result must provide
            a ``response`` string and a ``metadata`` attribute.

    Returns:
        A dict with the stripped answer under ``"response"`` and the engine's
        metadata, unchanged, under ``"metadata"``.

    Raises:
        HTTPException: With status 500 and the original error message when
            the query fails for any reason.
    """
    try:
        response = query_engine.query(query)
        # Log where the answer came from. A missing metadata mapping or a
        # node without a 'Source' entry must not fail the whole request.
        for node_info in (response.metadata or {}).values():
            print("Source: ", node_info.get('Source'))
        return {"response": response.response.strip(), "metadata": response.metadata}
    except Exception as e:
        # Top-level boundary: surface every failure as an HTTP 500 while
        # keeping the original exception as the cause.
        raise HTTPException(status_code=500, detail=str(e)) from e
92+
93+
94+
95+
96+
def main():
    """Build the query engine, ask one sample question and print the result."""
    engine = create_query_engine()
    answer = get_query_response(
        "What are the document formats supported by the Vision service?",
        engine,
    )
    print(answer)


if __name__ == '__main__':
    main()
Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
name: rag
2+
channels:
3+
- conda-forge
4+
dependencies:
5+
- _libgcc_mutex=0.1=conda_forge
6+
- _openmp_mutex=4.5=2_gnu
7+
- bzip2=1.0.8=hd590300_5
8+
- ca-certificates=2024.2.2=hbcca054_0
9+
- ld_impl_linux-64=2.40=h55db66e_0
10+
- libffi=3.4.2=h7f98852_5
11+
- libgcc-ng=13.2.0=hc881cc4_6
12+
- libgomp=13.2.0=hc881cc4_6
13+
- libnsl=2.0.1=hd590300_0
14+
- libsqlite=3.45.3=h2797004_0
15+
- libuuid=2.38.1=h0b41bf4_0
16+
- libxcrypt=4.4.36=hd590300_1
17+
- libzlib=1.2.13=hd590300_5
18+
- ncurses=6.4.20240210=h59595ed_0
19+
- openssl=3.2.1=hd590300_1
20+
- pip=24.0=pyhd8ed1ab_0
21+
- python=3.10.14=hd12c33a_0_cpython
22+
- readline=8.2=h8228510_1
23+
- setuptools=69.5.1=pyhd8ed1ab_0
24+
- tk=8.6.13=noxft_h4845f30_101
25+
- wheel=0.43.0=pyhd8ed1ab_1
26+
- xz=5.2.6=h166bdaf_0
27+
- pip:
28+
- aiohttp==3.9.5
29+
- aiosignal==1.3.1
30+
- annotated-types==0.6.0
31+
- anyio==4.3.0
32+
- async-timeout==4.0.3
33+
- attrs==23.2.0
34+
- beautifulsoup4==4.12.3
35+
- certifi==2024.2.2
36+
- charset-normalizer==3.3.2
37+
- chromedriver-autoinstaller==0.6.4
38+
- click==8.1.7
39+
- cloudpickle==3.0.0
40+
- cmake==3.29.2
41+
- cssselect==1.2.0
42+
- dataclasses-json==0.6.4
43+
- deprecated==1.2.14
44+
- dirtyjson==1.0.8
45+
- diskcache==5.6.3
46+
- distro==1.9.0
47+
- einops==0.7.0
48+
- exceptiongroup==1.2.1
49+
- fastapi==0.110.2
50+
- feedfinder2==0.0.4
51+
- feedparser==6.0.11
52+
- filelock==3.13.4
53+
- flash-attn==2.5.7
54+
- frozenlist==1.4.1
55+
- fsspec==2024.3.1
56+
- greenlet==3.0.3
57+
- grpcio==1.62.2
58+
- grpcio-tools==1.62.2
59+
- h11==0.14.0
60+
- h2==4.1.0
61+
- hpack==4.0.0
62+
- html2text==2020.1.16
63+
- httpcore==1.0.5
64+
- httptools==0.6.1
65+
- httpx==0.27.0
66+
- huggingface-hub==0.22.2
67+
- hyperframe==6.0.1
68+
- idna==3.7
69+
- interegular==0.3.3
70+
- jieba3k==0.35.1
71+
- jinja2==3.1.3
72+
- joblib==1.4.0
73+
- jsonpatch==1.33
74+
- jsonpointer==2.4
75+
- jsonschema==4.21.1
76+
- jsonschema-specifications==2023.12.1
77+
- langchain==0.1.16
78+
- langchain-community==0.0.34
79+
- langchain-core==0.1.46
80+
- langchain-text-splitters==0.0.1
81+
- langsmith==0.1.51
82+
- lark==1.1.9
83+
- llama-hub==0.0.79.post1
84+
- llama-index==0.10.32
85+
- llama-index-agent-openai==0.2.3
86+
- llama-index-cli==0.1.12
87+
- llama-index-core==0.10.32
88+
- llama-index-embeddings-langchain==0.1.2
89+
- llama-index-embeddings-openai==0.1.9
90+
- llama-index-indices-managed-llama-cloud==0.1.5
91+
- llama-index-legacy==0.9.48
92+
- llama-index-llms-anyscale==0.1.3
93+
- llama-index-llms-langchain==0.1.3
94+
- llama-index-llms-openai==0.1.16
95+
- llama-index-multi-modal-llms-openai==0.1.5
96+
- llama-index-program-openai==0.1.6
97+
- llama-index-question-gen-openai==0.1.3
98+
- llama-index-readers-file==0.1.19
99+
- llama-index-readers-llama-parse==0.1.4
100+
- llama-index-readers-web==0.1.10
101+
- llama-index-vector-stores-qdrant==0.2.8
102+
- llama-parse==0.4.2
103+
- llamaindex-py-client==0.1.18
104+
- llvmlite==0.42.0
105+
- lm-format-enforcer==0.9.8
106+
- lxml==5.2.1
107+
- markupsafe==2.1.5
108+
- marshmallow==3.21.1
109+
- mpmath==1.3.0
110+
- msgpack==1.0.8
111+
- multidict==6.0.5
112+
- mypy-extensions==1.0.0
113+
- nest-asyncio==1.6.0
114+
- networkx==3.3
115+
- newspaper3k==0.2.8
116+
- ninja==1.11.1.1
117+
- nltk==3.8.1
118+
- numba==0.59.1
119+
- numpy==1.26.4
120+
- nvidia-cublas-cu12==12.1.3.1
121+
- nvidia-cuda-cupti-cu12==12.1.105
122+
- nvidia-cuda-nvrtc-cu12==12.1.105
123+
- nvidia-cuda-runtime-cu12==12.1.105
124+
- nvidia-cudnn-cu12==8.9.2.26
125+
- nvidia-cufft-cu12==11.0.2.54
126+
- nvidia-curand-cu12==10.3.2.106
127+
- nvidia-cusolver-cu12==11.4.5.107
128+
- nvidia-cusparse-cu12==12.1.0.106
129+
- nvidia-ml-py==12.550.52
130+
- nvidia-nccl-cu12==2.19.3
131+
- nvidia-nvjitlink-cu12==12.4.127
132+
- nvidia-nvtx-cu12==12.1.105
133+
- openai==1.23.6
134+
- orjson==3.10.1
135+
- outcome==1.3.0.post0
136+
- outlines==0.0.34
137+
- packaging==23.2
138+
- pandas==2.2.2
139+
- pillow==10.3.0
140+
- playwright==1.43.0
141+
- portalocker==2.8.2
142+
- prometheus-client==0.20.0
143+
- protobuf==4.25.3
144+
- psutil==5.9.8
145+
- py-cpuinfo==9.0.0
146+
- pyaml==23.12.0
147+
- pydantic==2.7.1
148+
- pydantic-core==2.18.2
149+
- pyee==11.1.0
150+
- pypdf==4.2.0
151+
- pysocks==1.7.1
152+
- python-dateutil==2.9.0.post0
153+
- python-dotenv==1.0.1
154+
- pytz==2024.1
155+
- pyyaml==6.0.1
156+
- qdrant-client==1.9.0
157+
- ray==2.12.0
158+
- referencing==0.35.0
159+
- regex==2024.4.16
160+
- requests==2.31.0
161+
- requests-file==2.0.0
162+
- retrying==1.3.4
163+
- rpds-py==0.18.0
164+
- safetensors==0.4.3
165+
- scikit-learn==1.4.2
166+
- scipy==1.13.0
167+
- selenium==4.20.0
168+
- sentence-transformers==2.7.0
169+
- sentencepiece==0.2.0
170+
- sgmllib3k==1.0.0
171+
- six==1.16.0
172+
- sniffio==1.3.1
173+
- sortedcontainers==2.4.0
174+
- soupsieve==2.5
175+
- sqlalchemy==2.0.29
176+
- starlette==0.37.2
177+
- striprtf==0.0.26
178+
- sympy==1.12
179+
- tenacity==8.2.3
180+
- threadpoolctl==3.4.0
181+
- tiktoken==0.6.0
182+
- tinysegmenter==0.3
183+
- tldextract==5.1.2
184+
- tokenizers==0.19.1
185+
- torch==2.2.1
186+
- tqdm==4.66.2
187+
- transformers==4.40.1
188+
- trio==0.25.0
189+
- trio-websocket==0.11.1
190+
- triton==2.2.0
191+
- typing-extensions==4.11.0
192+
- typing-inspect==0.9.0
193+
- tzdata==2024.1
194+
- urllib3==2.2.1
195+
- uvicorn==0.29.0
196+
- uvloop==0.19.0
197+
- vllm==0.4.1
198+
- vllm-nccl-cu12==2.18.1.0.4.0
199+
- watchfiles==0.21.0
200+
- websockets==12.0
201+
- wrapt==1.16.0
202+
- wsproto==1.2.0
203+
- xformers==0.0.25
204+
- yarl==1.9.4
205+
prefix: /home/ubuntu/miniforge3/envs/rag
134 KB
Loading
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import requests
2+
from api_rag import create_query_engine, get_query_response
3+
4+
def call_api(query, url='http://localhost:8000/', timeout=None):
    """Send ``query`` to the RAG HTTP API and return the decoded JSON reply.

    Args:
        query: Question passed as the ``query`` URL parameter.
        url: Endpoint to call. Defaults to the local development server.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.get``. ``None`` (the default) waits indefinitely,
            which matches the previous behaviour.

    Returns:
        The JSON body of the response, decoded by ``response.json()``.

    Raises:
        Exception: If the server answers with a status code other than 200.
    """
    response = requests.get(url, params={'query': query}, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to get response from API, status code: {response.status_code}")
    return response.json()
11+
12+
13+
# Example usage: build the engine once, then run a batch of sample questions
# against it, printing either the answer or the error for each one.
if __name__ == "__main__":
    queries = [
        "What are the document formats supported by the Vision service?",
        "How can I reset my password?",
        "What is the maximum file size for uploads?",
        "Can you provide the API endpoint for retrieving user profiles?",
        "What are the security measures in place for API transactions?",
        "How do I update my billing information?",
        "What types of notifications will users receive?",
        "Is there a way to retrieve historical data?",
        "Can the system integrate with third-party services?",
        "What are the system requirements for installing the client application?",
    ]
    engine = create_query_engine()
    for question in queries:
        try:
            print("API Response:", get_query_response(question, engine))
        except Exception as err:
            # Keep going with the remaining questions when one fails.
            print(err)

cloud-infrastructure/ai-infra-gpu/AI Infrastructure/rag-langchain-vllm-mistral/rag-langchain-vllm-mistral.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
documents = loader.load_data(
1414
sitemap_url='https://objectstorage.eu-frankfurt-1.oraclecloud.com/n/frpj5kvxryk1/b/thisIsThePlace/o/latest.xml'
1515
)
16-
for document in documents:
17-
print(document.metadata['Source'])
16+
# for document in documents:
17+
# print(document.metadata['Source'])
1818

1919
# local Docker-based instance of Qdrant
2020
client = QdrantClient(

0 commit comments

Comments
 (0)