oracle-devrel
diff --git a/‎cloud-infrastructure/ai-infra-gpu/AI Infrastructure/nim-gpu-oke/README.md
Lines changed: 163 additions & 154 deletions b/‎cloud-infrastructure/ai-infra-gpu/AI Infrastructure/nim-gpu-oke/README.md
Lines changed: 163 additions & 154 deletions
diff --git a/‎cloud-infrastructure/ai-infra-gpu/AI Infrastructure/rag-langchain-vllm-mistral/README.md
Lines changed: 182 additions & 56 deletions b/‎cloud-infrastructure/ai-infra-gpu/AI Infrastructure/rag-langchain-vllm-mistral/README.md
Lines changed: 182 additions & 56 deletions
diff --git a/‎cloud-infrastructure/ai-infra-gpu/AI Infrastructure/rag-langchain-vllm-mistral/api_rag.py
Lines changed: 103 additions & 0 deletions b/‎cloud-infrastructure/ai-infra-gpu/AI Infrastructure/rag-langchain-vllm-mistral/api_rag.py
Lines changed: 103 additions & 0 deletions
diff --git a/‎cloud-infrastructure/ai-infra-gpu/AI Infrastructure/rag-langchain-vllm-mistral/environment.yml
Lines changed: 205 additions & 0 deletions b/‎cloud-infrastructure/ai-infra-gpu/AI Infrastructure/rag-langchain-vllm-mistral/environment.yml
Lines changed: 205 additions & 0 deletions
diff --git a/‎cloud-infrastructure/ai-infra-gpu/AI Infrastructure/rag-langchain-vllm-mistral/img/create_gpu_accelerated.PNG
134 KB b/‎cloud-infrastructure/ai-infra-gpu/AI Infrastructure/rag-langchain-vllm-mistral/img/create_gpu_accelerated.PNG
134 KB
diff --git a/‎cloud-infrastructure/ai-infra-gpu/AI Infrastructure/rag-langchain-vllm-mistral/invoke_api.py
Lines changed: 33 additions & 0 deletions b/‎cloud-infrastructure/ai-infra-gpu/AI Infrastructure/rag-langchain-vllm-mistral/invoke_api.py
Lines changed: 33 additions & 0 deletions
diff --git a/‎cloud-infrastructure/ai-infra-gpu/AI Infrastructure/rag-langchain-vllm-mistral/rag-langchain-vllm-mistral.py
Lines changed: 2 additions & 2 deletions b/‎cloud-infrastructure/ai-infra-gpu/AI Infrastructure/rag-langchain-vllm-mistral/rag-langchain-vllm-mistral.py
Lines changed: 2 additions & 2 deletions
@@ -0,0 +1,103 @@
+from llama_index.core import VectorStoreIndex, StorageContext, Settings
+from llama_index.vector_stores.qdrant import QdrantVectorStore
+from llama_index.readers.web import SitemapReader
+from qdrant_client import QdrantClient
+from langchain_community.embeddings import SentenceTransformerEmbeddings
+from langchain_community.llms import VLLM, VLLMOpenAI
+
+from fastapi import HTTPException
+from pydantic import BaseModel
+
+
+def create_query_engine(
+    sitemap_url='https://objectstorage.eu-frankfurt-1.oraclecloud.com/n/frpj5kvxryk1/b/thisIsThePlace/o/latest.xml',
+):
+    """Build a LlamaIndex query engine over the pages listed in a sitemap.
+
+    The pages referenced by *sitemap_url* are loaded, embedded with a
+    SentenceTransformer model, stored in a Qdrant collection, and exposed
+    through a query engine that answers with a Mistral 7B Instruct model
+    loaded through LangChain's VLLM wrapper.
+
+    Side effect: the global llama_index ``Settings`` object (LLM, embedding
+    model, chunking, output size and system prompt) is updated.
+
+    Args:
+        sitemap_url: URL of the sitemap.xml whose pages are indexed.
+            Defaults to the sitemap used by the original example.
+
+    Returns:
+        The query engine created by ``index.as_query_engine``.
+    """
+    # Reads pages from the web based on their sitemap.xml.
+    # Other data connectors are available.
+    loader = SitemapReader(html_to_text=True)
+    documents = loader.load_data(sitemap_url=sitemap_url)
+
+    # In-memory Qdrant instance: nothing is persisted between runs.
+    client = QdrantClient(
+        location=":memory:"
+    )
+    embeddings = SentenceTransformerEmbeddings(
+        model_name="all-MiniLM-L6-v2"
+    )
+
+    # Local instance of Mistral 7B Instruct v0.2 run through vLLM.
+    # The model is downloaded from HuggingFace (no account needed).
+    llm = VLLM(
+        model="mistralai/Mistral-7B-Instruct-v0.2",
+        # NOTE(review): gpu_memory_utilization is also passed through
+        # vllm_kwargs below -- confirm which of the two takes effect.
+        gpu_memory_utilization=0.95,
+        tensor_parallel_size=1, # inference distributed over X GPUs
+        trust_remote_code=True, # mandatory for hf model
+        max_new_tokens=128,
+        top_k=10,
+        top_p=0.95,
+        temperature=0.8,
+        vllm_kwargs={
+            "swap_space": 1,
+            "gpu_memory_utilization": 0.95,
+            "max_model_len": 16384, # limitation due to insufficient RAM
+            "enforce_eager": True,
+        },
+    )
+
+    # Adjacent literals are concatenated at compile time. Unlike backslash
+    # continuations inside a single literal, this keeps the source
+    # indentation out of the prompt text.
+    system_prompt = (
+        "As a support engineer, your role is to leverage the information "
+        "in the context provided. Your task is to respond to queries based strictly "
+        "on the information available in the provided context. Do not create new "
+        "information under any circumstances. Refrain from repeating yourself. "
+        "Extract your response solely from the context mentioned above. "
+        "If the context does not contain relevant information for the question, "
+        "respond with 'How can I assist you with questions related to the document?'"
+    )
+
+    Settings.llm = llm
+    Settings.embed_model = embeddings
+    Settings.chunk_size = 1000
+    Settings.chunk_overlap = 100
+    Settings.num_output = 256
+    Settings.system_prompt = system_prompt
+
+    vector_store = QdrantVectorStore(
+        client=client,
+        collection_name="ansh"
+    )
+
+    storage_context = StorageContext.from_defaults(
+        vector_store=vector_store
+    )
+
+    index = VectorStoreIndex.from_documents(
+        documents,
+        storage_context=storage_context
+    )
+
+    query_engine = index.as_query_engine(llm=llm)
+
+    return query_engine
+
+def get_query_response(query: str, query_engine):
+    """Run *query* through *query_engine* and return the answer with its metadata.
+
+    Args:
+        query: Question to ask.
+        query_engine: Object exposing ``query(str)`` whose result has
+            ``response`` and ``metadata`` attributes.
+
+    Returns:
+        A dict with the stripped answer text under ``"response"`` and the
+        metadata mapping returned by the engine under ``"metadata"`` (an
+        empty dict when the engine returned none).
+
+    Raises:
+        HTTPException: With status 500 if querying or post-processing fails.
+    """
+    try:
+        response = query_engine.query(query)
+        # The engine may return no metadata at all; treat that as "no sources".
+        metadata = response.metadata or {}
+        for entry in metadata.values():
+            # An entry without a 'Source' key should not fail the whole query.
+            print("Source: ", entry.get('Source'))
+        answer = (response.response or "").strip()
+        return {"response": answer, "metadata": metadata}
+    except Exception as e:
+        # Chain the cause so the original traceback is kept for debugging.
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+
+
+
+def main():
+    """Build the query engine and print its answer to one sample question."""
+    engine = create_query_engine()
+    sample_question = "What are the document formats supported by the Vision service?"
+    answer = get_query_response(sample_question, engine)
+    print(answer)
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,205 @@
+name: rag
+channels:
+  - conda-forge
+dependencies:
+  - _libgcc_mutex=0.1=conda_forge
+  - _openmp_mutex=4.5=2_gnu
+  - bzip2=1.0.8=hd590300_5
+  - ca-certificates=2024.2.2=hbcca054_0
+  - ld_impl_linux-64=2.40=h55db66e_0
+  - libffi=3.4.2=h7f98852_5
+  - libgcc-ng=13.2.0=hc881cc4_6
+  - libgomp=13.2.0=hc881cc4_6
+  - libnsl=2.0.1=hd590300_0
+  - libsqlite=3.45.3=h2797004_0
+  - libuuid=2.38.1=h0b41bf4_0
+  - libxcrypt=4.4.36=hd590300_1
+  - libzlib=1.2.13=hd590300_5
+  - ncurses=6.4.20240210=h59595ed_0
+  - openssl=3.2.1=hd590300_1
+  - pip=24.0=pyhd8ed1ab_0
+  - python=3.10.14=hd12c33a_0_cpython
+  - readline=8.2=h8228510_1
+  - setuptools=69.5.1=pyhd8ed1ab_0
+  - tk=8.6.13=noxft_h4845f30_101
+  - wheel=0.43.0=pyhd8ed1ab_1
+  - xz=5.2.6=h166bdaf_0
+  - pip:
+      - aiohttp==3.9.5
+      - aiosignal==1.3.1
+      - annotated-types==0.6.0
+      - anyio==4.3.0
+      - async-timeout==4.0.3
+      - attrs==23.2.0
+      - beautifulsoup4==4.12.3
+      - certifi==2024.2.2
+      - charset-normalizer==3.3.2
+      - chromedriver-autoinstaller==0.6.4
+      - click==8.1.7
+      - cloudpickle==3.0.0
+      - cmake==3.29.2
+      - cssselect==1.2.0
+      - dataclasses-json==0.6.4
+      - deprecated==1.2.14
+      - dirtyjson==1.0.8
+      - diskcache==5.6.3
+      - distro==1.9.0
+      - einops==0.7.0
+      - exceptiongroup==1.2.1
+      - fastapi==0.110.2
+      - feedfinder2==0.0.4
+      - feedparser==6.0.11
+      - filelock==3.13.4
+      - flash-attn==2.5.7
+      - frozenlist==1.4.1
+      - fsspec==2024.3.1
+      - greenlet==3.0.3
+      - grpcio==1.62.2
+      - grpcio-tools==1.62.2
+      - h11==0.14.0
+      - h2==4.1.0
+      - hpack==4.0.0
+      - html2text==2020.1.16
+      - httpcore==1.0.5
+      - httptools==0.6.1
+      - httpx==0.27.0
+      - huggingface-hub==0.22.2
+      - hyperframe==6.0.1
+      - idna==3.7
+      - interegular==0.3.3
+      - jieba3k==0.35.1
+      - jinja2==3.1.3
+      - joblib==1.4.0
+      - jsonpatch==1.33
+      - jsonpointer==2.4
+      - jsonschema==4.21.1
+      - jsonschema-specifications==2023.12.1
+      - langchain==0.1.16
+      - langchain-community==0.0.34
+      - langchain-core==0.1.46
+      - langchain-text-splitters==0.0.1
+      - langsmith==0.1.51
+      - lark==1.1.9
+      - llama-hub==0.0.79.post1
+      - llama-index==0.10.32
+      - llama-index-agent-openai==0.2.3
+      - llama-index-cli==0.1.12
+      - llama-index-core==0.10.32
+      - llama-index-embeddings-langchain==0.1.2
+      - llama-index-embeddings-openai==0.1.9
+      - llama-index-indices-managed-llama-cloud==0.1.5
+      - llama-index-legacy==0.9.48
+      - llama-index-llms-anyscale==0.1.3
+      - llama-index-llms-langchain==0.1.3
+      - llama-index-llms-openai==0.1.16
+      - llama-index-multi-modal-llms-openai==0.1.5
+      - llama-index-program-openai==0.1.6
+      - llama-index-question-gen-openai==0.1.3
+      - llama-index-readers-file==0.1.19
+      - llama-index-readers-llama-parse==0.1.4
+      - llama-index-readers-web==0.1.10
+      - llama-index-vector-stores-qdrant==0.2.8
+      - llama-parse==0.4.2
+      - llamaindex-py-client==0.1.18
+      - llvmlite==0.42.0
+      - lm-format-enforcer==0.9.8
+      - lxml==5.2.1
+      - markupsafe==2.1.5
+      - marshmallow==3.21.1
+      - mpmath==1.3.0
+      - msgpack==1.0.8
+      - multidict==6.0.5
+      - mypy-extensions==1.0.0
+      - nest-asyncio==1.6.0
+      - networkx==3.3
+      - newspaper3k==0.2.8
+      - ninja==1.11.1.1
+      - nltk==3.8.1
+      - numba==0.59.1
+      - numpy==1.26.4
+      - nvidia-cublas-cu12==12.1.3.1
+      - nvidia-cuda-cupti-cu12==12.1.105
+      - nvidia-cuda-nvrtc-cu12==12.1.105
+      - nvidia-cuda-runtime-cu12==12.1.105
+      - nvidia-cudnn-cu12==8.9.2.26
+      - nvidia-cufft-cu12==11.0.2.54
+      - nvidia-curand-cu12==10.3.2.106
+      - nvidia-cusolver-cu12==11.4.5.107
+      - nvidia-cusparse-cu12==12.1.0.106
+      - nvidia-ml-py==12.550.52
+      - nvidia-nccl-cu12==2.19.3
+      - nvidia-nvjitlink-cu12==12.4.127
+      - nvidia-nvtx-cu12==12.1.105
+      - openai==1.23.6
+      - orjson==3.10.1
+      - outcome==1.3.0.post0
+      - outlines==0.0.34
+      - packaging==23.2
+      - pandas==2.2.2
+      - pillow==10.3.0
+      - playwright==1.43.0
+      - portalocker==2.8.2
+      - prometheus-client==0.20.0
+      - protobuf==4.25.3
+      - psutil==5.9.8
+      - py-cpuinfo==9.0.0
+      - pyaml==23.12.0
+      - pydantic==2.7.1
+      - pydantic-core==2.18.2
+      - pyee==11.1.0
+      - pypdf==4.2.0
+      - pysocks==1.7.1
+      - python-dateutil==2.9.0.post0
+      - python-dotenv==1.0.1
+      - pytz==2024.1
+      - pyyaml==6.0.1
+      - qdrant-client==1.9.0
+      - ray==2.12.0
+      - referencing==0.35.0
+      - regex==2024.4.16
+      - requests==2.31.0
+      - requests-file==2.0.0
+      - retrying==1.3.4
+      - rpds-py==0.18.0
+      - safetensors==0.4.3
+      - scikit-learn==1.4.2
+      - scipy==1.13.0
+      - selenium==4.20.0
+      - sentence-transformers==2.7.0
+      - sentencepiece==0.2.0
+      - sgmllib3k==1.0.0
+      - six==1.16.0
+      - sniffio==1.3.1
+      - sortedcontainers==2.4.0
+      - soupsieve==2.5
+      - sqlalchemy==2.0.29
+      - starlette==0.37.2
+      - striprtf==0.0.26
+      - sympy==1.12
+      - tenacity==8.2.3
+      - threadpoolctl==3.4.0
+      - tiktoken==0.6.0
+      - tinysegmenter==0.3
+      - tldextract==5.1.2
+      - tokenizers==0.19.1
+      - torch==2.2.1
+      - tqdm==4.66.2
+      - transformers==4.40.1
+      - trio==0.25.0
+      - trio-websocket==0.11.1
+      - triton==2.2.0
+      - typing-extensions==4.11.0
+      - typing-inspect==0.9.0
+      - tzdata==2024.1
+      - urllib3==2.2.1
+      - uvicorn==0.29.0
+      - uvloop==0.19.0
+      - vllm==0.4.1
+      - vllm-nccl-cu12==2.18.1.0.4.0
+      - watchfiles==0.21.0
+      - websockets==12.0
+      - wrapt==1.16.0
+      - wsproto==1.2.0
+      - xformers==0.0.25
+      - yarl==1.9.4
+prefix: /home/ubuntu/miniforge3/envs/rag
@@ -0,0 +1,33 @@
+import requests
+from api_rag import create_query_engine, get_query_response
+
+def call_api(query, url='http://localhost:8000/', timeout=None):
+    """Send *query* to the RAG HTTP API and return the decoded JSON reply.
+
+    Args:
+        query: Question forwarded as the ``query`` URL parameter.
+        url: Endpoint to call. Defaults to the local server on port 8000.
+        timeout: Seconds to wait for the server, passed to ``requests.get``.
+            ``None`` (the default) waits indefinitely, as before.
+
+    Returns:
+        The JSON body of the response, decoded by ``response.json()``.
+
+    Raises:
+        Exception: If the server answers with a status code other than 200.
+    """
+    response = requests.get(url, params={'query': query}, timeout=timeout)
+    if response.status_code != 200:
+        raise Exception(f"Failed to get response from API, status code: {response.status_code}")
+    return response.json()
+
+
+# Example usage
+if __name__ == "__main__":
+    # Sample questions, answered one after another by a single query engine.
+    sample_questions = (
+        "What are the document formats supported by the Vision service?",
+        "How can I reset my password?",
+        "What is the maximum file size for uploads?",
+        "Can you provide the API endpoint for retrieving user profiles?",
+        "What are the security measures in place for API transactions?",
+        "How do I update my billing information?",
+        "What types of notifications will users receive?",
+        "Is there a way to retrieve historical data?",
+        "Can the system integrate with third-party services?",
+        "What are the system requirements for installing the client application?",
+    )
+    # The engine is built once and reused for every question.
+    rag_engine = create_query_engine()
+    for question in sample_questions:
+        try:
+            print("API Response:", get_query_response(question, rag_engine))
+        except Exception as err:
+            # Report the failure and carry on with the remaining questions.
+            print(str(err))
@@ -13,8 +13,8 @@
 documents = loader.load_data(
     sitemap_url='https://objectstorage.eu-frankfurt-1.oraclecloud.com/n/frpj5kvxryk1/b/thisIsThePlace/o/latest.xml'
 )
-for document in documents:
-    print(document.metadata['Source'])
+# for document in documents:
+#    print(document.metadata['Source'])
 
 # local Docker-based instance of Qdrant
 client = QdrantClient(