import os
import weaviate
import openlit

from flask import Flask, request
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_weaviate import WeaviateVectorStore

app = Flask(__name__)

# Initialize OpenLIT to auto-instrument the LLM and vector store calls
openlit.init()

# Read the NIM endpoints from environment variables
INSTRUCT_MODEL_URL = os.getenv('INSTRUCT_MODEL_URL')      # e.g. http://localhost:8000/v1
EMBEDDINGS_MODEL_URL = os.getenv('EMBEDDINGS_MODEL_URL')  # e.g. http://localhost:8001/v1

# Connect to an LLM NIM at the specified endpoint, using a specific model
llm = ChatNVIDIA(base_url=INSTRUCT_MODEL_URL, model="meta/llama-3.2-1b-instruct")

# Initialize and connect to a NeMo Retriever Text Embedding NIM (nvidia/llama-3.2-nv-embedqa-1b-v2)
embeddings_model = NVIDIAEmbeddings(model="nvidia/llama-3.2-nv-embedqa-1b-v2",
                                    base_url=EMBEDDINGS_MODEL_URL)

prompt = ChatPromptTemplate.from_messages([
    ("system",
     "You are a helpful and friendly AI! "
     "Your responses should be concise and no longer than two sentences. "
     "Do not hallucinate. Say you don't know if you don't have this information."
     # "Answer the question using only the context"
     "\n\nQuestion: {question}\n\nContext: {context}"
     ),
    ("user", "{question}")
])

@app.route("/askquestion", methods=['POST'])
def ask_question():
    # Pull the question out of the JSON request body
    data = request.json
    question = data.get('question')

    # Connect to Weaviate; the in-cluster URL is
    # http://weaviate.weaviate.svc.cluster.local:80
    weaviate_client = weaviate.connect_to_custom(
        http_host=os.getenv('WEAVIATE_HTTP_HOST'),
        http_port=int(os.getenv('WEAVIATE_HTTP_PORT')),
        http_secure=False,
        grpc_host=os.getenv('WEAVIATE_GRPC_HOST'),
        grpc_port=int(os.getenv('WEAVIATE_GRPC_PORT')),
        grpc_secure=False
    )
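
    # Optional sanity check: is_ready() is part of the weaviate v4 Python
    # client API and can be used to fail fast on a bad connection.
    # if not weaviate_client.is_ready():
    #     return "Vector store is not available", 503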

    # Connect with the vector store that was populated earlier;
    # index_name and text_key are placeholders here and must match the
    # values used during ingestion.
    vector_store = WeaviateVectorStore(
        client=weaviate_client,
        index_name="RAGDocuments",
        text_key="text",
        embedding=embeddings_model
    )

    # RAG chain: retrieve context for the question, fill in the prompt,
    # run the LLM, and parse the result to a string
    chain = (
        {
            "context": vector_store.as_retriever(),
            "question": RunnablePassthrough()
        }
        | prompt
        | llm
        | StrOutputParser()
    )

    response = chain.invoke(question)
    print(response)

    weaviate_client.close()

    return response
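
# A minimal entry point so the file can be run directly; host and port are
# assumptions, so adjust them for your deployment (the app can also be served
# with `flask run` or a WSGI server such as gunicorn).
#
# Example request, assuming the service listens on port 5000:
#   curl -X POST http://localhost:5000/askquestion \
#        -H "Content-Type: application/json" \
#        -d '{"question": "What models does this service use?"}'
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)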