|
9 | 9 | from fastapi.responses import JSONResponse |
10 | 10 | import os |
11 | 11 |
|
12 | | -# from config import Config |
13 | 12 | from logger import logger |
14 | 13 | from time import perf_counter |
15 | 14 |
|
16 | | -from vector_database.qdrant_vdb import QdrantVDB |
17 | | -from rag.ingestion_pipeline import IngestionPipeline |
18 | | -from rag.llm.chat_model import ChatModel |
19 | | -# from rag.llm.cloud_chat_model import CloudLLM |
| 15 | +from service.llm_service import generate_response |
| 16 | + |
| 17 | +from service.qdrant_service import ( |
| 18 | + file_already_uploaded, |
| 19 | + collection_already_exists, |
| 20 | + ingest_file |
| 21 | +) |
| 22 | + |
20 | 23 | from service.rag_service import ( |
21 | 24 | retrieve_similar_docs, |
22 | 25 | prepare_prompt, |
|
37 | 40 |
|
38 | 41 | router = APIRouter() |
39 | 42 |
|
40 | | -# Set vector database |
41 | | -qdrant = QdrantVDB() |
42 | | - |
43 | | -# Set chat model for local llm models |
44 | | -# Make calls to local models in openwebui hosted by the university |
45 | | -llm = ChatModel(model_name="llama3.3:latest") |
46 | | - |
47 | | -# Alternatively, we can switch to a chat model based on cloud models as well |
48 | | -# If you want to use other cloud models, please adjust model_name, |
49 | | -# model_provider, and api key |
50 | | -# accordingly |
51 | | - |
52 | | -# Examples: |
53 | | -# llm_cloud_anthropic = CloudLLM( |
54 | | -# model_name="claude-3-sonnet-20240229", |
55 | | -# model_provider="anthropic", |
56 | | -# api_key=Config.api_key_anthropic, |
57 | | -# ) |
58 | | -# llm_cloud_openai = CloudLLM( |
59 | | -# model_name="gpt-4-1106-preview", |
60 | | -# model_provider="openai", |
61 | | -# api_key=Config.api_key_openai, |
62 | | -# ) |
63 | | -# |
64 | | -# llm_cloud_mistral = CloudLLM( |
65 | | -# model_name="mistral-medium", |
66 | | -# model_provider="mistral", |
67 | | -# api_key=Config.api_key_mistral, |
68 | | -# ) |
69 | | - |
70 | | -# If no parameters are provided, the default cloud model will be openai. |
71 | | -# If a cloud model is wanted, please remove the comment |
72 | | -# for package import "CloudLLM" |
73 | | - |
74 | | -# Example: |
75 | | -# llm = CloudLLM() # same as llm_cloud_openai |
76 | | - |
77 | 43 |
|
78 | 44 | @router.post("/upload") |
79 | 45 | async def upload_file( |
@@ -102,26 +68,15 @@ async def upload_file( |
102 | 68 | buffer.write(await file.read()) |
103 | 69 |
|
104 | 70 | collection_name = f"recipes_{current_user.user_id}" |
105 | | - if ( |
106 | | - qdrant.client.collection_exists(collection_name) |
107 | | - and qdrant.collection_contains_file( |
108 | | - qdrant.client, |
109 | | - collection_name, |
110 | | - filename |
111 | | - ) |
112 | | - ): |
| 71 | + if file_already_uploaded(collection_name, filename): |
113 | 72 | logger.info( |
114 | 73 | "File already exists in qdrant for user %s", |
115 | 74 | current_user.username |
116 | 75 | ) |
117 | 76 | return {"message": f"File '{filename}' already uploaded."} |
118 | 77 |
|
119 | | - vector_store = qdrant.create_and_get_vector_storage(collection_name) |
120 | | - ingestion_pipeline = IngestionPipeline(vector_store=vector_store) |
121 | | - ingestion_pipeline.ingest(file_path, filename) |
122 | | - |
| 78 | + ingest_file(collection_name, file_path, filename) |
123 | 79 | file_upload_successfully_counter.inc() |
124 | | - |
125 | 80 | return {"message": "File processed successfully."} |
126 | 81 |
|
127 | 82 | except Exception as e: |
@@ -163,35 +118,32 @@ async def generate(request: Request): |
163 | 118 |
|
164 | 119 | try: |
165 | 120 | retrieved_docs = "" |
166 | | - if qdrant.client.collection_exists(collection_name): |
167 | | - vector_store = qdrant.create_and_get_vector_storage( |
168 | | - collection_name |
169 | | - ) |
| 121 | + if collection_already_exists(collection_name): |
170 | 122 | logger.info( |
171 | | - "Vector store is created for the collection %s for user_id %s", |
| 123 | + "Collection %s already exists for user_id %s", |
172 | 124 | collection_name, |
173 | 125 | user_id |
174 | 126 | ) |
175 | | - retrieved_docs = retrieve_similar_docs(vector_store, query) |
| 127 | + |
| 128 | + retrieved_docs = retrieve_similar_docs(collection_name, query) |
176 | 129 | logger.info("Similar docs retrieved from the vector store") |
177 | 130 |
|
178 | 131 | messages = process_raw_messages(messages_raw) |
179 | 132 | logger.info("Raw messages are processed for prompt preparation") |
180 | 133 |
|
181 | 134 | prompt = prepare_prompt( |
182 | | - llm.get_system_prompt(), |
183 | 135 | query, |
184 | 136 | retrieved_docs, |
185 | 137 | messages |
186 | 138 | ) |
187 | 139 | logger.info("Prompt is prepared") |
188 | 140 |
|
189 | | - response = llm.invoke(prompt) |
| 141 | + response = generate_response(prompt) |
190 | 142 | logger.info("Response is generated") |
191 | 143 |
|
192 | 144 | generation_successfully_counter.inc() |
193 | 145 |
|
194 | | - return JSONResponse(content={"response": response.content}) |
| 146 | + return JSONResponse(content={"response": response}) |
195 | 147 |
|
196 | 148 | except Exception as e: |
197 | 149 | logger.error("Generation is failed. Error: %s", str(e), exc_info=True) |
|
0 commit comments