Skip to content

Commit bc2fd47

Browse files
committed
feat: added comprehensive tests, default model qwen
1 parent aab4bf4 commit bc2fd47

File tree

3 files changed

+164
-2
lines changed

3 files changed

+164
-2
lines changed

agentic_rag/OraDBVectorStore.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,72 @@ def query_repo_collection(self, query: str, n_results: int = 3) -> List[Dict[str
338338
formatted_results.append(result)
339339

340340
return formatted_results
341+
342+
def get_collection_count(self, collection_name: str) -> int:
    """Return how many chunks (rows) are stored for a collection.

    Args:
        collection_name: Logical collection name; one of pdf_documents,
            web_documents, repository_documents, general_knowledge.

    Returns:
        The value of ``SELECT COUNT(*)`` on the table backing the collection.

    Raises:
        ValueError: If ``collection_name`` is not one of the known names.
    """
    # Logical collection name -> backing table name.
    tables_by_collection = {
        "pdf_documents": "PDFCollection",
        "web_documents": "WebCollection",
        "repository_documents": "RepoCollection",
        "general_knowledge": "GeneralCollection",
    }

    if collection_name not in tables_by_collection:
        raise ValueError(f"Unknown collection name: {collection_name}")
    target_table = tables_by_collection[collection_name]

    # The table name comes only from the fixed mapping above, never from the
    # caller's string, so building the statement by concatenation is safe.
    self.cursor.execute("SELECT COUNT(*) FROM " + target_table)
    first_row = self.cursor.fetchone()
    return first_row[0]
369+
370+
def get_latest_chunk(self, collection_name: str) -> Dict[str, Any]:
    """Get the most recently inserted chunk from a collection.

    Args:
        collection_name: Name of the collection (pdf_documents, web_documents,
            repository_documents, general_knowledge).

    Returns:
        Dictionary with keys ``id``, ``content`` and ``metadata``. String
        metadata is parsed as JSON; if it is not valid JSON the raw string is
        returned unchanged so the caller can decide how to display it.

    Raises:
        ValueError: If the collection name is unknown or the table is empty.
    """
    # Map collection names to table names
    collection_map = {
        "pdf_documents": "PDFCollection",
        "web_documents": "WebCollection",
        "repository_documents": "RepoCollection",
        "general_knowledge": "GeneralCollection",
    }

    table_name = collection_map.get(collection_name)
    if not table_name:
        raise ValueError(f"Unknown collection name: {collection_name}")

    # The query orders by ROWID (not the Id column) as a proxy for insertion
    # time.
    # NOTE(review): Oracle does not promise that ROWID order equals insertion
    # order (e.g. space freed by deletes can be reused) - confirm, or order by
    # a timestamp/sequence column if the table has one.
    sql = f"SELECT Id, Text, MetaData FROM {table_name} ORDER BY ROWID DESC FETCH FIRST 1 ROW ONLY"
    self.cursor.execute(sql)
    row = self.cursor.fetchone()

    if not row:
        raise ValueError(f"No chunks found in collection: {collection_name}")

    def _materialize(value):
        # NOTE(review): depending on column types and driver settings,
        # python-oracledb may hand back LOB objects instead of str; anything
        # exposing read() is read into memory here - confirm column types.
        return value.read() if hasattr(value, "read") else value

    content = _materialize(row[1])
    metadata = _materialize(row[2])

    if isinstance(metadata, str):
        try:
            metadata = json.loads(metadata)
        except json.JSONDecodeError:
            # Keep the raw string rather than failing the whole lookup.
            pass

    return {
        "id": row[0],
        "content": content,
        "metadata": metadata,
    }
341407

342408
def main():
343409
parser = argparse.ArgumentParser(description="Manage Oracle DB vector store")

agentic_rag/README.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,43 @@ python store.py --query "your search query"
206206
python local_rag_agent.py --query "your search query"
207207
```
208208
209+
#### Test Oracle DB Vector Store
210+
211+
The system includes a test script to verify Oracle DB connectivity and examine the contents of your collections. This is useful for:
212+
- Checking if Oracle DB is properly configured
213+
- Viewing statistics about your collections
214+
- Inspecting the content stored in each collection
215+
- Testing basic vector search functionality
216+
217+
To run the test:
218+
219+
```bash
220+
# Full test - checks connection, shows collection statistics, inserts test data and runs a test query
221+
python test_oradb.py
222+
223+
# Show only collection statistics without inserting test data
224+
python test_oradb.py --stats-only
225+
226+
# Specify a custom query for testing
227+
python test_oradb.py --query "artificial intelligence"
228+
```
229+
230+
The script will:
231+
1. Verify Oracle DB credentials in your `config.yaml` file
232+
2. Test connection to the Oracle DB
233+
3. Display the total number of chunks in each collection (PDF, Web, Repository, General Knowledge)
234+
4. Show content and metadata from the most recently inserted chunk in each collection
235+
5. Unless running with `--stats-only`, insert test data and run a sample vector search
236+
237+
Requirements:
238+
- Oracle DB credentials properly configured in `config.yaml`:
239+
```yaml
240+
ORACLE_DB_USERNAME: ADMIN
241+
ORACLE_DB_PASSWORD: your_password_here
242+
ORACLE_DB_DSN: your_connection_string_here
243+
```
244+
- The `oracledb` Python package installed
245+
209246
#### Use RAG Agent
210247
211248
To query documents using either OpenAI or a local model, run:

agentic_rag/test_oradb.py

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,59 @@ def test_connection():
5151
print(f"✗ Connection failed: {str(e)}")
5252
return None
5353

54+
def check_collection_stats(store):
    """Print the chunk count and the latest chunk for every collection.

    For each known collection this prints the total number of chunks and,
    when the collection is not empty, a preview (first 150 characters) of the
    latest chunk's content, its ``source`` metadata and one collection-specific
    metadata field (page / file path / title) when present.

    Args:
        store: Vector store exposing ``get_collection_count(name)`` and
            ``get_latest_chunk(name)``. A falsy value (e.g. ``None`` after a
            failed connection) makes the function print a notice and return.

    Returns:
        None. All output goes to stdout; a failure in one collection is
        reported and does not stop the remaining collections.
    """
    if not store:
        print("Skipping collection stats check as connection failed")
        return

    print("\n=== Collection Statistics ===")

    # Display name -> collection name understood by the store
    collections = {
        "PDF Collection": "pdf_documents",
        "Repository Collection": "repository_documents",
        "Web Knowledge Base": "web_documents",
        "General Knowledge": "general_knowledge",
    }

    # Collection name -> (label, metadata key) of the extra field to print
    extra_fields = {
        "pdf_documents": ("Page", "page"),
        "repository_documents": ("File", "file_path"),
        "web_documents": ("Title", "title"),
    }

    for name, collection in collections.items():
        try:
            # Get total count
            count = store.get_collection_count(collection)
            print(f"\n{name}:")
            print(f"Total chunks: {count}")

            if count <= 0:
                print("No chunks found in this collection.")
                continue

            # Get latest insertion
            latest = store.get_latest_chunk(collection)
            print("Latest chunk:")
            content = latest['content']
            if len(content) > 150:
                print(f" Content: {content[:150]}...")
            else:
                print(f" Content: {content}")

            # Normalize metadata to a dict before reading fields from it
            metadata = latest['metadata']
            if isinstance(metadata, str):
                try:
                    metadata = json.loads(metadata)
                except json.JSONDecodeError:
                    # Not JSON: treat the raw string as the source
                    metadata = {"source": metadata}
            if metadata is None:
                metadata = {}
            elif not isinstance(metadata, dict):
                # e.g. a JSON scalar or list; show it as the source instead
                # of failing on .get()
                metadata = {"source": metadata}

            source = metadata.get('source', 'Unknown')
            print(f" Source: {source}")

            # Print other metadata based on collection type
            if collection in extra_fields:
                label, key = extra_fields[collection]
                if key in metadata:
                    print(f" {label}: {metadata[key]}")

        except Exception as e:
            # Boundary of a diagnostic script: report and move on to the
            # next collection.
            print(f"Error checking {name}: {str(e)}")
106+
54107
def test_add_and_query(store, query_text="machine learning"):
55108
"""Test adding simple data and querying it"""
56109
if not store:
@@ -109,6 +162,7 @@ def test_add_and_query(store, query_text="machine learning"):
109162
def main():
110163
parser = argparse.ArgumentParser(description="Test Oracle DB Vector Store")
111164
parser.add_argument("--query", default="machine learning", help="Query to use for testing")
165+
parser.add_argument("--stats-only", action="store_true", help="Only show collection statistics without inserting test data")
112166

113167
args = parser.parse_args()
114168

@@ -146,8 +200,13 @@ def main():
146200
# Test connection
147201
store = test_connection()
148202

149-
# Test add and query functionality
150-
test_add_and_query(store, args.query)
203+
# Check collection statistics
204+
check_collection_stats(store)
205+
206+
# If stats-only flag is not set, also test add and query functionality
207+
if not args.stats_only:
208+
# Test add and query functionality
209+
test_add_and_query(store, args.query)
151210

152211
print("\n=== Test Completed ===")
153212

0 commit comments

Comments
 (0)