-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcheck_db.py
More file actions
32 lines (26 loc) · 1.05 KB
/
check_db.py
File metadata and controls
32 lines (26 loc) · 1.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from dotenv import load_dotenv
# Populate the process environment from a .env file before any client is
# built (presumably this supplies the OpenAI API key -- confirm).
load_dotenv()

# Load the vector database: the Chroma store persisted under ./chroma_db,
# with OpenAI's text-embedding-3-small model as its embedding function.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
vector_db = Chroma(
    embedding_function=embeddings,
    persist_directory="./chroma_db",
)
# Inspect the database contents: print a banner, then the number of entries
# reported by the underlying collection.
print("=== 检查向量数据库 ===")
# NOTE(review): `_collection` is a non-public attribute of the Chroma wrapper
# (leading underscore), so this access may break on a library upgrade.
total_docs = vector_db._collection.count()
print(f"总文档数: {total_docs}")
# Collect the distinct "source" values recorded in the stored metadata.
# Only metadata is requested: a bare get() also returns every document's
# text, which this report never reads.
all_docs = vector_db.get(include=["metadatas"])

# `or []` guards against a None metadata list; the filter skips entries that
# have no metadata at all or no "source" key.
sources = {
    metadata["source"]
    for metadata in all_docs["metadatas"] or []
    if metadata and "source" in metadata
}

# Print the sources in sorted order so the listing is stable between runs.
print("\n发现的文档来源:")
for source in sorted(sources):
    print(f" - {source}")
# Smoke-test retrieval of World Bank related content: run a similarity search
# for a fixed query and print the top 5 results.
print("\n=== 测试检索 'World Bank infrastructure' ===")
results = vector_db.similarity_search("World Bank infrastructure", k=5)

# Results are numbered from 1. Each entry shows its "source" metadata
# ("Unknown" when absent) and the first 200 characters of its text.
for rank, doc in enumerate(results, start=1):
    source = doc.metadata.get("source", "Unknown")
    print(f"\n结果 {rank}: {source}")
    print(f"内容预览: {doc.page_content[:200]}...")