|
8 | 8 | from template_langgraph.tools.qdrants import QdrantClientWrapper
|
9 | 9 |
|
10 | 10 | logger = get_logger(__name__)
|
11 |
| -logger.setLevel(logging.INFO) |
12 |
| -COLLECTION_NAME = "documents" |
| 11 | +logger.setLevel(logging.DEBUG) |
| 12 | +COLLECTION_NAME = "qa_kabuto" |
13 | 13 |
|
14 | 14 | if __name__ == "__main__":
|
15 | 15 | # Load documents from CSV files
|
16 | 16 | documents = CsvLoaderWrapper().load_csv_docs()
|
17 | 17 | logger.info(f"Loaded {len(documents)} documents from CSV.")
|
18 | 18 |
|
19 |
| - # hardcoded collection name for demonstration purposes |
20 |
| - |
21 |
| - logger.info(f"Upserting {len(documents)} documents into Qdrant collection: {COLLECTION_NAME}") |
22 | 19 | points = []
|
| 20 | + embedding_wrapper = AzureOpenAiWrapper() |
23 | 21 | for i, doc in enumerate(documents):
|
24 | 22 | logger.debug(f"Processing document {i}: {doc.metadata.get('source', 'unknown')}")
|
25 |
| - content = doc.page_content |
26 |
| - content = content.replace(" ", "") |
27 |
| - embedding = AzureOpenAiWrapper().create_embedding(content) |
| 23 | + content = doc.page_content.replace("\n", " ") |
| 24 | + logger.debug(f"Creating embedding for document {i} with content: {content[:50]}...") |
| 25 | + vector = embedding_wrapper.create_embedding(content) |
28 | 26 | points.append(
|
29 | 27 | PointStruct(
|
30 | 28 | id=i,
|
31 |
| - vector=embedding, |
| 29 | + vector=vector, |
32 | 30 | payload={
|
33 | 31 | "file_name": doc.metadata.get("source", f"doc_{i}"),
|
34 | 32 | "content": content,
|
35 | 33 | },
|
36 | 34 | )
|
37 | 35 | )
|
38 | 36 |
|
| 37 | + # Create Qdrant collection and upsert points |
| 38 | + logger.info(f"Creating Qdrant collection: {COLLECTION_NAME}") |
39 | 39 | qdrant_client = QdrantClientWrapper()
|
40 | 40 | qdrant_client.create_collection(
|
41 | 41 | collection_name=COLLECTION_NAME,
|
42 | 42 | vector_size=len(points[0].vector) if points else 1536, # default vector size
|
43 | 43 | )
|
44 | 44 |
|
45 |
| - logger.info(f"Created Qdrant collection: {COLLECTION_NAME}") |
| 45 | + # Upsert points into the Qdrant collection |
| 46 | + logger.info(f"Upserting points into Qdrant collection: {COLLECTION_NAME}") |
46 | 47 | operation_info = qdrant_client.upsert_points(
|
47 | 48 | collection_name=COLLECTION_NAME,
|
48 | 49 | points=points,
|
|
0 commit comments