# force_feed_news.py
# Manual ingestion script: fetches RSS news, embeds it, and upserts it into the vector database.
from src.data_ingestion.rss_client import fetch_rss_news
from src.rag.embeddings import Embedder
from src.rag.vector_store import QdrantVecDB
import pandas as pd
def force_feed(
    config_path: str = "config.yaml",
    vector_size: int = 384,
    test_query: str = "Outlook on Apple",
) -> None:
    """Fetch RSS news, embed it, upsert it into the vector DB, and report the result.

    Progress and the outcome are printed to stdout; nothing is returned.
    The steps are: fetch articles, count the points currently in the
    collection, embed ``"<title> - <summary>"`` for every article, upsert
    the vectors with the article rows as metadata, then compare the
    collection count before and after. If the count grew, a single search
    is run as a smoke test and the best hit is printed.

    Args:
        config_path: Path handed to ``fetch_rss_news``.
        vector_size: Vector dimensionality handed to ``QdrantVecDB``.
            NOTE(review): presumably must match the output size of
            ``Embedder`` — confirm against the embedding model in use.
        test_query: Text embedded for the post-insert retrieval check.
    """
    print("--- 💉 Force Feeding News into Cortexa ---")

    # 1. Fetch News Directly
    print("1. Fetching News...")
    df = fetch_rss_news(config_path)
    if df is None or df.empty:
        print("❌ No news fetched. Check your internet or RSS sources.")
        return
    print(f"✅ Fetched {len(df)} articles.")

    # 2. Initialize DB
    print("2. Connecting to Database...")
    embedder = Embedder()
    db = QdrantVecDB(vector_size=vector_size)

    # Check count before
    initial_count = db.client.count(db.collection_name).count
    print(f" Count BEFORE: {initial_count}")

    # 3. Prepare Data
    print("3. Embedding & Indexing...")
    # A missing title or summary would turn the whole concatenation into NaN
    # (a float), so fill gaps with "" and force str before joining.
    titles = df['title'].fillna("").astype(str)
    summaries = df['summary'].fillna("").astype(str)
    texts = (titles + " - " + summaries).tolist()
    vectors = embedder.encode(texts)

    metadatas = df.to_dict('records')
    # Ensure tags are present
    for meta in metadatas:
        meta['event_type'] = 'news'
        # Ensure ticker is set: an absent key (None) and a NaN value both
        # fall back to the generic 'MARKET' tag.
        if pd.isna(meta.get('ticker')):
            meta['ticker'] = 'MARKET'

    # 4. Upsert
    db.upsert(texts, vectors, metadatas)

    # 5. Verify
    # NOTE(review): if db.upsert overwrites existing points (e.g. IDs derived
    # from content), re-ingesting already-stored articles will not raise the
    # count and this reports FAILURE although the data is present — confirm
    # against QdrantVecDB.upsert.
    final_count = db.client.count(db.collection_name).count
    print(f" Count AFTER: {final_count}")
    if final_count <= initial_count:
        print("❌ FAILURE: Database count did not increase.")
        return

    print("✅ SUCCESS: Database grew. News is inside.")

    # Rapid Test
    print("\n--- 🧪 Quick Retrieval Test ---")
    q_vec = embedder.encode([test_query])[0]
    results = db.search(q_vec, top_k=3, filters={"event_type": "news"})
    if results:
        print(f" Found: {results[0]['meta']['title']}")
        print(f" Score: {results[0]['score']:.4f}")
    else:
        print(" ❌ Retrieval failed even after insert.")
# Script entry point: run the ingestion only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    force_feed()