-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathingest.py
More file actions
76 lines (61 loc) · 2.1 KB
/
ingest.py
File metadata and controls
76 lines (61 loc) · 2.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
"""
Mixing Encyclopedia - Knowledge Ingestion Script
Loads mixing knowledge from JSON into ChromaDB
"""
import chromadb
import json
from pathlib import Path
import sys
import io
# Fix Windows console encoding for emojis.
# The status messages printed by this script contain emoji, so on Windows
# stdout is switched to UTF-8. reconfigure() changes the encoding of the
# existing stream in place, which keeps its buffering mode (wrapping
# sys.stdout.buffer in a new TextIOWrapper would drop line buffering and
# delay the progress messages). The hasattr guard skips the fix when stdout
# has been replaced by an object without reconfigure().
if sys.platform == "win32" and hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8")
def _load_knowledge(knowledge_file):
    """Parse the JSON knowledge file and return the decoded value."""
    with open(knowledge_file, 'r', encoding='utf-8') as f:
        return json.load(f)


def _build_records(knowledge):
    """Split knowledge entries into the parallel lists passed to collection.add()."""
    documents = []
    metadatas = []
    ids = []
    for i, entry in enumerate(knowledge):
        documents.append(entry['content'])
        metadatas.append({
            'topic': entry['topic'],
            'category': entry['category'],
            'source': entry['source'],
        })
        # IDs are positional, so they are only stable for a given file order.
        ids.append(f"entry_{i}")
    return documents, metadatas, ids


def ingest_knowledge():
    """Load knowledge from the JSON file into a fresh ChromaDB collection.

    Reads ``data/knowledge.json`` (relative to the current working
    directory), then replaces the ``mixing_knowledge`` collection in the
    persistent store at ``./chroma_db`` with one document per entry. Each
    entry must provide the keys ``content``, ``topic``, ``category`` and
    ``source``.

    The input is read and converted *before* the existing collection is
    deleted, so a missing, empty or malformed knowledge file leaves the
    current database untouched.

    Returns:
        None. Progress and errors are reported on stdout.

    Raises:
        json.JSONDecodeError: If the knowledge file is not valid JSON.
        KeyError: If an entry lacks one of the required keys.
    """
    # Load knowledge from JSON first: nothing destructive happens until the
    # input is known to be usable.
    knowledge_file = Path("data/knowledge.json")
    if not knowledge_file.exists():
        print(f"❌ Error: {knowledge_file} not found!")
        print("📝 Create data/knowledge.json with your mixing knowledge")
        return

    knowledge = _load_knowledge(knowledge_file)
    if not knowledge:
        # Nothing to add; keep whatever is already stored instead of
        # replacing it with an empty collection.
        print(f"❌ Error: {knowledge_file} contains no entries!")
        return

    # Prepare data for ChromaDB (raises KeyError on a malformed entry while
    # the old collection still exists).
    documents, metadatas, ids = _build_records(knowledge)

    # Initialize ChromaDB
    print("🔧 Initializing ChromaDB...")
    client = chromadb.PersistentClient(path="./chroma_db")

    # Delete existing collection if it exists
    try:
        client.delete_collection(name="mixing_knowledge")
    except Exception:
        # Best effort: the exception raised for a missing collection differs
        # between chromadb releases, so it is not matched by type. Unlike a
        # bare ``except:``, this lets KeyboardInterrupt/SystemExit through.
        # A genuine failure will resurface in create_collection() below.
        pass
    else:
        print("🗑️ Deleted existing collection")

    # Create new collection
    collection = client.create_collection(
        name="mixing_knowledge",
        metadata={"description": "Personal mixing knowledge base"},
    )
    print("✅ Created new collection")

    print(f"📚 Loading {len(knowledge)} entries...")

    # Add to collection
    collection.add(
        documents=documents,
        metadatas=metadatas,
        ids=ids,
    )

    print(f"✅ Successfully ingested {len(knowledge)} entries!")
    print(f"📊 Total entries in database: {collection.count()}")
    print("\n🎉 Ready to use! Run the Jupyter notebook to query your knowledge.")
# Run the ingestion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    ingest_knowledge()