-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathjson._to_sqlite.py
More file actions
57 lines (46 loc) · 1.87 KB
/
json._to_sqlite.py
File metadata and controls
57 lines (46 loc) · 1.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import json
import sqlite3
import os
import re
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
json_path = os.path.join(BASE_DIR, "prompt-search", "data", "domain", "metadata.json")
db_path = os.path.join(BASE_DIR, "data", "courses.db")
def hydrate_database():
if not os.path.exists(json_path):
print(f"Error: Could not find metadata.json at {json_path}")
return
# 1. Load the JSON data
with open(json_path, 'r', encoding='utf-8') as f:
data = json.load(f)
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
print(f"Found {len(data)} entries in JSON. Starting migration...")
# 2. Clear existing (empty) data to avoid duplicates
cursor.execute("DELETE FROM courses")
count = 0
for entry in data:
# Extract fields from your specific metadata format
course_code = entry.get('course_code', '')
prereqs = entry.get('prereq_raw', 'None')
description = entry.get('chunk_text', '')
# Split 'CPT S 321' into 'CPT S' and '321'
match = re.search(r'([A-Z\s]{2,6})\s*(\d{3})', course_code)
if match:
prefix = match.group(1).strip()
number = match.group(2).strip()
# 3. Insert into the courses table
cursor.execute("""
INSERT INTO courses (
prefix, courseNumber, title, courseDescription, coursePrerequisite,
uniqueId, campus, term, year, isLab, sectionNumber
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
prefix, number, course_code, description, prereqs,
f"ID-{count}", "Pullman", "Spring", 2026, 0, "01"
))
count += 1
conn.commit()
conn.close()
print(f"Success! {count} courses migrated from JSON to SQLite.")
if __name__ == "__main__":
hydrate_database()