Skip to content

Commit 3e97868

Browse files
committed
working on week 1
1 parent a57d1e3 commit 3e97868

File tree

8 files changed

+24
-0
lines changed

8 files changed

+24
-0
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
"""Rebuild the ConnectSphere DuckDB database from the raw data files.

Running this script deletes any existing database file at ``DB_FILE`` and
recreates it with one table per raw input file found under ``RAW_DATA_DIR``.
"""
import duckdb
import os

DB_FILE = 'data/connectsphere.duckdb'
RAW_DATA_DIR = 'data/raw'

# (table name, DuckDB table-function call that reads the table's source file).
_TABLE_SOURCES = (
    ('events', f"read_parquet('{RAW_DATA_DIR}/events.parquet')"),
    ('users', f"read_parquet('{RAW_DATA_DIR}/users.parquet')"),
    ('reviews', f"read_csv_auto('{RAW_DATA_DIR}/app_store_reviews.csv')"),
)

# Remove old database file if it exists to ensure a fresh start.
# Attempting the removal directly avoids the check-then-delete race of
# os.path.exists() followed by os.remove().
try:
    os.remove(DB_FILE)
except FileNotFoundError:
    pass

con = duckdb.connect(database=DB_FILE, read_only=False)
try:
    print("Database created. Creating tables...")

    # Create tables from Parquet/CSV files
    for _table_name, _reader_call in _TABLE_SOURCES:
        con.execute(f"CREATE TABLE {_table_name} AS SELECT * FROM {_reader_call};")

    print("Tables created successfully:")
    print(con.execute("SHOW TABLES;").fetchdf())
finally:
    # Always release the connection, even when a source file is missing or
    # unreadable and one of the CREATE TABLE statements raises.
    con.close()

print("Database initialization complete.")

scripts/python/load_data_duckdb.py

Whitespace-only changes.

scripts/sql/keyword_search.sql

Whitespace-only changes.

0 commit comments

Comments
 (0)