Skip to content

Commit 7784f3a

Browse files
committed
Added log_dir argument to both functions and updated the create_sqlite_db() function
1 parent c777aea commit 7784f3a

9 files changed: +216 -73 lines changed
3.8 KB
Binary file not shown.
450 Bytes
Binary file not shown.

sqlite_manager/create_sqlite_db.py

Lines changed: 146 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -1,93 +1,186 @@
1-
import sqlite3
21
import pandas as pd
2+
import sqlite3
33
import os
4+
import logging
45
from typing import Union
56

67

78
def create_sqlite_db(
89
df: pd.DataFrame,
9-
schema_file: Union[str, os.PathLike] = None,
10-
db_file: Union[str, os.PathLike] = None,
10+
schema_file: Union[str, os.PathLike],
11+
db_file: Union[str, os.PathLike],
12+
table_name: str,
13+
log_dir: Union[str, os.PathLike] = None
1114
) -> None:
1215
"""
13-
Create an SQLite database using a schema file and load data from a pandas
14-
DataFrame.
16+
Create or update an SQLite database using a schema file and load data into
17+
a specified table from a pandas DataFrame.
1518
1619
Parameters
1720
----------
1821
df : pd.DataFrame
1922
The data to be loaded into the database.
2023
2124
schema_file : Union[str, os.PathLike]
22-
Path to the SQL file containing the schema definition. Default is
23-
'db_schema.sql'.
25+
Path to the SQL file containing the schema definition.
26+
27+
db_file : Union[str, os.PathLike]
28+
Path to the SQLite database file to be created or updated.
2429
25-
db_file : Union[str, os.PathLike], optional
26-
Path to the SQLite database file to be created.
27-
Defaults to './database.db'.
30+
table_name : str
31+
Name of the table to insert the DataFrame into.
2832
33+
log_dir : Union[str, os.PathLike], optional
34+
Directory where the log file is written. Default is the current
35+
working directory.
36+
37+
Raises
38+
------
39+
FileNotFoundError
40+
If the schema file does not exist.
41+
42+
ValueError
43+
If the specified table name is not found in the schema or the schema
44+
does not match the DataFrame structure.
2945
Examples
3046
--------
3147
.. code-block:: python
3248
3349
import pandas as pd
3450
from sqlite_manager import create_sqlite_db
3551
52+
# Define the DataFrame to insert
3653
data = {
37-
"id": [1, 2, 3],
38-
"name": ["Alice", "Bob", "Charlie"],
39-
"age": [25, 30, 35]
54+
"InvoiceNo": ["A001", "A002", "A003"],
55+
"StockCode": ["P001", "P002", "P003"],
56+
"Description": ["Product 1", "Product 2", "Product 3"],
57+
"Quantity": [10, 5, 20],
58+
"InvoiceDate": ["2023-01-01", "2023-01-02", "2023-01-03"],
59+
"UnitPrice": [12.5, 8.0, 15.0],
60+
"CustomerID": ["C001", "C002", "C003"],
61+
"Country": ["USA", "UK", "Germany"]
4062
}
4163
df = pd.DataFrame(data)
4264
43-
schema_file = "db_schema.sql"
44-
# Example schema (contents of db_schema.sql)
45-
# CREATE TABLE ExampleTable (
46-
# id INTEGER PRIMARY KEY,
47-
# name TEXT,
48-
# age INTEGER
65+
# Schema file (SQL file defining the database schema)
66+
schema_file = "schema.sql"
67+
# Contents of schema.sql:
68+
# CREATE TABLE IF NOT EXISTS OnlineRetail (
69+
# InvoiceNo TEXT NOT NULL,
70+
# StockCode TEXT NOT NULL,
71+
# Description TEXT,
72+
# Quantity INTEGER NOT NULL,
73+
# InvoiceDate TEXT NOT NULL,
74+
# UnitPrice REAL NOT NULL,
75+
# CustomerID TEXT,
76+
# Country TEXT
4977
# );
5078
51-
db_file = "example_database.db"
79+
# SQLite database file to create or update
80+
db_file = "data/online_retail.db"
5281
53-
create_sqlite_db(df, schema_file, db_file)
54-
"""
55-
if schema_file is None:
56-
schema_file = os.path.abspath("./db_schema.sql")
57-
if db_file is None:
58-
db_file = os.path.abspath("./database.db")
59-
60-
# Check if the database file already exists
61-
if os.path.exists(db_file):
62-
raise FileExistsError(
63-
f"The database file '{db_file}' already exists."
64-
f" Please specify a different path or remove the existing file."
82+
# Create or update the database and insert data into the table
83+
create_sqlite_db(
84+
df=df,
85+
schema_file=schema_file,
86+
db_file=db_file,
87+
table_name="OnlineRetail",
88+
log_dir="." # Optional
6589
)
66-
67-
# Create the database and apply the schema
90+
"""
91+
if log_dir is None:
92+
log_dir = os.getcwd()
93+
94+
os.makedirs(log_dir, exist_ok=True)
95+
96+
# Configure the logging
97+
log_file = os.path.join(log_dir, "create_sqlite_db.log")
98+
logging.basicConfig(
99+
level=logging.INFO,
100+
format="%(asctime)s - %(levelname)s - %(message)s",
101+
handlers=[
102+
logging.FileHandler(log_file),
103+
logging.StreamHandler()
104+
]
105+
)
106+
107+
# Validate input files
108+
if not os.path.exists(schema_file):
109+
raise FileNotFoundError(f"Schema file '{schema_file}' not found.")
110+
111+
# Read and validate schema
112+
with open(schema_file, 'r') as file:
113+
schema = file.read()
114+
115+
if (f"CREATE TABLE {table_name}" not in schema
116+
and f"CREATE TABLE IF NOT EXISTS {table_name}" not in schema):
117+
logging.error(
118+
f"Table '{table_name}' is not defined in the schema file.")
119+
raise ValueError(
120+
f"Table '{table_name}' is not defined in the schema file.")
121+
122+
# Check if the database already exists
123+
db_exists = os.path.exists(db_file)
124+
125+
# Connect to SQLite database
68126
conn = sqlite3.connect(db_file)
127+
cursor = conn.cursor()
69128
try:
70-
with open(schema_file, 'r') as file:
71-
schema = file.read()
72-
cursor = conn.cursor()
73-
cursor.executescript(schema)
74-
conn.commit()
75-
print(f"Database and schema created at: {db_file}")
76-
77-
# Load the DataFrame into the database
78-
table_name = schema.split("CREATE TABLE")[1].split("(")[0].strip()
79-
df.to_sql(
80-
table_name,
81-
conn,
82-
if_exists='append',
83-
index=False
84-
)
85-
print(f"Data inserted into table: {table_name}")
129+
if not db_exists:
130+
logging.info(
131+
f"Database does not exist. Creating new database at: {db_file}")
132+
conn.executescript(schema)
133+
conn.commit()
134+
logging.info("Schema applied successfully.")
135+
else:
136+
logging.info(f"Using existing database at: {db_file}")
137+
# Apply the schema in case new tables are defined
138+
conn.executescript(schema)
139+
conn.commit()
140+
logging.info(
141+
"Schema re-applied to ensure all definitions are current.")
142+
143+
# Check if the specified table exists
144+
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
145+
existing_tables = [row[0] for row in cursor.fetchall()]
146+
if table_name in existing_tables:
147+
logging.info(
148+
f"Table '{table_name}' already exists."
149+
f" Data will be appended."
150+
)
151+
else:
152+
logging.info(
153+
f"Table '{table_name}' was created from the schema.")
154+
155+
# Validate table schema against DataFrame columns
156+
cursor.execute(f"PRAGMA table_info({table_name});")
157+
schema_columns = [row[1] for row in cursor.fetchall()]
158+
if not schema_columns:
159+
raise ValueError(
160+
f"Table '{table_name}' does not exist after applying"
161+
f" the schema."
162+
)
163+
164+
missing_columns = [col for col in df.columns if
165+
col not in schema_columns]
166+
if missing_columns:
167+
logging.error(
168+
f"Columns in DataFrame not found in table schema:"
169+
f" {missing_columns}"
170+
)
171+
raise ValueError(
172+
f"Table schema is missing required columns: {missing_columns}")
173+
174+
# Insert DataFrame into the specified table
175+
df.to_sql(table_name, conn, if_exists='append', index=False)
176+
logging.info(f"Inserted {len(df)} rows into table '{table_name}'.")
86177

87178
except sqlite3.Error as e:
88-
print(f"SQLite Error: {e}")
179+
logging.error(f"SQLite Error: {e}")
180+
raise
181+
except ValueError as ve:
182+
logging.error(f"Validation Error: {ve}")
89183
raise
90-
91184
finally:
92185
conn.close()
93-
print(f"Database connection closed: {db_file}")
186+
logging.info(f"Database connection closed: {db_file}")

sqlite_manager/run_sql_queries.py

Lines changed: 21 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,8 @@ def run_sql_queries(
1313
query_dir: Union[str, os.PathLike] = None,
1414
output_dir: Union[str, os.PathLike] = None,
1515
rerun_all: bool = False,
16-
rerun_queries: List[str] = None
16+
rerun_queries: List[str] = None,
17+
log_dir: Union[str, os.PathLike] = None
1718
) -> None:
1819
"""
1920
Execute all SQL queries in a directory (including subdirectories) on a
@@ -38,7 +39,11 @@ def run_sql_queries(
3839
3940
rerun_queries : list of str, optional
4041
List of specific query filenames to rerun, regardless of existing
41-
output.
42+
output.
43+
44+
log_dir : Union[str, os.PathLike], optional
45+
Directory where the log file is written. Default is the current
46+
working directory.
4247
4348
Returns
4449
-------
@@ -58,7 +63,8 @@ def run_sql_queries(
5863
run_sql_queries (
5964
query_dir,
6065
db_file,
61-
output_dir
66+
output_dir,
67+
log_dir="."
6268
)
6369
6470
# Rerun all queries regardless of existing outputs
@@ -79,26 +85,28 @@ def run_sql_queries(
7985
8086
# # Input directory
8187
# sql_queries/
82-
# ├── major_task_1/
88+
# ├── task_1/
8389
# │ ├── query1.sql
8490
# │ ├── query2.sql
85-
# ├── major_task_2/
91+
# ├── task_2/
8692
# │ ├── query3.sql
8793
# │ └── query4.sql
8894
#
8995
# # Output Directory (Query Results):
9096
# output/
91-
# ├── major_task_1/
97+
# ├── task_1/
9298
# │ ├── query1.csv
9399
# │ ├── query2.csv
94-
# ├── major_task_2/
100+
# ├── task_2/
95101
# │ ├── query3.csv
96102
# │ └── query4.csv
97103
"""
98104
if query_dir is None:
99105
query_dir = os.path.abspath("./sql_queries")
100106
if output_dir is None:
101107
output_dir = os.path.abspath("./query_results")
108+
if log_dir is None:
109+
log_dir = os.getcwd()
102110

103111
validate_inputs(
104112
query_dir=query_dir,
@@ -109,10 +117,14 @@ def run_sql_queries(
109117
)
110118

111119
rerun_queries = set(rerun_queries or [])
120+
121+
os.makedirs(log_dir, exist_ok=True)
122+
# Configure the logging
123+
log_file = os.path.join(log_dir, "query_manager.log")
112124
logging.basicConfig(
113-
filename='query_manager.log',
125+
filename=log_file,
114126
level=logging.INFO,
115-
format='%(asctime)s - %(levelname)s - %(message)s'
127+
format="%(asctime)s - %(levelname)s - %(message)s"
116128
)
117129
logger = logging.getLogger()
118130
logger.info("Starting SQL query execution process.")
1.36 KB
Binary file not shown.
493 Bytes
Binary file not shown.

tests/create_sqlite_db.log

Whitespace-only changes.

0 commit comments

Comments
 (0)