Skip to content

Commit 3aedc5b

Browse files
committed
added progress bar and test script
1 parent 3674b75 commit 3aedc5b

File tree

11 files changed

+201
-59
lines changed

11 files changed

+201
-59
lines changed

LICENCE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2025 Thomas Rauter
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ execute SQL queries on SQLite databases. The package supports organizing SQL que
99
- [Usage](#usage)
1010
- [Basic Example](#basic-example)
1111
- [Directory Layouts](#directory-layouts)
12-
- [Rerun Options](#rerun-options)
1312
- [Installation](#installation)
1413

1514
---

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
packages=find_packages(),
1414
install_requires=[
1515
"pandas>=2.0.0",
16+
"tqdm>=4.5.0",
1617
],
1718
python_requires=">=3.7", # Minimum Python version
1819
classifiers=[ # Metadata for PyPI
248 Bytes
Binary file not shown.
5.37 KB
Binary file not shown.

sqlite_query_manager/run_sql_queries.py

Lines changed: 92 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,19 @@
1+
from typing import Union, List
12
import os
23
import sqlite3
34
import pandas as pd
45
import logging
56
from pathlib import Path
6-
from typing import Union, List
7+
from tqdm import tqdm
8+
import time
79

810

911
def run_sql_queries(
10-
query_dir: Union[str, os.PathLike],
11-
db_file: Union[str, os.PathLike],
12-
output_dir: Union[str, os.PathLike],
13-
rerun_all: bool = False,
14-
rerun_queries: List[str] = None
12+
query_dir: Union[str, os.PathLike],
13+
db_file: Union[str, os.PathLike],
14+
output_dir: Union[str, os.PathLike],
15+
rerun_all: bool = False,
16+
rerun_queries: List[str] = None
1517
) -> None:
1618
"""
1719
Execute all SQL queries in a directory (including subdirectories) on a
@@ -44,78 +46,110 @@ def run_sql_queries(
4446
rerun_queries=rerun_queries
4547
)
4648

47-
# Normalize rerun_queries for comparison
4849
rerun_queries = set(rerun_queries or [])
49-
50-
# Configure logging
5150
logging.basicConfig(
5251
filename='query_manager.log',
5352
level=logging.INFO,
5453
format='%(asctime)s - %(levelname)s - %(message)s'
5554
)
5655
logger = logging.getLogger()
57-
5856
logger.info("Starting SQL query execution process.")
5957

60-
# Connect to SQLite database
58+
# Collect all SQL query files
59+
query_files = []
60+
for root, _, files in os.walk(query_dir):
61+
for file_name in files:
62+
if file_name.endswith('.sql'):
63+
sql_file_path = os.path.join(
64+
root,
65+
file_name
66+
)
67+
query_files.append(sql_file_path)
68+
69+
# Filter queries based on rerun criteria
70+
queries_to_execute = []
71+
for sql_file_path in query_files:
72+
relative_path = os.path.relpath(
73+
os.path.dirname(sql_file_path),
74+
query_dir
75+
)
76+
77+
target_output_dir = os.path.join(
78+
output_dir,
79+
relative_path
80+
)
81+
82+
os.makedirs(
83+
target_output_dir,
84+
exist_ok=True
85+
)
86+
output_file_path = os.path.join(
87+
target_output_dir,
88+
f"{Path(sql_file_path).stem}.csv"
89+
)
90+
if (
91+
not os.path.exists(output_file_path) or
92+
rerun_all or
93+
Path(sql_file_path).name in rerun_queries
94+
):
95+
queries_to_execute.append((
96+
sql_file_path,
97+
output_file_path
98+
))
99+
100+
if not queries_to_execute:
101+
logger.info("No queries to execute. Exiting.")
102+
print("No queries to execute.")
103+
return
104+
105+
# Connect to the SQLite database
61106
try:
62107
conn = sqlite3.connect(db_file)
63108
logger.info(f"Connected to database: {db_file}")
64109
except Exception as e:
65110
logger.error(f"Failed to connect to database: {db_file}. Error: {e}")
66111
return
67112

68-
# Walk through the SQL directory structure
69-
for root, _, files in os.walk(query_dir):
70-
relative_path = os.path.relpath(root, query_dir)
71-
target_output_dir = os.path.join(output_dir, relative_path)
113+
# Progress bar setup
114+
progress = tqdm(
115+
total=len(queries_to_execute),
116+
desc="Running queries",
117+
unit=" query",
118+
unit_scale=True
119+
)
72120

73-
# Ensure output subdirectory exists
74-
os.makedirs(target_output_dir, exist_ok=True)
121+
execution_times = []
75122

76-
for file_name in files:
77-
if file_name.endswith('.sql'): # Process only .sql files
78-
sql_file_path = os.path.join(root, file_name)
79-
output_file_path = os.path.join(
80-
target_output_dir,
81-
f"{Path(file_name).stem}.csv"
82-
)
123+
for sql_file_path, output_file_path in queries_to_execute:
124+
start_query_time = time.time()
125+
try:
126+
with open(sql_file_path, 'r') as query_file:
127+
query = query_file.read()
128+
129+
logger.info(f"Executing query: {sql_file_path}")
130+
df = pd.read_sql_query(query, conn)
131+
df.to_csv(output_file_path, index=False)
132+
logger.info(
133+
f"Query executed successfully. Output saved to:"
134+
f" {output_file_path}"
135+
)
136+
137+
except Exception as e:
138+
logger.error(f"Error executing query: {sql_file_path}. Error: {e}")
139+
140+
# Update progress and estimate remaining time
141+
query_duration = time.time() - start_query_time
142+
execution_times.append(query_duration)
143+
avg_time_per_query = sum(execution_times) / len(execution_times)
144+
remaining_queries = len(queries_to_execute) - progress.n - 1
145+
estimated_time_left = avg_time_per_query * remaining_queries
146+
progress.set_postfix_str(f"ETA: {estimated_time_left:.2f}s")
147+
progress.update(1)
83148

84-
# Skip if output already exists and rerun_all is False,
85-
# unless in rerun_queries
86-
if (os.path.exists(output_file_path)
87-
and not rerun_all
88-
and file_name not in rerun_queries):
89-
logger.info(
90-
f"Skipping query (output exists):"
91-
f" {sql_file_path}"
92-
)
93-
continue
94-
95-
# Read and execute the SQL query
96-
try:
97-
with open(sql_file_path, 'r') as query_file:
98-
query = query_file.read()
99-
100-
logger.info(f"Executing query: {sql_file_path}")
101-
df = pd.read_sql_query(query, conn)
102-
103-
# Save result to CSV
104-
df.to_csv(output_file_path, index=False)
105-
logger.info(
106-
f"Query executed successfully. Output saved to:"
107-
f" {output_file_path}"
108-
)
109-
110-
except Exception as e:
111-
logger.error(
112-
f"Error executing query:"
113-
f" {sql_file_path}. Error: {e}"
114-
)
115-
116-
# Close the database connection
149+
progress.close()
117150
conn.close()
118151
logger.info("SQL query execution process completed.")
152+
print("SQL query execution process completed.")
119153

120154

121155
def validate_inputs(
@@ -184,7 +218,7 @@ def validate_inputs(
184218

185219
# No need to check if output_dir exists, as it may be created later
186220

187-
# Validate force_rerun
221+
# Validate rerun_all
188222
if not isinstance(rerun_all, bool):
189223
raise TypeError(
190224
f"'force_rerun' must be a boolean."

tests/query_manager.log

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
2025-01-01 13:36:35,140 - INFO - Starting SQL query execution process.
2+
2025-01-01 13:36:35,143 - INFO - Connected to database: test_sql\test_database.db
3+
2025-01-01 13:36:35,190 - INFO - Executing query: test_sql\queries\sample_query.sql
4+
2025-01-01 13:36:35,210 - INFO - Query executed successfully. Output saved to: test_sql\output\.\sample_query.csv
5+
2025-01-01 13:36:35,212 - INFO - SQL query execution process completed.
6+
2025-01-01 13:39:53,500 - INFO - Starting SQL query execution process.
7+
2025-01-01 13:39:53,502 - INFO - Connected to database: test_sql\test_database.db
8+
2025-01-01 13:39:53,532 - INFO - Executing query: test_sql\queries\sample_query.sql
9+
2025-01-01 13:39:53,546 - INFO - Query executed successfully. Output saved to: test_sql\output\.\sample_query.csv
10+
2025-01-01 13:39:53,547 - INFO - SQL query execution process completed.

tests/test_run_sql_queries.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import sqlite3
2+
import os
3+
from sqlite_query_manager.run_sql_queries import run_sql_queries
4+
5+
# Paths for testing
6+
TEST_DIR = "test_sql"
7+
DB_FILE = os.path.join(TEST_DIR, "test_database.db")
8+
QUERY_DIR = os.path.join(TEST_DIR, "queries")
9+
OUTPUT_DIR = os.path.join(TEST_DIR, "output")
10+
11+
12+
def create_test_environment():
    """
    Create a minimal SQLite database, a sample query, and the directories.

    The function is safe to call repeatedly: ``test_table`` is emptied
    before the three sample rows are inserted, so reruns do not pile up
    duplicate rows in the database (and therefore in the query output).
    """
    os.makedirs(QUERY_DIR, exist_ok=True)
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    sample_rows = [
        ("Alice", 25),
        ("Bob", 30),
        ("Charlie", 35),
    ]

    conn = sqlite3.connect(DB_FILE)
    try:
        cursor = conn.cursor()

        # Create the sample table on first use only.
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS test_table (
                id INTEGER PRIMARY KEY,
                name TEXT,
                age INTEGER
            )
        """)

        # Start from an empty table; without this every call appends three
        # more rows and the exported CSV grows with each test run.
        cursor.execute("DELETE FROM test_table")

        cursor.executemany(
            "INSERT INTO test_table (name, age) VALUES (?, ?)",
            sample_rows,
        )
        conn.commit()
    finally:
        # Release the database file even if one of the statements failed.
        conn.close()

    # Create the sample query file that the test executes.
    query_path = os.path.join(QUERY_DIR, "sample_query.sql")
    with open(query_path, "w", encoding="utf-8") as f:
        f.write("SELECT * FROM test_table;")
46+
47+
48+
def test_run_sql_queries():
    """
    Run ``run_sql_queries`` on the test environment and verify its output.

    Expects the query directory and database to exist already (see the
    ``__main__`` guard, which builds them first).

    Raises:
        AssertionError: If ``sample_query.csv`` was not written by this run.
    """
    output_file = os.path.join(OUTPUT_DIR, "sample_query.csv")

    # Remove output left by an earlier run. run_sql_queries logs query
    # errors instead of raising, so a stale CSV would otherwise make the
    # existence check pass even when the query failed.
    if os.path.exists(output_file):
        os.remove(output_file)

    run_sql_queries(
        query_dir=QUERY_DIR,
        db_file=DB_FILE,
        output_dir=OUTPUT_DIR,
        rerun_all=True
    )

    # Fail loudly so a test runner (or a non-zero exit code) reports it.
    assert os.path.exists(output_file), "Test failed: Output file not found."
    print("Test passed: Output file created.")
65+
66+
67+
if __name__ == "__main__":
68+
create_test_environment()
69+
test_run_sql_queries()
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
id,name,age
2+
1,Alice,25
3+
2,Bob,30
4+
3,Charlie,35
5+
4,Alice,25
6+
5,Bob,30
7+
6,Charlie,35
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
SELECT * FROM test_table;

0 commit comments

Comments
 (0)