Skip to content

Commit 83c508b

Browse files
committed
Improvements in working with the database
1 parent 99594c2 commit 83c508b

File tree

1 file changed

+110
-117
lines changed

1 file changed

+110
-117
lines changed

scripts/portal-fetcher/openshift-docs-downloader.py

Lines changed: 110 additions & 117 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
import json
1313
import logging
1414
import sqlite3
15+
import sys
1516
import time
1617
from pathlib import Path
1718
from typing import Optional, Union
@@ -196,42 +197,42 @@ def init_database(db_path: str) -> str:
196197
"""Initialize SQLite database to track downloaded files"""
197198
Path(db_path).parent.mkdir(parents=True, exist_ok=True)
198199

199-
conn = sqlite3.connect(db_path)
200-
cursor = conn.cursor()
200+
with sqlite3.connect(db_path) as conn:
201+
cursor = conn.cursor()
201202

202-
# Main downloads table
203-
cursor.execute(
203+
# Main downloads table
204+
cursor.execute(
205+
"""
206+
CREATE TABLE IF NOT EXISTS downloads (
207+
id INTEGER PRIMARY KEY AUTOINCREMENT,
208+
url TEXT UNIQUE NOT NULL,
209+
local_path TEXT NOT NULL,
210+
status TEXT NOT NULL,
211+
etag TEXT,
212+
last_modified TEXT,
213+
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
214+
)
204215
"""
205-
CREATE TABLE IF NOT EXISTS downloads (
206-
id INTEGER PRIMARY KEY AUTOINCREMENT,
207-
url TEXT UNIQUE,
208-
local_path TEXT,
209-
status TEXT,
210-
etag TEXT,
211-
last_modified TEXT,
212-
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
213-
)
214-
"""
215-
)
216+
)
216217

217-
# History table to track changes
218-
cursor.execute(
218+
# History table to track changes
219+
cursor.execute(
220+
"""
221+
CREATE TABLE IF NOT EXISTS download_history (
222+
id INTEGER PRIMARY KEY AUTOINCREMENT,
223+
url TEXT NOT NULL,
224+
local_path TEXT NOT NULL,
225+
status TEXT NOT NULL,
226+
etag TEXT,
227+
last_modified TEXT,
228+
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
229+
change_type TEXT NOT NULL
230+
)
219231
"""
220-
CREATE TABLE IF NOT EXISTS download_history (
221-
id INTEGER PRIMARY KEY AUTOINCREMENT,
222-
url TEXT,
223-
local_path TEXT,
224-
status TEXT,
225-
etag TEXT,
226-
last_modified TEXT,
227-
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
228-
change_type TEXT
229-
)
230-
"""
231-
)
232+
)
233+
234+
conn.commit()
232235

233-
conn.commit()
234-
conn.close()
235236
logger.info(f"Database initialized at {db_path}")
236237

237238
return db_path
@@ -257,29 +258,29 @@ def record_download(
257258
last_modified (str): Last-Modified header from the response, if available
258259
change_type (str): Type of change (new, updated, unchanged, error)
259260
"""
260-
conn = sqlite3.connect(db_path)
261-
cursor = conn.cursor()
262261
local_path_str = str(local_path)
263262

264-
try:
265-
# Record in main downloads table
266-
cursor.execute(
267-
"INSERT OR REPLACE INTO downloads (url, local_path, status, etag, last_modified, timestamp) VALUES (?, ?, ?, ?, ?, datetime('now'))",
268-
(url, local_path_str, status, etag, last_modified),
269-
)
263+
with sqlite3.connect(db_path) as conn:
264+
cursor = conn.cursor()
270265

271-
# Record in history table
272-
if change_type:
266+
try:
267+
# Record in main downloads table
273268
cursor.execute(
274-
"INSERT INTO download_history (url, local_path, status, etag, last_modified, change_type) VALUES (?, ?, ?, ?, ?, ?)",
275-
(url, local_path_str, status, etag, last_modified, change_type),
269+
"INSERT OR REPLACE INTO downloads (url, local_path, status, etag, last_modified, timestamp) VALUES (?, ?, ?, ?, ?, datetime('now'))",
270+
(url, local_path_str, status, etag, last_modified),
276271
)
277272

278-
conn.commit()
279-
except Exception as e:
280-
logger.error(f"Database error: {e}")
281-
finally:
282-
conn.close()
273+
# Record in history table
274+
if change_type:
275+
cursor.execute(
276+
"INSERT INTO download_history (url, local_path, status, etag, last_modified, change_type) VALUES (?, ?, ?, ?, ?, ?)",
277+
(url, local_path_str, status, etag, last_modified, change_type),
278+
)
279+
280+
conn.commit()
281+
except Exception as e:
282+
logger.error(f"Database error: {e}")
283+
conn.rollback()
283284

284285
return url
285286

@@ -294,16 +295,14 @@ def get_download_status(db_path: str, url: str) -> dict:
294295
Returns:
295296
tuple: (etag, last_modified)
296297
"""
297-
conn = sqlite3.connect(db_path)
298-
cursor = conn.cursor()
299-
300-
cursor.execute(
301-
"SELECT etag, last_modified FROM downloads WHERE url = ? AND status = 'success'",
302-
(url,),
303-
)
304-
result = cursor.fetchone()
298+
with sqlite3.connect(db_path) as conn:
299+
cursor = conn.cursor()
305300

306-
conn.close()
301+
cursor.execute(
302+
"SELECT etag, last_modified FROM downloads WHERE url = ? AND status = 'success'",
303+
(url,),
304+
)
305+
result = cursor.fetchone()
307306

308307
existing_etag = result[0] if result else None
309308
existing_last_modified = result[1] if result else None
@@ -320,18 +319,16 @@ def get_download_results(db_path: str) -> tuple[set[str], set[str]]:
320319
Returns:
321320
tuple: (successful_urls, failed_urls)
322321
"""
323-
conn = sqlite3.connect(db_path)
324-
cursor = conn.cursor()
322+
with sqlite3.connect(db_path) as conn:
323+
cursor = conn.cursor()
325324

326-
# Get all successful downloads
327-
cursor.execute("SELECT url FROM downloads WHERE status = 'success'")
328-
successful_urls = {row[0] for row in cursor.fetchall()}
325+
# Get all successful downloads
326+
cursor.execute("SELECT url FROM downloads WHERE status = 'success'")
327+
successful_urls = {row[0] for row in cursor.fetchall()}
329328

330-
# Get all failed downloads
331-
cursor.execute("SELECT url FROM downloads WHERE status != 'success'")
332-
failed_urls = {row[0] for row in cursor.fetchall()}
333-
334-
conn.close()
329+
# Get all failed downloads
330+
cursor.execute("SELECT url FROM downloads WHERE status != 'success'")
331+
failed_urls = {row[0] for row in cursor.fetchall()}
335332

336333
return (successful_urls, failed_urls)
337334

@@ -345,13 +342,11 @@ def get_url_mapping(db_path: str) -> dict[str, str]:
345342
Returns:
346343
dict: {local_path: url}
347344
"""
348-
conn = sqlite3.connect(db_path)
349-
cursor = conn.cursor()
350-
351-
cursor.execute("SELECT local_path, url FROM downloads WHERE status = 'success'")
352-
mapping = {row[0]: row[1] for row in cursor.fetchall()}
345+
with sqlite3.connect(db_path) as conn:
346+
cursor = conn.cursor()
353347

354-
conn.close()
348+
cursor.execute("SELECT local_path, url FROM downloads WHERE status = 'success'")
349+
mapping = {row[0]: row[1] for row in cursor.fetchall()}
355350

356351
return mapping
357352

@@ -365,58 +360,56 @@ def get_change_report(db_path: str) -> dict:
365360
Returns:
366361
dict: Report data
367362
"""
368-
conn = sqlite3.connect(db_path)
369-
cursor = conn.cursor()
363+
with sqlite3.connect(db_path) as conn:
364+
cursor = conn.cursor()
370365

371-
# Get counts by change type
372-
cursor.execute(
366+
# Get counts by change type
367+
cursor.execute(
368+
"""
369+
SELECT change_type, COUNT(*)
370+
FROM download_history
371+
WHERE timestamp > datetime('now', '-1 hour')
372+
GROUP BY change_type
373373
"""
374-
SELECT change_type, COUNT(*)
375-
FROM download_history
376-
WHERE timestamp > datetime('now', '-1 hour')
377-
GROUP BY change_type
378-
"""
379-
)
380-
change_counts = {row[0]: row[1] for row in cursor.fetchall()}
374+
)
375+
change_counts = {row[0]: row[1] for row in cursor.fetchall()}
381376

382-
# Get list of updated files with timestamps
383-
cursor.execute(
377+
# Get list of updated files with timestamps
378+
cursor.execute(
379+
"""
380+
SELECT h.url, h.timestamp, d.timestamp
381+
FROM download_history h
382+
JOIN downloads d ON h.url = d.url
383+
WHERE h.change_type = 'updated'
384+
AND h.timestamp > datetime('now', '-1 hour')
384385
"""
385-
SELECT h.url, h.timestamp, d.timestamp
386-
FROM download_history h
387-
JOIN downloads d ON h.url = d.url
388-
WHERE h.change_type = 'updated'
389-
AND h.timestamp > datetime('now', '-1 hour')
390-
"""
391-
)
392-
updated_files = [
393-
{"url": row[0], "previous_timestamp": row[1], "current_timestamp": row[2]}
394-
for row in cursor.fetchall()
395-
]
386+
)
387+
updated_files = [
388+
{"url": row[0], "previous_timestamp": row[1], "current_timestamp": row[2]}
389+
for row in cursor.fetchall()
390+
]
396391

397-
# Get list of new files
398-
cursor.execute(
392+
# Get list of new files
393+
cursor.execute(
394+
"""
395+
SELECT url
396+
FROM download_history
397+
WHERE change_type = 'new'
398+
AND timestamp > datetime('now', '-1 hour')
399399
"""
400-
SELECT url
401-
FROM download_history
402-
WHERE change_type = 'new'
403-
AND timestamp > datetime('now', '-1 hour')
404-
"""
405-
)
406-
new_files = [row[0] for row in cursor.fetchall()]
400+
)
401+
new_files = [row[0] for row in cursor.fetchall()]
407402

408-
# Get list of errors
409-
cursor.execute(
403+
# Get list of errors
404+
cursor.execute(
405+
"""
406+
SELECT url
407+
FROM download_history
408+
WHERE change_type = 'error'
409+
AND timestamp > datetime('now', '-1 hour')
410410
"""
411-
SELECT url
412-
FROM download_history
413-
WHERE change_type = 'error'
414-
AND timestamp > datetime('now', '-1 hour')
415-
"""
416-
)
417-
error_files = [row[0] for row in cursor.fetchall()]
418-
419-
conn.close()
411+
)
412+
error_files = [row[0] for row in cursor.fetchall()]
420413

421414
# Create the report
422415
report = {

0 commit comments

Comments
 (0)