12
12
import json
13
13
import logging
14
14
import sqlite3
15
+ import sys
15
16
import time
16
17
from pathlib import Path
17
18
from typing import Optional , Union
def init_database(db_path: str) -> str:
    """Initialize the SQLite database used to track downloaded files.

    Creates the parent directory of ``db_path`` when it is missing, then
    creates the ``downloads`` table (one row per URL, ``url`` is UNIQUE) and
    the ``download_history`` table (one row per recorded change) if they do
    not already exist. Calling this on an existing database is a no-op for
    tables that are already present.

    Args:
        db_path: Filesystem path of the SQLite database file.

    Returns:
        The same ``db_path`` that was passed in.
    """
    Path(db_path).parent.mkdir(parents=True, exist_ok=True)

    conn = sqlite3.connect(db_path)
    try:
        # ``with conn`` only manages the transaction (commit on success,
        # rollback on error); it does NOT close the connection, hence the
        # explicit close() in ``finally``.
        with conn:
            # Main downloads table: current state of each URL.
            conn.execute(
                """
                CREATE TABLE IF NOT EXISTS downloads (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    url TEXT UNIQUE NOT NULL,
                    local_path TEXT NOT NULL,
                    status TEXT NOT NULL,
                    etag TEXT,
                    last_modified TEXT,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
                )
                """
            )

            # History table: append-only log of changes per URL.
            conn.execute(
                """
                CREATE TABLE IF NOT EXISTS download_history (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    url TEXT NOT NULL,
                    local_path TEXT NOT NULL,
                    status TEXT NOT NULL,
                    etag TEXT,
                    last_modified TEXT,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                    change_type TEXT NOT NULL
                )
                """
            )
    finally:
        conn.close()

    logger.info(f"Database initialized at {db_path}")

    return db_path
@@ -257,29 +258,29 @@ def record_download(
257
258
last_modified (str): Last-Modified header from the response, if available
258
259
change_type (str): Type of change (new, updated, unchanged, error)
259
260
"""
260
- conn = sqlite3 .connect (db_path )
261
- cursor = conn .cursor ()
262
261
local_path_str = str (local_path )
263
262
264
- try :
265
- # Record in main downloads table
266
- cursor .execute (
267
- "INSERT OR REPLACE INTO downloads (url, local_path, status, etag, last_modified, timestamp) VALUES (?, ?, ?, ?, ?, datetime('now'))" ,
268
- (url , local_path_str , status , etag , last_modified ),
269
- )
263
+ with sqlite3 .connect (db_path ) as conn :
264
+ cursor = conn .cursor ()
270
265
271
- # Record in history table
272
- if change_type :
266
+ try :
267
+ # Record in main downloads table
273
268
cursor .execute (
274
- "INSERT INTO download_history (url, local_path, status, etag, last_modified, change_type ) VALUES (?, ?, ?, ?, ?, ? )" ,
275
- (url , local_path_str , status , etag , last_modified , change_type ),
269
+ "INSERT OR REPLACE INTO downloads (url, local_path, status, etag, last_modified, timestamp ) VALUES (?, ?, ?, ?, ?, datetime('now') )" ,
270
+ (url , local_path_str , status , etag , last_modified ),
276
271
)
277
272
278
- conn .commit ()
279
- except Exception as e :
280
- logger .error (f"Database error: { e } " )
281
- finally :
282
- conn .close ()
273
+ # Record in history table
274
+ if change_type :
275
+ cursor .execute (
276
+ "INSERT INTO download_history (url, local_path, status, etag, last_modified, change_type) VALUES (?, ?, ?, ?, ?, ?)" ,
277
+ (url , local_path_str , status , etag , last_modified , change_type ),
278
+ )
279
+
280
+ conn .commit ()
281
+ except Exception as e :
282
+ logger .error (f"Database error: { e } " )
283
+ conn .rollback ()
283
284
284
285
return url
285
286
@@ -294,16 +295,14 @@ def get_download_status(db_path: str, url: str) -> dict:
294
295
Returns:
295
296
tuple: (etag, last_modified)
296
297
"""
297
- conn = sqlite3 .connect (db_path )
298
- cursor = conn .cursor ()
299
-
300
- cursor .execute (
301
- "SELECT etag, last_modified FROM downloads WHERE url = ? AND status = 'success'" ,
302
- (url ,),
303
- )
304
- result = cursor .fetchone ()
298
+ with sqlite3 .connect (db_path ) as conn :
299
+ cursor = conn .cursor ()
305
300
306
- conn .close ()
301
+ cursor .execute (
302
+ "SELECT etag, last_modified FROM downloads WHERE url = ? AND status = 'success'" ,
303
+ (url ,),
304
+ )
305
+ result = cursor .fetchone ()
307
306
308
307
existing_etag = result [0 ] if result else None
309
308
existing_last_modified = result [1 ] if result else None
def get_download_results(db_path: str) -> tuple[set[str], set[str]]:
    """Return the URLs in the ``downloads`` table, split by outcome.

    Args:
        db_path: Path to the SQLite tracking database.

    Returns:
        tuple: (successful_urls, failed_urls) where ``successful_urls`` holds
        every URL whose status is ``'success'`` and ``failed_urls`` holds
        every URL whose status is any other non-NULL value.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # Get all successful downloads
        cursor.execute("SELECT url FROM downloads WHERE status = 'success'")
        successful_urls = {row[0] for row in cursor.fetchall()}

        # Get all failed downloads
        cursor.execute("SELECT url FROM downloads WHERE status != 'success'")
        failed_urls = {row[0] for row in cursor.fetchall()}
    finally:
        # The sqlite3 connection context manager does not close the
        # connection, so release it explicitly even if a query fails.
        conn.close()

    return (successful_urls, failed_urls)
337
334
def get_url_mapping(db_path: str) -> dict[str, str]:
    """Map local file paths to the URLs they were successfully downloaded from.

    Only rows of the ``downloads`` table whose status is ``'success'`` are
    included.

    Args:
        db_path: Path to the SQLite tracking database.

    Returns:
        dict: {local_path: url}
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT local_path, url FROM downloads WHERE status = 'success'")
        mapping = {row[0]: row[1] for row in cursor.fetchall()}
    finally:
        # The sqlite3 connection context manager does not close the
        # connection, so release it explicitly even if the query fails.
        conn.close()

    return mapping
357
352
@@ -365,58 +360,56 @@ def get_change_report(db_path: str) -> dict:
365
360
Returns:
366
361
dict: Report data
367
362
"""
368
- conn = sqlite3 .connect (db_path )
369
- cursor = conn .cursor ()
363
+ with sqlite3 .connect (db_path ) as conn :
364
+ cursor = conn .cursor ()
370
365
371
- # Get counts by change type
372
- cursor .execute (
366
+ # Get counts by change type
367
+ cursor .execute (
368
+ """
369
+ SELECT change_type, COUNT(*)
370
+ FROM download_history
371
+ WHERE timestamp > datetime('now', '-1 hour')
372
+ GROUP BY change_type
373
373
"""
374
- SELECT change_type, COUNT(*)
375
- FROM download_history
376
- WHERE timestamp > datetime('now', '-1 hour')
377
- GROUP BY change_type
378
- """
379
- )
380
- change_counts = {row [0 ]: row [1 ] for row in cursor .fetchall ()}
374
+ )
375
+ change_counts = {row [0 ]: row [1 ] for row in cursor .fetchall ()}
381
376
382
- # Get list of updated files with timestamps
383
- cursor .execute (
377
+ # Get list of updated files with timestamps
378
+ cursor .execute (
379
+ """
380
+ SELECT h.url, h.timestamp, d.timestamp
381
+ FROM download_history h
382
+ JOIN downloads d ON h.url = d.url
383
+ WHERE h.change_type = 'updated'
384
+ AND h.timestamp > datetime('now', '-1 hour')
384
385
"""
385
- SELECT h.url, h.timestamp, d.timestamp
386
- FROM download_history h
387
- JOIN downloads d ON h.url = d.url
388
- WHERE h.change_type = 'updated'
389
- AND h.timestamp > datetime('now', '-1 hour')
390
- """
391
- )
392
- updated_files = [
393
- {"url" : row [0 ], "previous_timestamp" : row [1 ], "current_timestamp" : row [2 ]}
394
- for row in cursor .fetchall ()
395
- ]
386
+ )
387
+ updated_files = [
388
+ {"url" : row [0 ], "previous_timestamp" : row [1 ], "current_timestamp" : row [2 ]}
389
+ for row in cursor .fetchall ()
390
+ ]
396
391
397
- # Get list of new files
398
- cursor .execute (
392
+ # Get list of new files
393
+ cursor .execute (
394
+ """
395
+ SELECT url
396
+ FROM download_history
397
+ WHERE change_type = 'new'
398
+ AND timestamp > datetime('now', '-1 hour')
399
399
"""
400
- SELECT url
401
- FROM download_history
402
- WHERE change_type = 'new'
403
- AND timestamp > datetime('now', '-1 hour')
404
- """
405
- )
406
- new_files = [row [0 ] for row in cursor .fetchall ()]
400
+ )
401
+ new_files = [row [0 ] for row in cursor .fetchall ()]
407
402
408
- # Get list of errors
409
- cursor .execute (
403
+ # Get list of errors
404
+ cursor .execute (
405
+ """
406
+ SELECT url
407
+ FROM download_history
408
+ WHERE change_type = 'error'
409
+ AND timestamp > datetime('now', '-1 hour')
410
410
"""
411
- SELECT url
412
- FROM download_history
413
- WHERE change_type = 'error'
414
- AND timestamp > datetime('now', '-1 hour')
415
- """
416
- )
417
- error_files = [row [0 ] for row in cursor .fetchall ()]
418
-
419
- conn .close ()
411
+ )
412
+ error_files = [row [0 ] for row in cursor .fetchall ()]
420
413
421
414
# Create the report
422
415
report = {
0 commit comments