@@ -38,77 +38,88 @@ def allowed_music_file(filename):
3838
3939# ========== Helper Functions ========== #
4040
41- # auto_log_material_files ---
def ensure_uploads_log_schema():
    """Create/upgrade uploads_log to the expected schema; ensure uniqueness."""
    with sqlite3.connect(DB_NAME) as conn:
        cur = conn.cursor()
        # Fresh databases get the full schema, including the UNIQUE key,
        # in a single statement.
        cur.execute(
            """
            CREATE TABLE IF NOT EXISTS uploads_log (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                property TEXT NOT NULL,
                tab TEXT NOT NULL,
                filename TEXT NOT NULL,
                uploaded_at TEXT,
                UNIQUE(property, tab, filename)
            )
            """
        )
        # Older databases may predate the uploaded_at column; add it on demand.
        column_names = {info[1] for info in cur.execute("PRAGMA table_info(uploads_log)").fetchall()}
        if "uploaded_at" not in column_names:
            cur.execute("ALTER TABLE uploads_log ADD COLUMN uploaded_at TEXT")
            # Best-effort migration from a legacy logged_at column, if present.
            try:
                cur.execute(
                    "UPDATE uploads_log SET uploaded_at = COALESCE(uploaded_at, logged_at) "
                    "WHERE uploaded_at IS NULL"
                )
            except sqlite3.OperationalError:
                # logged_at never existed in this DB; nothing to migrate.
                pass

        # Tables created before UNIQUE(property, tab, filename) was part of the
        # CREATE statement still need the constraint — enforce it via an index.
        cur.execute(
            """
            CREATE UNIQUE INDEX IF NOT EXISTS idx_uploads_unique
            ON uploads_log(property, tab, filename)
            """
        )
        conn.commit()
def auto_log_material_files():
    """
    Walk UPLOAD_FOLDER and upsert one row per (property, tab, filename).

    Idempotent: safe to call many times; never raises UNIQUE errors because
    duplicates are resolved by the ON CONFLICT upsert (which refreshes
    uploaded_at instead).

    Returns:
        dict: {"status": "skip"|"ok", "added_or_updated": int, ...} so callers
        (e.g. the admin rescan route) can report what happened.
    """
    # Local import keeps this block self-contained; timezone is only used here.
    from datetime import datetime, timezone

    ensure_uploads_log_schema()

    root_dir = UPLOAD_FOLDER
    if not os.path.exists(root_dir):
        return {"status": "skip", "reason": "UPLOAD_FOLDER missing", "added_or_updated": 0}

    allowed_exts = ALLOWED_DATASET_EXTENSIONS | ALLOWED_RESULTS_EXTENSIONS

    # One timestamp for the whole scan (hoisted out of the loop — it was
    # recomputed per file).  Naive UTC with the same text format that
    # datetime.utcnow().isoformat(timespec="seconds") produced, but without
    # the deprecated utcnow() call.
    stamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat(timespec="seconds")

    rows = []  # (property, tab, filename, uploaded_at)
    for root, _dirs, files in os.walk(root_dir):
        for fname in files:
            ext = fname.rsplit('.', 1)[-1].lower() if '.' in fname else ''
            if ext not in allowed_exts:
                continue

            rel_path = os.path.relpath(os.path.join(root, fname), root_dir)  # e.g. bandgap/dataset/foo.csv
            parts = rel_path.split(os.sep)

            # Skip music under uploads/clips/
            if parts and parts[0] == 'clips':
                continue

            # Expect at least property/tab/filename below the upload root;
            # anything shallower is not a material file and is ignored.
            if len(parts) >= 3:
                rows.append((parts[0], parts[1], parts[2], stamp))

    if not rows:
        return {"status": "ok", "added_or_updated": 0}

    with sqlite3.connect(DB_NAME) as conn:
        c = conn.cursor()
        # Batch upsert (one executemany) instead of an execute() per row.
        c.executemany(
            """
            INSERT INTO uploads_log (property, tab, filename, uploaded_at)
            VALUES (?, ?, ?, ?)
            ON CONFLICT(property, tab, filename)
            DO UPDATE SET uploaded_at = excluded.uploaded_at
            """,
            rows,
        )
        conn.commit()

    return {"status": "ok", "added_or_updated": len(rows)}
113124# Automation of import to sqlite3 database
114125def auto_import_uploads ():
@@ -231,33 +242,36 @@ def _run_startup_tasks():
def _startup_once():
    # Guard clause: startup tasks run only on the first call.
    if _startup_done:
        return
    _run_startup_tasks()
234246
235247# ========== ROUTES ==========
248+
249+
236250# Admin only rescanning for duplicates and re-importing
@app.route("/admin/rescan_uploads", methods=["GET", "POST"])
def admin_rescan_uploads():
    """Admin-only: rescan UPLOAD_FOLDER into uploads_log and report counts.

    Accepts GET and POST — the previous revision supported both, so POST is
    restored here to keep any existing admin forms/buttons working.
    Returns JSON with the scan status and before/after row counts;
    500 with an error status if the scan itself raises.
    """
    if not session.get("admin"):
        return redirect(url_for("login"))

    def count_rows():
        # Current number of logged uploads; used for the before/after report.
        with sqlite3.connect(DB_NAME) as conn:
            cur = conn.cursor()
            cur.execute("SELECT COUNT(*) FROM uploads_log")
            return cur.fetchone()[0]

    before = count_rows()
    try:
        result = auto_log_material_files()
    except Exception as e:
        # Boundary handler: surface the failure to the admin as JSON + 500.
        return jsonify({"status": f"auto_log_material_files failed: {e}"}), 500
    after = count_rows()

    return jsonify({
        "status": result.get("status"),
        "added_or_updated": result.get("added_or_updated"),
        "rows_before": before,
        "rows_after": after
    })
262276
263277# -- Admin login/logout --
0 commit comments