
Commit f0e4c01

Author: SM_SAYEED
Commit message: modification on rescan_uploads
1 parent 8e009ee · commit f0e4c01

File tree: 1 file changed, +76 -62 lines


app.py

Lines changed: 76 additions & 62 deletions
@@ -38,77 +38,88 @@ def allowed_music_file(filename):
 
 # ========== Helper Functions ========== #
 
-# auto_log_material_files ---
 def ensure_uploads_log_schema():
-    # Creates the table and backfills missing columns so existing DBs keep working
+    """Create/upgrade uploads_log to the expected schema; ensure uniqueness."""
     with sqlite3.connect(DB_NAME) as conn:
         c = conn.cursor()
+        # Create table if missing (includes UNIQUE on the key)
         c.execute("""
-            CREATE TABLE IF NOT EXISTS uploads_log (
-                property TEXT NOT NULL,
-                tab TEXT NOT NULL,
-                filename TEXT NOT NULL,
-                uploaded_at TEXT,
-                source TEXT,
-                description TEXT
-            )
+            CREATE TABLE IF NOT EXISTS uploads_log (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                property TEXT NOT NULL,
+                tab TEXT NOT NULL,
+                filename TEXT NOT NULL,
+                uploaded_at TEXT,
+                UNIQUE(property, tab, filename)
+            )
         """)
-        # Backfill columns if the table already existed without them
-        existing = {row[1] for row in c.execute("PRAGMA table_info(uploads_log)")}
-        for col, ddl in [
-            ("uploaded_at", "ALTER TABLE uploads_log ADD COLUMN uploaded_at TEXT"),
-            ("source", "ALTER TABLE uploads_log ADD COLUMN source TEXT"),
-            ("description", "ALTER TABLE uploads_log ADD COLUMN description TEXT"),
-        ]:
-            if col not in existing:
-                c.execute(ddl)
+        # Ensure uploaded_at column exists (for older DBs)
+        cols = {row[1] for row in c.execute("PRAGMA table_info(uploads_log)").fetchall()}
+        if "uploaded_at" not in cols:
+            c.execute("ALTER TABLE uploads_log ADD COLUMN uploaded_at TEXT")
+            # Try to migrate from legacy logged_at if it exists
+            try:
+                c.execute("UPDATE uploads_log SET uploaded_at = COALESCE(uploaded_at, logged_at) WHERE uploaded_at IS NULL")
+            except sqlite3.OperationalError:
+                pass  # logged_at may not exist; ignore
 
+        # Ensure a unique index exists even if the table was created long ago
         c.execute("""
-            CREATE UNIQUE INDEX IF NOT EXISTS idx_uploads_unique
-            ON uploads_log(property, tab, filename)
+            CREATE UNIQUE INDEX IF NOT EXISTS idx_uploads_unique
+            ON uploads_log(property, tab, filename)
         """)
         conn.commit()
 
 def auto_log_material_files():
+    """
+    Walk UPLOAD_FOLDER and upsert one row per (property, tab, filename).
+    Idempotent: safe to call many times; never throws UNIQUE errors.
+    """
     ensure_uploads_log_schema()
 
-    # Avoid relying on current_app when we can read from app.config directly
-    upload_root = app.config.get("UPLOAD_FOLDER", UPLOAD_FOLDER)
-    if not os.path.exists(upload_root):
-        return
+    root_dir = UPLOAD_FOLDER
+    if not os.path.exists(root_dir):
+        return {"status": "skip", "reason": "UPLOAD_FOLDER missing", "added_or_updated": 0}
 
-    all_allowed_exts = ALLOWED_DATASET_EXTENSIONS | ALLOWED_RESULTS_EXTENSIONS
-    to_insert = []
-
-    for root, _, files in os.walk(upload_root):
-        rel_root = os.path.relpath(root, upload_root)
-        if rel_root.split(os.sep)[0] == "clips":
-            continue
+    allowed_exts = (ALLOWED_DATASET_EXTENSIONS | ALLOWED_RESULTS_EXTENSIONS)
+    rows = []  # (property, tab, filename, uploaded_at)
 
+    for root, _dirs, files in os.walk(root_dir):
         for fname in files:
-            ext = fname.rsplit(".", 1)[-1].lower() if "." in fname else ""
-            if ext not in all_allowed_exts:
+            ext = fname.rsplit('.', 1)[-1].lower() if '.' in fname else ''
+            if ext not in allowed_exts:
                 continue
 
-            rel_path = os.path.relpath(os.path.join(root, fname), upload_root)
+            full = os.path.join(root, fname)
+            rel_path = os.path.relpath(full, root_dir)  # e.g. bandgap/dataset/foo.csv
             parts = rel_path.split(os.sep)
+
+            # Skip music under uploads/clips/
+            if parts and parts[0] == 'clips':
+                continue
+
             if len(parts) >= 3:
-                property_name, tab, file_name = parts[0], parts[1], parts[2]
-                to_insert.append((property_name, tab, file_name))
+                prop, tab, filename = parts[0], parts[1], parts[2]
+                rows.append((prop, tab, filename, datetime.utcnow().isoformat(timespec="seconds")))
 
-    if not to_insert:
-        return
+    if not rows:
+        return {"status": "ok", "added_or_updated": 0}
 
+    added_or_updated = 0
     with sqlite3.connect(DB_NAME) as conn:
         c = conn.cursor()
-        # Upsert: if the file is already logged, refresh uploaded_at
-        c.executemany("""
-            INSERT INTO uploads_log (property, tab, filename, uploaded_at)
-            VALUES (?, ?, ?, CURRENT_TIMESTAMP)
-            ON CONFLICT(property, tab, filename)
-            DO UPDATE SET uploaded_at=excluded.uploaded_at
-        """, to_insert)
+        upsert = """
+            INSERT INTO uploads_log (property, tab, filename, uploaded_at)
+            VALUES (?, ?, ?, ?)
+            ON CONFLICT(property, tab, filename)
+            DO UPDATE SET uploaded_at = excluded.uploaded_at
+        """
+        for r in rows:
+            c.execute(upsert, r)
+            added_or_updated += 1
         conn.commit()
+
+    return {"status": "ok", "added_or_updated": added_or_updated}
 
 # Automation of import to sqlite3 database
 def auto_import_uploads():
@@ -231,33 +242,36 @@ def _run_startup_tasks():
 def _startup_once():
     if not _startup_done:
         _run_startup_tasks()
+
 
 # ========== ROUTES ==========
+
+
 # Admin only rescanning for duplicates and re-importing
-def _uploads_count():
-    import sqlite3
-    with sqlite3.connect(DB_NAME) as conn:
-        (n,) = conn.execute("SELECT COUNT(*) FROM uploads_log").fetchone()
-        return n
 
-@app.route('/admin/rescan_uploads', methods=['GET', 'POST'])
+@app.route("/admin/rescan_uploads")
 def admin_rescan_uploads():
-    if not session.get('admin'):
-        return redirect(url_for('login'))
+    if not session.get("admin"):
+        return redirect(url_for("login"))
+
+    def count_rows():
+        with sqlite3.connect(DB_NAME) as conn:
+            cur = conn.cursor()
+            cur.execute("SELECT COUNT(*) FROM uploads_log")
+            return cur.fetchone()[0]
 
-    before = _uploads_count()
+    before = count_rows()
     try:
-        auto_log_material_files()  # walks UPLOAD_FOLDER and UPSERTs rows
-        status = "ok"
+        result = auto_log_material_files()
     except Exception as e:
-        status = f"auto_log_material_files failed: {e}"
+        return jsonify({"status": f"auto_log_material_files failed: {e}"}), 500
+    after = count_rows()
 
-    after = _uploads_count()
     return jsonify({
-        "status": status,
+        "status": result.get("status"),
+        "added_or_updated": result.get("added_or_updated"),
         "rows_before": before,
-        "rows_after": after,
-        "added_or_updated": after - before
+        "rows_after": after
     })
 
 # -- Admin login/logout --
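
Note: the reworked logging path leans entirely on SQLite's ON CONFLICT upsert to stay idempotent. Below is a minimal, self-contained sketch of that pattern, not code from this repo: the table and upsert statement mirror the diff, while the in-memory database and the sample bandgap/dataset/foo.csv row (taken from the inline comment) are illustrative only.

import sqlite3
from datetime import datetime

conn = sqlite3.connect(":memory:")   # throwaway DB just for the demo
c = conn.cursor()
c.execute("""
    CREATE TABLE IF NOT EXISTS uploads_log (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        property TEXT NOT NULL,
        tab TEXT NOT NULL,
        filename TEXT NOT NULL,
        uploaded_at TEXT,
        UNIQUE(property, tab, filename)
    )
""")

upsert = """
    INSERT INTO uploads_log (property, tab, filename, uploaded_at)
    VALUES (?, ?, ?, ?)
    ON CONFLICT(property, tab, filename)
    DO UPDATE SET uploaded_at = excluded.uploaded_at
"""
row = ("bandgap", "dataset", "foo.csv",
       datetime.utcnow().isoformat(timespec="seconds"))

c.execute(upsert, row)   # first rescan: row is inserted
c.execute(upsert, row)   # second rescan: no UNIQUE error, uploaded_at is refreshed
conn.commit()
print(c.execute("SELECT COUNT(*) FROM uploads_log").fetchone()[0])   # -> 1

Because the conflict target matches the UNIQUE(property, tab, filename) constraint, repeated rescans only refresh uploaded_at instead of accumulating duplicates; ON CONFLICT ... DO UPDATE requires SQLite 3.24 or newer.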

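Note: a quick way to exercise the new endpoint from a Python shell (using the requests library) is sketched below. Only the /admin/rescan_uploads path, the admin-session check, and the JSON keys come from this diff; the host/port, login form fields, and credentials are assumptions to adapt.

import requests

BASE = "http://127.0.0.1:5000"   # assumed local dev server

s = requests.Session()
# Assumption: the login route takes form-encoded credentials; adjust to the real form.
s.post(f"{BASE}/login", data={"username": "admin", "password": "..."})

r = s.get(f"{BASE}/admin/rescan_uploads")   # route is GET-only after this commit
print(r.status_code, r.json())
# Expected shape on success (values illustrative):
# 200 {"status": "ok", "added_or_updated": 3, "rows_before": 40, "rows_after": 43}
# On failure the handler returns HTTP 500 with {"status": "auto_log_material_files failed: ..."}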