
Commit 8e009ee

Author: SM_SAYEED (committed)
admin rescanning of duplicates on fly disc
1 parent 7ff8d0c commit 8e009ee

File tree

1 file changed (+27, -163 lines)


app.py

Lines changed: 27 additions & 163 deletions
@@ -36,171 +36,9 @@ def allowed_results_file(filename):
 def allowed_music_file(filename):
     return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_MUSIC_EXTENSIONS
 
-# temporary debug function
-
-
-# =========================
-# DEBUG: diagnostics helpers
-# =========================
-import json
-from collections import defaultdict
-from flask import jsonify
-from werkzeug.exceptions import abort
-
-def _debug_safe_admin():
-    # basic gate: session['admin'] must be truthy
-    if not session.get('admin'):
-        abort(403)
-
-def _debug_walk_files(max_items=200):
-    files = []
-    try:
-        root = os.path.abspath(UPLOAD_FOLDER)
-        for r, _, f_list in os.walk(root):
-            for fn in f_list:
-                rel = os.path.relpath(os.path.join(r, fn), root)
-                files.append(rel.replace("\\", "/"))
-                if len(files) >= max_items:
-                    return files, True
-        return files, False
-    except Exception as e:
-        return [f"ERROR walking UPLOAD_FOLDER: {e}"], False
-
-def _debug_uploads_log_snapshot():
-    data = []
-    dups = defaultdict(list)
-    distinct = set()
-    try:
-        with sqlite3.connect(DB_NAME) as conn:
-            conn.row_factory = sqlite3.Row
-            c = conn.cursor()
-            # what DB are we using?
-            db_abs = os.path.abspath(DB_NAME)
-            # schema sanity
-            schema = c.execute("SELECT name, sql FROM sqlite_master WHERE type='table' AND name='uploads_log'").fetchone()
-            # rows
-            rows = c.execute("""
-                SELECT rowid, property, tab, filename,
-                       COALESCE(source,'') AS source,
-                       COALESCE(description,'') AS description,
-                       COALESCE(uploaded_at,'') AS uploaded_at
-                FROM uploads_log
-                ORDER BY uploaded_at DESC, rowid DESC
-            """).fetchall()
-            for r in rows:
-                tup = (r["property"], r["tab"], r["filename"])
-                distinct.add(tup)
-                dups[tup].append(r["rowid"])
-                data.append({
-                    "rowid": r["rowid"],
-                    "property": r["property"],
-                    "tab": r["tab"],
-                    "filename": r["filename"],
-                    "uploaded_at": r["uploaded_at"],
-                })
-            # find groups with more than 1 row
-            dup_groups = [
-                {"property": p, "tab": t, "filename": f, "rowids": ids, "count": len(ids)}
-                for (p, t, f), ids in dups.items() if len(ids) > 1
-            ]
-            return {
-                "db_abs_path": db_abs,
-                "uploads_log_table": schema["sql"] if schema else None,
-                "total_rows": len(data),
-                "distinct_triplets": len(distinct),
-                "duplicates_groups": sorted(dup_groups, key=lambda x: -x["count"])[:50],
-                "sample_rows": data[:50],
-            }
-    except Exception as e:
-        return {"error": f"snapshot failed: {e}"}
-
-def _debug_compare_property_tab(property_name, tab, max_items=200):
-    report = {"property": property_name, "tab": tab}
-    # FS view (what's actually on disk)
-    fs_files = []
-    try:
-        base = os.path.join(UPLOAD_FOLDER, property_name, tab)
-        base_abs = os.path.abspath(base)
-        for r, _, f_list in os.walk(base_abs):
-            for fn in f_list:
-                rel = os.path.relpath(os.path.join(r, fn), os.path.abspath(UPLOAD_FOLDER)).replace("\\", "/")
-                fs_files.append(rel)
-                if len(fs_files) >= max_items:
-                    break
-        report["fs_files"] = fs_files
-    except Exception as e:
-        report["fs_error"] = f"error walking FS: {e}"
-
-    # DB view (what uploads_log says)
-    db_files = []
-    try:
-        with sqlite3.connect(DB_NAME) as conn:
-            conn.row_factory = sqlite3.Row
-            c = conn.cursor()
-            rows = c.execute("""
-                SELECT rowid, filename, COALESCE(uploaded_at,'') AS uploaded_at
-                FROM uploads_log
-                WHERE property = ? AND tab = ?
-                ORDER BY uploaded_at DESC, rowid DESC
-            """, (property_name, tab)).fetchall()
-            for r in rows:
-                db_files.append({
-                    "rowid": r["rowid"],
-                    "filename": r["filename"],
-                    "uploaded_at": r["uploaded_at"]
-                })
-        report["db_files"] = db_files
-    except Exception as e:
-        report["db_error"] = f"error reading DB: {e}"
-
-    # Cross-check: mark DB rows whose file isn't on disk
-    missing_on_disk = []
-    for r in db_files:
-        fp = os.path.join(UPLOAD_FOLDER, property_name, tab, r["filename"])
-        if not os.path.isfile(fp):
-            missing_on_disk.append({"rowid": r["rowid"], "filename": r["filename"]})
-    report["db_entries_missing_on_disk"] = missing_on_disk
-
-    # Cross-check: mark files on disk that aren't in DB
-    db_names = set(r["filename"] for r in db_files)
-    on_disk_not_in_db = []
-    for rel in fs_files:
-        parts = rel.split("/")
-        if len(parts) >= 3 and parts[0] == property_name and parts[1] == tab:
-            if parts[2] not in db_names:
-                on_disk_not_in_db.append(parts[2])
-    report["disk_files_not_in_db"] = sorted(set(on_disk_not_in_db))
-    return report
-
-# --------------------------
-# Admin-only debug endpoints
-# --------------------------
-@app.get("/admin/_diag/where")
-def admin_diag_where():
-    _debug_safe_admin()
-    UP_abs = os.path.abspath(UPLOAD_FOLDER)
-    db_abs = os.path.abspath(DB_NAME)
-    fs_list, truncated = _debug_walk_files()
-    snap = _debug_uploads_log_snapshot()
-    return jsonify({
-        "UPLOAD_FOLDER": UP_abs,
-        "DB_NAME": db_abs,
-        "walk_uploads_folder_count": len(fs_list),
-        "walk_uploads_folder_truncated": truncated,
-        "walk_uploads_folder_sample": fs_list[:50],
-        "uploads_log_snapshot": snap
-    })
-
-@app.get("/admin/_diag/compare/<property_name>/<tab>")
-def admin_diag_compare(property_name, tab):
-    _debug_safe_admin()
-    return jsonify(_debug_compare_property_tab(property_name, tab))
-# =========================
-# END DEBUG
-# =========================
-
 # ========== Helper Functions ========== #
 
-# the current auto_log_material_files() ---
+# auto_log_material_files ---
 def ensure_uploads_log_schema():
     # Creates the table and backfills missing columns so existing DBs keep working
     with sqlite3.connect(DB_NAME) as conn:
@@ -395,6 +233,32 @@ def _startup_once():
     _run_startup_tasks()
 
 # ========== ROUTES ==========
+# Admin only rescanning for duplicates and re-importing
+def _uploads_count():
+    import sqlite3
+    with sqlite3.connect(DB_NAME) as conn:
+        (n,) = conn.execute("SELECT COUNT(*) FROM uploads_log").fetchone()
+    return n
+
+@app.route('/admin/rescan_uploads', methods=['GET', 'POST'])
+def admin_rescan_uploads():
+    if not session.get('admin'):
+        return redirect(url_for('login'))
+
+    before = _uploads_count()
+    try:
+        auto_log_material_files()  # walks UPLOAD_FOLDER and UPSERTs rows
+        status = "ok"
+    except Exception as e:
+        status = f"auto_log_material_files failed: {e}"
+
+    after = _uploads_count()
+    return jsonify({
+        "status": status,
+        "rows_before": before,
+        "rows_after": after,
+        "added_or_updated": after - before
+    })
 
 # -- Admin login/logout --
 @app.route('/login', methods=['GET', 'POST'])
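
One quick way to exercise the new route is Flask's built-in test client. The snippet below is a minimal sketch, not part of the commit: it assumes `app` is the Flask application defined in app.py and that setting session['admin'] is enough to pass the admin gate shown in the diff (a real deployment would authenticate through /login instead).

    # Illustrative only: poke /admin/rescan_uploads via the Flask test client.
    from app import app

    client = app.test_client()
    with client.session_transaction() as sess:
        sess['admin'] = True  # hypothetical shortcut past the admin check

    resp = client.post('/admin/rescan_uploads')
    payload = resp.get_json()
    # Keys match the jsonify() response in the diff above.
    print(payload["status"], payload["rows_before"], payload["rows_after"], payload["added_or_updated"])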
