-# Test deploy via GitHub Actions. This app will temporarily run on a public IP.
-# It is a Flask web application that allows users to upload datasets, view results, and manage
+# Toy database for Patterns Matter #
+# ======= Imports ======= #
+
 from flask import Flask, request, redirect, url_for, render_template, send_from_directory, flash, session, current_app, abort, jsonify
 import os
 import pandas as pd
@@ ... @@
 import datetime
 import re
 import csv
+
 # ========== SETTINGS ==========
+
 UPLOAD_FOLDER = 'uploads'
 DB_NAME = 'patterns-matter.db'  # SQLite database file
 ADMIN_PASSWORD = 'IronMa1deN!'
@@ ... @@
 ALLOWED_RESULTS_EXTENSIONS = {'jpg', 'jpeg', 'png', 'gif', 'pdf', 'docx'}
 ALLOWED_MUSIC_EXTENSIONS = {'mp3', 'wav', 'm4a', 'ogg', 'mp4'}
 
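+# Editorial note (not part of this commit): DB_NAME and UPLOAD_FOLDER are
+# relative paths, so they resolve against the process's working directory.
+# A common hardening sketch, using a hypothetical BASE_DIR constant:
+#   BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+#   DB_NAME = os.path.join(BASE_DIR, 'patterns-matter.db')
+#   UPLOAD_FOLDER = os.path.join(BASE_DIR, 'uploads')
+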
+app = Flask(__name__)
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+app.secret_key = 'IronMa1deN!'
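+# Editorial sketch (not part of this commit): the same literal serves as both
+# ADMIN_PASSWORD and the session-signing key, and it lives in source control.
+# Reading both from the environment is the usual fix:
+#   app.secret_key = os.environ.get('SECRET_KEY', 'dev-only-not-secret')
+#   ADMIN_PASSWORD = os.environ.get('ADMIN_PASSWORD', '')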
+
+# ---------- Utility Functions ----------
+
+def allowed_dataset_file(filename):
+    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_DATASET_EXTENSIONS
+
+def allowed_results_file(filename):
+    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_RESULTS_EXTENSIONS
+
+def allowed_music_file(filename):
+    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_MUSIC_EXTENSIONS
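+# Editorial sketch (not part of this commit): the three checks above differ
+# only in the extension set, so one hypothetical helper could serve all three:
+#   def _has_allowed_ext(filename, allowed):
+#       return '.' in filename and filename.rsplit('.', 1)[1].lower() in allowed
+#   def allowed_dataset_file(filename):
+#       return _has_allowed_ext(filename, ALLOWED_DATASET_EXTENSIONS)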
+
+# ========== Helper Functions ========== #
+
+# --- uploads_log schema setup, used by auto_log_material_files() ---
+def ensure_uploads_log_schema():
+    # Creates the table and backfills missing columns so existing DBs keep working
+    with sqlite3.connect(DB_NAME) as conn:
+        c = conn.cursor()
+        c.execute("""
+            CREATE TABLE IF NOT EXISTS uploads_log (
+                property TEXT NOT NULL,
+                tab TEXT NOT NULL,
+                filename TEXT NOT NULL,
+                uploaded_at TEXT,
+                source TEXT,
+                description TEXT
+            )
+        """)
+        # Backfill columns if the table already existed without them
+        existing = {row[1] for row in c.execute("PRAGMA table_info(uploads_log)")}
+        for col, ddl in [
+            ("uploaded_at", "ALTER TABLE uploads_log ADD COLUMN uploaded_at TEXT"),
+            ("source", "ALTER TABLE uploads_log ADD COLUMN source TEXT"),
+            ("description", "ALTER TABLE uploads_log ADD COLUMN description TEXT"),
+        ]:
+            if col not in existing:
+                c.execute(ddl)
+
+        c.execute("""
+            CREATE UNIQUE INDEX IF NOT EXISTS idx_uploads_unique
+            ON uploads_log(property, tab, filename)
+        """)
+        conn.commit()
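+# Editorial sanity check (illustrative, not part of this commit):
+#   >>> ensure_uploads_log_schema()
+#   >>> with sqlite3.connect(DB_NAME) as conn:
+#   ...     print([r[1] for r in conn.execute("PRAGMA table_info(uploads_log)")])
+#   ['property', 'tab', 'filename', 'uploaded_at', 'source', 'description']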
+
+def auto_log_material_files():
+    ensure_uploads_log_schema()
+
+    # Avoid relying on current_app when we can read from app.config directly
+    upload_root = app.config.get("UPLOAD_FOLDER", UPLOAD_FOLDER)
+    if not os.path.exists(upload_root):
+        return
+
+    all_allowed_exts = ALLOWED_DATASET_EXTENSIONS | ALLOWED_RESULTS_EXTENSIONS
+    to_insert = []
+
+    for root, _, files in os.walk(upload_root):
+        rel_root = os.path.relpath(root, upload_root)
+        if rel_root.split(os.sep)[0] == "clips":
+            continue
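+        # Editorial note: "clips" is presumably the music-upload area (cf.
+        # ALLOWED_MUSIC_EXTENSIONS), so it is skipped when logging materials.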
+
+        for fname in files:
+            ext = fname.rsplit(".", 1)[-1].lower() if "." in fname else ""
+            if ext not in all_allowed_exts:
+                continue
+
+            rel_path = os.path.relpath(os.path.join(root, fname), upload_root)
+            parts = rel_path.split(os.sep)
+            if len(parts) >= 3:
+                property_name, tab, file_name = parts[0], parts[1], parts[2]
+                to_insert.append((property_name, tab, file_name))
+
+    if not to_insert:
+        return
+
+    with sqlite3.connect(DB_NAME) as conn:
+        c = conn.cursor()
+        # Upsert (ON CONFLICT ... DO UPDATE needs SQLite 3.24+): refresh uploaded_at if the file is already logged
+        c.executemany("""
+            INSERT INTO uploads_log (property, tab, filename, uploaded_at)
+            VALUES (?, ?, ?, CURRENT_TIMESTAMP)
+            ON CONFLICT(property, tab, filename)
+            DO UPDATE SET uploaded_at=excluded.uploaded_at
+        """, to_insert)
+        conn.commit()
+
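+# Editorial note: the rel_path split above implies this layout under uploads/
+# (the example paths are illustrative):
+#   uploads/<property>/<tab>/<filename>
+#   e.g. uploads/site-a/datasets/run1.csv -> ('site-a', 'datasets', 'run1.csv')
+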
 # Automation of import to sqlite3 database
 def auto_import_uploads():
     """
@@ -109,107 +201,36 @@ def tableize(name: str) -> str:
     print(f"auto_import_uploads: done, {imported} table(s) updated.")
     return imported
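+# Editorial usage sketch (not part of this commit): the return value lets the
+# warm-up report what happened, e.g.:
+#   imported = auto_import_uploads()
+#   app.logger.info("warm-up updated %d table(s)", imported)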
 
-# the current auto_log_material_files() ---
-def ensure_uploads_log_schema():
-    # Creates the table and backfills missing columns so existing DBs keep working
-    with sqlite3.connect(DB_NAME) as conn:
-        c = conn.cursor()
-        c.execute("""
-            CREATE TABLE IF NOT EXISTS uploads_log (
-                property TEXT NOT NULL,
-                tab TEXT NOT NULL,
-                filename TEXT NOT NULL,
-                uploaded_at TEXT,
-                source TEXT,
-                description TEXT
-            )
-        """)
-        # Backfill columns if the table already existed without them
-        existing = {row[1] for row in c.execute("PRAGMA table_info(uploads_log)")}
-        for col, ddl in [
-            ("uploaded_at", "ALTER TABLE uploads_log ADD COLUMN uploaded_at TEXT"),
-            ("source", "ALTER TABLE uploads_log ADD COLUMN source TEXT"),
-            ("description", "ALTER TABLE uploads_log ADD COLUMN description TEXT"),
-        ]:
-            if col not in existing:
-                c.execute(ddl)
-
-        c.execute("""
-            CREATE UNIQUE INDEX IF NOT EXISTS idx_uploads_unique
-            ON uploads_log(property, tab, filename)
-        """)
-        conn.commit()
-
-
-def auto_log_material_files():
-    ensure_uploads_log_schema()
+# Run-once warm-up
 
-    # Avoid relying on current_app when we can read from app.config directly
-    upload_root = app.config.get("UPLOAD_FOLDER", UPLOAD_FOLDER)
-    if not os.path.exists(upload_root):
-        return
+from threading import Lock
 
-    all_allowed_exts = ALLOWED_DATASET_EXTENSIONS | ALLOWED_RESULTS_EXTENSIONS
-    to_insert = []
-
-    for root, _, files in os.walk(upload_root):
-        rel_root = os.path.relpath(root, upload_root)
-        if rel_root.split(os.sep)[0] == "clips":
-            continue
-
-        for fname in files:
-            ext = fname.rsplit(".", 1)[-1].lower() if "." in fname else ""
-            if ext not in all_allowed_exts:
-                continue
+_startup_done = False
+_startup_lock = Lock()
 
-            rel_path = os.path.relpath(os.path.join(root, fname), upload_root)
-            parts = rel_path.split(os.sep)
-            if len(parts) >= 3:
-                property_name, tab, file_name = parts[0], parts[1], parts[2]
-                to_insert.append((property_name, tab, file_name))
-
-    if not to_insert:
-        return
-
-    with sqlite3.connect(DB_NAME) as conn:
-        c = conn.cursor()
-        # Upsert: if the file is already logged, refresh uploaded_at
-        c.executemany("""
-            INSERT INTO uploads_log (property, tab, filename, uploaded_at)
-            VALUES (?, ?, ?, CURRENT_TIMESTAMP)
-            ON CONFLICT(property, tab, filename)
-            DO UPDATE SET uploaded_at=excluded.uploaded_at
-        """, to_insert)
-        conn.commit()
-
-
-# ========== FLASK APP ==========
-
-app = Flask(__name__)
-app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
-app.secret_key = 'IronMa1deN!'
-
-@app.before_first_request
-def _warm_up():
-    try:
-        auto_import_uploads()
-    except Exception as e:
-        app.logger.warning("auto_import_uploads skipped: %s", e)
-    try:
-        auto_log_material_files()
-    except Exception as e:
-        app.logger.warning("auto_log_material_files skipped: %s", e)
-
-
-# ---------- Utility Functions ----------
-def allowed_dataset_file(filename):
-    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_DATASET_EXTENSIONS
-
-def allowed_results_file(filename):
-    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_RESULTS_EXTENSIONS
+def _run_startup_tasks():
+    global _startup_done
+    with _startup_lock:
+        if _startup_done:
+            return
+        try:
+            ensure_uploads_log_schema()  # defined above; idempotent
+        except Exception as e:
+            app.logger.warning("ensure_uploads_log_schema skipped: %s", e)
+        try:
+            auto_import_uploads()
+        except Exception as e:
+            app.logger.warning("auto_import_uploads skipped: %s", e)
+        try:
+            auto_log_material_files()
+        except Exception as e:
+            app.logger.warning("auto_log_material_files skipped: %s", e)
+        _startup_done = True
 
-def allowed_music_file(filename):
-    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_MUSIC_EXTENSIONS
+@app.before_request
+def _startup_once():
+    if not _startup_done:
+        _run_startup_tasks()
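+# Editorial note: before_first_request was deprecated and then removed in
+# newer Flask releases, so a guarded before_request hook is the usual
+# replacement. The lock serializes the first concurrent requests within one
+# process; under multiple workers (e.g. gunicorn) each process runs the
+# warm-up once, which is tolerable here because the schema setup and the
+# upsert-based logging are idempotent.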
 
 # ========== ROUTES ==========