Skip to content

Commit 85d12a5

Browse files
author
SM_SAYEED
committed
Fixed log duplication.
1 parent ec73389 commit 85d12a5

File tree

1 file changed

+88
-48
lines changed

1 file changed

+88
-48
lines changed

app.py

Lines changed: 88 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -19,69 +19,109 @@
1919

2020
# Automation of import to sqlite3 database
2121
def auto_import_uploads():
22+
"""
23+
Import CSV/NPY datasets from uploads/<property>/<dataset>/ into SQLite tables.
24+
    - Skips the entire uploads/clips/ tree (music files)
25+
- Does NOT write to uploads_log (logging handled elsewhere)
26+
    - Re-imports a file only when its mtime differs from the value recorded for its relative path in the import_etag table
27+
"""
2228
if not os.path.exists(UPLOAD_FOLDER):
23-
return
29+
print("auto_import_uploads: uploads/ folder not found, skipping.")
30+
return 0
2431

25-
for root, dirs, files in os.walk(UPLOAD_FOLDER):
26-
for filename in files:
27-
ext = filename.rsplit('.', 1)[1].lower()
28-
if ext not in ['csv', 'npy']:
29-
continue
32+
ALLOWED_IMPORT_EXTS = {'csv', 'npy'}
33+
imported = 0
3034

31-
filepath = os.path.join(root, filename)
32-
table_name = filename.replace('.', '_').replace('-', '_').replace('/', '_').replace('\\', '_')
35+
def tableize(name: str) -> str:
36+
        # Stable, safe table name from filename only (not full path); same-named files in different folders map to the same table and replace each other
37+
# e.g. "bandgap.csv" -> "bandgap_csv"
38+
t = name.replace('.', '_').replace('-', '_').replace(' ', '_')
39+
return re.sub(r'[^0-9a-zA-Z_]', '_', t)
3340

34-
try:
35-
# Load data
36-
if ext == 'csv':
37-
df = pd.read_csv(filepath)
38-
elif ext == 'npy':
39-
arr = np.load(filepath, allow_pickle=True)
40-
if isinstance(arr, np.ndarray):
41-
if arr.ndim == 2:
42-
df = pd.DataFrame(arr)
43-
elif arr.ndim == 1 and hasattr(arr[0], 'dtype') and arr[0].dtype.names:
44-
df = pd.DataFrame(arr)
45-
else:
46-
df = pd.DataFrame(arr)
47-
else:
48-
continue # unsupported NPY format
49-
else:
41+
with sqlite3.connect(DB_NAME) as conn:
42+
c = conn.cursor()
43+
# Track file mtimes to avoid unnecessary re-imports
44+
c.execute("""
45+
CREATE TABLE IF NOT EXISTS import_etag (
46+
relpath TEXT PRIMARY KEY,
47+
mtime REAL NOT NULL
48+
)
49+
""")
50+
conn.commit()
51+
52+
for root, _, files in os.walk(UPLOAD_FOLDER):
53+
# Skip music tree
54+
rel_root = os.path.relpath(root, UPLOAD_FOLDER)
55+
if rel_root.split(os.sep)[0] == 'clips':
56+
continue
57+
58+
for filename in files:
59+
if filename.startswith('.'):
60+
continue
61+
ext = filename.rsplit('.', 1)[-1].lower()
62+
if ext not in ALLOWED_IMPORT_EXTS:
5063
continue
5164

52-
# Write to SQLite
53-
with sqlite3.connect(DB_NAME) as conn:
54-
df.to_sql(table_name, conn, if_exists='replace', index=False)
65+
filepath = os.path.join(root, filename)
66+
relpath = os.path.relpath(filepath, UPLOAD_FOLDER)
67+
mtime = os.path.getmtime(filepath)
68+
table_name = tableize(filename)
5569

56-
print(f"Imported: {filename} as table '{table_name}'")
70+
# Check etag (mtime)
71+
c.execute("SELECT mtime FROM import_etag WHERE relpath=?", (relpath,))
72+
row = c.fetchone()
73+
if row and float(row[0]) == float(mtime):
74+
# up-to-date, skip
75+
continue
5776

58-
# Auto-log into uploads_log if possible
59-
rel_path = os.path.relpath(filepath, UPLOAD_FOLDER)
60-
parts = rel_path.split(os.sep)
77+
# Load into DataFrame
78+
try:
79+
if ext == 'csv':
80+
df = pd.read_csv(filepath)
81+
else: # npy
82+
arr = np.load(filepath, allow_pickle=True)
83+
if isinstance(arr, np.ndarray):
84+
if arr.ndim == 2:
85+
df = pd.DataFrame(arr)
86+
elif arr.ndim == 1 and hasattr(arr.dtype, 'names') and arr.dtype.names:
87+
# structured array -> DataFrame with named columns
88+
df = pd.DataFrame(arr.tolist(), columns=list(arr.dtype.names))
89+
else:
90+
df = pd.DataFrame(arr)
91+
else:
92+
print(f"auto_import_uploads: unsupported NPY structure for {relpath}, skipping.")
93+
continue
94+
except Exception as e:
95+
print(f"auto_import_uploads: failed to read {relpath}: {e}")
96+
continue
6197

62-
if len(parts) >= 3:
63-
property_name = parts[0]
64-
tab = parts[1]
65-
file_name = parts[2]
66-
with sqlite3.connect(DB_NAME) as conn:
67-
c = conn.cursor()
68-
c.execute("""
69-
INSERT OR IGNORE INTO uploads_log (property, tab, filename, uploaded_at)
70-
VALUES (?, ?, ?, ?)
71-
""", (property_name, tab, file_name, datetime.datetime.now().isoformat()))
72-
conn.commit()
73-
print(f"Logged {file_name} to uploads_log.")
74-
else:
75-
print(f"Skipped logging for {filename} (not in expected folder structure).")
98+
# Import into SQLite (replace whole table)
99+
try:
100+
df.to_sql(table_name, conn, if_exists='replace', index=False)
101+
c.execute("REPLACE INTO import_etag (relpath, mtime) VALUES (?, ?)", (relpath, mtime))
102+
conn.commit()
103+
imported += 1
104+
print(f"auto_import_uploads: imported {relpath} -> table '{table_name}'")
105+
except Exception as e:
106+
print(f"auto_import_uploads: failed to import {relpath} to '{table_name}': {e}")
76107

77-
except Exception as e:
78-
print(f"Failed to import {filename}: {e}")
108+
print(f"auto_import_uploads: done, {imported} table(s) updated.")
109+
return imported
79110

80111
def auto_log_material_files():
81112
if not os.path.exists(UPLOAD_FOLDER):
82113
return
83114

84-
all_allowed_exts = ALLOWED_DATASET_EXTENSIONS | ALLOWED_RESULTS_EXTENSIONS | ALLOWED_MUSIC_EXTENSIONS
115+
all_allowed_exts = ALLOWED_DATASET_EXTENSIONS | ALLOWED_RESULTS_EXTENSIONS
116+
117+
with sqlite3.connect(DB_NAME) as conn:
118+
c = conn.cursor()
119+
# Ensure uniqueness constraint exists
120+
c.execute("""
121+
CREATE UNIQUE INDEX IF NOT EXISTS idx_unique_uploads
122+
ON uploads_log(property, tab, filename)
123+
""")
124+
conn.commit()
85125

86126
for root, dirs, files in os.walk(UPLOAD_FOLDER):
87127
for filename in files:

0 commit comments

Comments
 (0)