Skip to content

Commit d554110

Browse files
committed
Supports backend caching of the DB file and managing the file from the frontend
1 parent c31ad44 commit d554110

File tree

5 files changed

+353
-37
lines changed

5 files changed

+353
-37
lines changed

py-src/data_formulator/app.py

Lines changed: 193 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
APP_ROOT = Path(os.path.join(Path(__file__).parent)).absolute()
4848

4949
import os
50+
import tempfile
5051

5152
app = Flask(__name__, static_url_path='', static_folder=os.path.join(APP_ROOT, "dist"))
5253
app.secret_key = secrets.token_hex(16) # Generate a random secret key for sessions
@@ -555,15 +556,27 @@ def request_code_expl():
555556
expl = ""
556557
return expl
557558

558-
@app.route('/api/get-session-id', methods=['GET'])
559+
@app.route('/api/get-session-id', methods=['GET', 'POST'])
559560
def get_session_id():
560561
"""Endpoint to get or confirm a session ID from the client"""
562+
# if it is a POST request, we expect a session_id in the body
563+
# if it is a GET request, we do not expect a session_id in the query params
561564

565+
current_session_id = None
566+
if request.is_json:
567+
content = request.get_json()
568+
current_session_id = content.get("session_id", None)
569+
562570
# Create session if it doesn't exist
563-
if 'session_id' not in session:
564-
session['session_id'] = secrets.token_hex(16)
565-
session.permanent = True
566-
logger.info(f"Created new session: {session['session_id']}")
571+
if current_session_id is None:
572+
if 'session_id' not in session:
573+
session['session_id'] = secrets.token_hex(16)
574+
session.permanent = True
575+
logger.info(f"Created new session: {session['session_id']}")
576+
else:
577+
# override the session_id
578+
session['session_id'] = current_session_id
579+
session.permanent = True
567580

568581
return flask.jsonify({
569582
"status": "ok",
@@ -575,15 +588,9 @@ def get_session_id():
575588
def get_app_config():
576589
"""Provide frontend configuration settings from environment variables"""
577590

578-
# Create session if it doesn't exist
579-
if 'session_id' not in session:
580-
session['session_id'] = secrets.token_hex(16)
581-
session.permanent = True
582-
logger.info(f"Created new session: {session['session_id']}")
583-
584591
config = {
585592
"SHOW_KEYS_ENABLED": os.getenv("SHOW_KEYS_ENABLED", "true").lower() == "true",
586-
"SESSION_ID": session['session_id']
593+
"SESSION_ID": session.get('session_id', None)
587594
}
588595
return flask.jsonify(config)
589596

@@ -603,19 +610,32 @@ def list_tables():
603610
# Get row count
604611
row_count = db.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()[0]
605612
sample_rows = db.execute(f"SELECT * FROM {table_name} LIMIT 1000").fetchall()
613+
614+
# Check if this is a view or a table
615+
is_view = False
616+
try:
617+
# In most SQL databases, views are listed in a system table
618+
# For DuckDB, we can check if it's a view by querying the system tables
619+
view_check = db.execute(f"SELECT * FROM duckdb_views() WHERE view_name = '{table_name}'").fetchone()
620+
is_view = view_check is not None
621+
except Exception:
622+
# If the query fails, assume it's a regular table
623+
pass
606624

607625
result.append({
608626
"name": table_name,
609627
"columns": [{"name": col[0], "type": col[1]} for col in columns],
610628
"row_count": row_count,
611-
"sample_rows": [dict(zip([col[0] for col in columns], row)) for row in sample_rows]
629+
"sample_rows": [dict(zip([col[0] for col in columns], row)) for row in sample_rows],
630+
"is_view": is_view,
612631
})
613632

614633
return jsonify({
615634
"status": "success",
616635
"tables": result
617636
})
618637
except Exception as e:
638+
print(e)
619639
return jsonify({
620640
"status": "error",
621641
"message": str(e)
@@ -821,6 +841,166 @@ def drop_table():
821841
}), 500
822842

823843

844+
@app.route('/api/tables/upload-db-file', methods=['POST'])
def upload_db_file():
    """Upload a DuckDB ``.db`` file and register it for the current session.

    The multipart field ``file`` is saved to a session-specific temporary
    path, opened read-only with DuckDB to confirm it is a usable database,
    and then moved to ``df_<session_id>.db`` in the system temp directory.
    On success the path is recorded in ``db_manager._db_files``.

    Returns:
        A JSON response. 200 with ``status: success`` and the session id on
        success; 400 when the file is missing, is not named ``*.db``, the
        session id is missing or unsafe, or DuckDB rejects the file; 500 on
        any other failure.
    """
    # Local import keeps this block self-contained.
    import re

    try:
        uploaded = request.files.get('file')
        if uploaded is None:
            return jsonify({"status": "error", "message": "No file provided"}), 400

        # The filename can be None or empty when the part carries no name;
        # treat that the same as a wrong extension instead of crashing.
        filename = uploaded.filename or ""
        if not filename.endswith('.db'):
            return jsonify({"status": "error", "message": "Invalid file format. Only .db files are supported"}), 400

        # Get the session ID
        if 'session_id' not in session:
            return jsonify({"status": "error", "message": "No session ID found"}), 400

        session_id = session['session_id']

        # The session id can be supplied by the client, and it is embedded in
        # file names below. Restrict it to characters that cannot form a path.
        if not re.fullmatch(r"[A-Za-z0-9_-]+", str(session_id)):
            logger.warning("Rejected db upload: session id contains unsafe characters")
            return jsonify({"status": "error", "message": "Invalid session ID"}), 400

        temp_dir = tempfile.gettempdir()
        temp_db_path = os.path.join(temp_dir, f"temp_{session_id}.db")
        db_file_path = os.path.join(temp_dir, f"df_{session_id}.db")

        # Save the file temporarily to verify it
        uploaded.save(temp_db_path)

        # Verify if it's a valid DuckDB file
        try:
            import duckdb

            conn = duckdb.connect(temp_db_path, read_only=True)
            try:
                # A trivial query is enough to prove the file can be opened.
                conn.execute("SELECT 1").fetchall()
            finally:
                # Always release the handle, even when the probe query fails,
                # so the temp file can be removed afterwards.
                conn.close()

            # The file is valid - move it to its final location
            os.replace(temp_db_path, db_file_path)

            # Update the db_manager's file mapping
            db_manager._db_files[session_id] = db_file_path

        except Exception as db_error:
            # Clean up temp file
            if os.path.exists(temp_db_path):
                os.remove(temp_db_path)
            return jsonify({
                "status": "error",
                "message": f"Invalid DuckDB database file: {str(db_error)}"
            }), 400

        return jsonify({
            "status": "success",
            "message": "Database file uploaded successfully",
            "session_id": session_id
        })

    except Exception as e:
        logger.error(f"Error uploading database file: {str(e)}")
        return jsonify({
            "status": "error",
            "message": f"Failed to upload database file: {str(e)}"
        }), 500
906+
907+
@app.route('/api/tables/download-db-file', methods=['GET'])
def download_db_file():
    """Send the current session's database file as an attachment.

    The file path is looked up in ``db_manager._db_files`` under the
    session id stored in the Flask session.

    Returns:
        The file as a download named ``data_formulator_<session_id>.db``;
        400 JSON when the session has no id; 404 JSON when no file is
        registered for the session or the registered file is gone from
        disk; 500 JSON on any other failure.
    """
    try:
        # Check if session exists
        if 'session_id' not in session:
            return jsonify({
                "status": "error",
                "message": "No session ID found"
            }), 400

        session_id = session['session_id']

        # The entry can be present but None (reset_db_file stores None rather
        # than deleting the key), so test the value and not just membership.
        # assumes _db_files is a dict-like mapping - TODO confirm
        db_file_path = db_manager._db_files.get(session_id)
        if not db_file_path:
            return jsonify({
                "status": "error",
                "message": "No database file found for this session"
            }), 404

        # Check if file exists
        if not os.path.exists(db_file_path):
            return jsonify({
                "status": "error",
                "message": "Database file not found"
            }), 404

        # Generate a filename for download
        download_name = f"data_formulator_{session_id}.db"

        # The file is a DuckDB database, not SQLite, so serve it as opaque
        # binary data instead of advertising an SQLite MIME type.
        return send_from_directory(
            os.path.dirname(db_file_path),
            os.path.basename(db_file_path),
            as_attachment=True,
            download_name=download_name,
            mimetype='application/octet-stream'
        )

    except Exception as e:
        logger.error(f"Error downloading database file: {str(e)}")
        return jsonify({
            "status": "error",
            "message": f"Failed to download database file: {str(e)}"
        }), 500
954+
955+
956+
@app.route('/api/tables/reset-db-file', methods=['POST'])
def reset_db_file():
    """Delete the current session's database files and clear its mapping.

    Removes the file registered in ``db_manager._db_files`` for the session
    (if any), plus the conventional ``temp_<session_id>.db`` and
    ``df_<session_id>.db`` files in the system temp directory. The mapping
    entry, when present, is then set to ``None``.

    Returns:
        200 JSON with ``status: success`` once cleanup is done; 400 JSON
        when the session id is missing or unsafe; 500 JSON on any other
        failure.
    """
    # Local imports keep this block self-contained.
    import contextlib
    import re

    try:
        if 'session_id' not in session:
            return jsonify({
                "status": "error",
                "message": "No session ID found"
            }), 400

        session_id = session['session_id']

        # The session id can be supplied by the client, and it is embedded in
        # paths that get deleted below. Restrict it to characters that cannot
        # form a path.
        if not re.fullmatch(r"[A-Za-z0-9_-]+", str(session_id)):
            logger.warning("Rejected db reset: session id contains unsafe characters")
            return jsonify({"status": "error", "message": "Invalid session ID"}), 400

        logger.info(f"Resetting database file for session: {session_id}")

        temp_dir = tempfile.gettempdir()
        # A set avoids deleting the same path twice when the registered file
        # is the default df_<session_id>.db.
        stale_paths = {
            os.path.join(temp_dir, f"temp_{session_id}.db"),
            os.path.join(temp_dir, f"df_{session_id}.db"),
        }

        has_reference = session_id in db_manager._db_files
        if has_reference and db_manager._db_files[session_id]:
            stale_paths.add(db_manager._db_files[session_id])

        for stale_path in stale_paths:
            # Remove directly and ignore a missing file; this avoids the
            # check-then-delete race of os.path.exists + os.remove.
            with contextlib.suppress(FileNotFoundError):
                os.remove(stale_path)

        # Clear the reference only after the files are gone, so a failed
        # delete leaves the mapping untouched.
        if has_reference:
            db_manager._db_files[session_id] = None

        return jsonify({
            "status": "success",
            "message": "Database file reset successfully"
        })

    except Exception as e:
        logger.error(f"Error resetting database file: {str(e)}")
        return jsonify({
            "status": "error",
            "message": f"Failed to reset database file: {str(e)}"
        }), 500
1002+
1003+
8241004
@app.route('/api/tables/query', methods=['POST'])
8251005
def query_table():
8261006
"""Execute a query on a table"""

py-src/data_formulator/db_manager.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,18 +27,17 @@ def connection(self, session_id: str) -> ContextManager[duckdb.DuckDBPyConnectio
2727
def get_connection(self, session_id: str) -> duckdb.DuckDBPyConnection:
2828
"""Internal method to get or create a DuckDB connection for a session"""
2929
# Get or create the db file path for this session
30-
if session_id not in self._db_files:
30+
if session_id not in self._db_files or self._db_files[session_id] is None:
3131
db_file = os.path.join(tempfile.gettempdir(), f"df_{session_id}.db")
32-
print(f"Creating new db file: {db_file}")
32+
print(f"=== Creating new db file: {db_file}")
3333
self._db_files[session_id] = db_file
3434
else:
35-
print(f"Using existing db file: {self._db_files[session_id]}")
35+
print(f"=== Using existing db file: {self._db_files[session_id]}")
3636
db_file = self._db_files[session_id]
3737

3838
# Create a fresh connection to the database file
3939
conn = duckdb.connect(database=db_file)
4040
return conn
41-
4241

4342
# Initialize the DB manager
4443
db_manager = DuckDBManager()

src/app/dfSlice.tsx

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -230,12 +230,18 @@ export const fetchAvailableModels = createAsyncThunk(
230230

231231
export const getSessionId = createAsyncThunk(
232232
"dataFormulatorSlice/getSessionId",
233-
async () => {
233+
async (_, { getState }) => {
234+
let state = getState() as DataFormulatorState;
235+
let sessionId = state.sessionId;
236+
234237
const response = await fetch(`${getUrls().GET_SESSION_ID}`, {
235-
method: 'GET',
238+
method: 'POST',
236239
headers: {
237240
'Content-Type': 'application/json',
238-
}
241+
},
242+
body: JSON.stringify({
243+
session_id: sessionId,
244+
}),
239245
});
240246
return response.json();
241247
}

src/app/utils.tsx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ export function getUrls() {
5050

5151
AUTH_INFO_PREFIX: `/api/.auth/`,
5252

53+
UPLOAD_DB_FILE: `/api/tables/upload-db-file`,
54+
DOWNLOAD_DB_FILE: `/api/tables/download-db-file`,
55+
RESET_DB_FILE: `/api/tables/reset-db-file`,
56+
5357
GET_SESSION_ID: `/api/get-session-id`,
5458
LIST_TABLES: `/api/tables`,
5559
TABLE_DATA: `/api/tables/get-table`,

0 commit comments

Comments
 (0)