Skip to content

Commit 244e88f

Browse files
committed
pref: upload
1 parent 749b92c commit 244e88f

File tree

2 files changed

+109
-44
lines changed

2 files changed

+109
-44
lines changed

backend/apps/datasource/api/datasource.py

Lines changed: 108 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@
33
import os
44
import traceback
55
import uuid
6+
from io import StringIO
67
from typing import List
78

89
import orjson
910
import pandas as pd
1011
from fastapi import APIRouter, File, UploadFile, HTTPException
1112

12-
from apps.db.engine import create_table, get_data_engine, insert_data
13+
from apps.db.engine import get_engine_conn
1314
from common.core.config import settings
1415
from common.core.deps import SessionDep, CurrentUser, Trans
1516
from common.utils.utils import SQLBotLogUtil
@@ -183,6 +184,70 @@ def inner():
183184
return await asyncio.to_thread(inner)
184185

185186

187+
# @router.post("/uploadExcel")
188+
# async def upload_excel(session: SessionDep, file: UploadFile = File(...)):
189+
# ALLOWED_EXTENSIONS = {"xlsx", "xls", "csv"}
190+
# if not file.filename.lower().endswith(tuple(ALLOWED_EXTENSIONS)):
191+
# raise HTTPException(400, "Only support .xlsx/.xls/.csv")
192+
#
193+
# os.makedirs(path, exist_ok=True)
194+
# filename = f"{file.filename.split('.')[0]}_{hashlib.sha256(uuid.uuid4().bytes).hexdigest()[:10]}.{file.filename.split('.')[1]}"
195+
# save_path = os.path.join(path, filename)
196+
# with open(save_path, "wb") as f:
197+
# f.write(await file.read())
198+
#
199+
# def inner():
200+
# sheets = []
201+
# with get_data_engine() as conn:
202+
# if filename.endswith(".csv"):
203+
# df = pd.read_csv(save_path, engine='c')
204+
# tableName = f"sheet1_{hashlib.sha256(uuid.uuid4().bytes).hexdigest()[:10]}"
205+
# sheets.append({"tableName": tableName, "tableComment": ""})
206+
# column_len = len(df.dtypes)
207+
# fields = []
208+
# for i in range(column_len):
209+
# # build fields
210+
# fields.append({"name": df.columns[i], "type": str(df.dtypes[i]), "relType": ""})
211+
# # create table
212+
# create_table(conn, tableName, fields)
213+
#
214+
# data = [
215+
# {df.columns[i]: None if pd.isna(row[i]) else (int(row[i]) if "int" in str(df.dtypes[i]) else row[i])
216+
# for i in range(len(row))}
217+
# for row in df.values
218+
# ]
219+
# # insert data
220+
# insert_data(conn, tableName, fields, data)
221+
# else:
222+
# excel_engine = 'xlrd' if filename.endswith(".xls") else 'openpyxl'
223+
# df_sheets = pd.read_excel(save_path, sheet_name=None, engine=excel_engine)
224+
# # build columns and data to insert db
225+
# for sheet_name, df in df_sheets.items():
226+
# tableName = f"{sheet_name}_{hashlib.sha256(uuid.uuid4().bytes).hexdigest()[:10]}"
227+
# sheets.append({"tableName": tableName, "tableComment": ""})
228+
# column_len = len(df.dtypes)
229+
# fields = []
230+
# for i in range(column_len):
231+
# # build fields
232+
# fields.append({"name": df.columns[i], "type": str(df.dtypes[i]), "relType": ""})
233+
# # create table
234+
# create_table(conn, tableName, fields)
235+
#
236+
# data = [
237+
# {df.columns[i]: None if pd.isna(row[i]) else (
238+
# int(row[i]) if "int" in str(df.dtypes[i]) else row[i])
239+
# for i in range(len(row))}
240+
# for row in df.values
241+
# ]
242+
# # insert data
243+
# insert_data(conn, tableName, fields, data)
244+
#
245+
# os.remove(save_path)
246+
# return {"filename": filename, "sheets": sheets}
247+
#
248+
# return await asyncio.to_thread(inner)
249+
250+
186251
@router.post("/uploadExcel")
187252
async def upload_excel(session: SessionDep, file: UploadFile = File(...)):
188253
ALLOWED_EXTENSIONS = {"xlsx", "xls", "csv"}
@@ -197,50 +262,49 @@ async def upload_excel(session: SessionDep, file: UploadFile = File(...)):
197262

198263
def inner():
199264
sheets = []
200-
with get_data_engine() as conn:
201-
if filename.endswith(".csv"):
202-
df = pd.read_csv(save_path)
203-
tableName = f"sheet1_{hashlib.sha256(uuid.uuid4().bytes).hexdigest()[:10]}"
265+
engine = get_engine_conn()
266+
if filename.endswith(".csv"):
267+
df = pd.read_csv(save_path, engine='c')
268+
tableName = f"sheet1_{hashlib.sha256(uuid.uuid4().bytes).hexdigest()[:10]}"
269+
sheets.append({"tableName": tableName, "tableComment": ""})
270+
insert_pg(df, tableName, engine)
271+
else:
272+
sheet_names = pd.ExcelFile(save_path).sheet_names
273+
for sheet_name in sheet_names:
274+
tableName = f"{sheet_name}_{hashlib.sha256(uuid.uuid4().bytes).hexdigest()[:10]}"
204275
sheets.append({"tableName": tableName, "tableComment": ""})
205-
column_len = len(df.dtypes)
206-
fields = []
207-
for i in range(column_len):
208-
# build fields
209-
fields.append({"name": df.columns[i], "type": str(df.dtypes[i]), "relType": ""})
210-
# create table
211-
create_table(conn, tableName, fields)
212-
213-
data = [
214-
{df.columns[i]: None if pd.isna(row[i]) else (int(row[i]) if "int" in str(df.dtypes[i]) else row[i])
215-
for i in range(len(row))}
216-
for row in df.values
217-
]
218-
# insert data
219-
insert_data(conn, tableName, fields, data)
220-
else:
221-
df_sheets = pd.read_excel(save_path, sheet_name=None)
222-
# build columns and data to insert db
223-
for sheet_name, df in df_sheets.items():
224-
tableName = f"{sheet_name}_{hashlib.sha256(uuid.uuid4().bytes).hexdigest()[:10]}"
225-
sheets.append({"tableName": tableName, "tableComment": ""})
226-
column_len = len(df.dtypes)
227-
fields = []
228-
for i in range(column_len):
229-
# build fields
230-
fields.append({"name": df.columns[i], "type": str(df.dtypes[i]), "relType": ""})
231-
# create table
232-
create_table(conn, tableName, fields)
233-
234-
data = [
235-
{df.columns[i]: None if pd.isna(row[i]) else (
236-
int(row[i]) if "int" in str(df.dtypes[i]) else row[i])
237-
for i in range(len(row))}
238-
for row in df.values
239-
]
240-
# insert data
241-
insert_data(conn, tableName, fields, data)
242-
243-
os.remove(save_path)
276+
df = pd.read_excel(save_path, sheet_name=sheet_name, engine='calamine')
277+
insert_pg(df, tableName, engine)
278+
279+
# os.remove(save_path)
244280
return {"filename": filename, "sheets": sheets}
245281

246282
return await asyncio.to_thread(inner)
283+
284+
285+
def insert_pg(df, tableName, engine):
286+
conn = engine.raw_connection()
287+
cursor = conn.cursor()
288+
try:
289+
df.to_sql(
290+
tableName,
291+
engine,
292+
if_exists='replace',
293+
index=False
294+
)
295+
# trans csv
296+
output = StringIO()
297+
df.to_csv(output, sep='\t', header=False, index=False)
298+
# output.seek(0)
299+
300+
# pg copy
301+
cursor.copy_expert(
302+
sql=f"""COPY "{tableName}" FROM STDIN WITH CSV DELIMITER E'\t'""",
303+
file=output
304+
)
305+
conn.commit()
306+
except Exception as e:
307+
pass
308+
finally:
309+
cursor.close()
310+
conn.close()

backend/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ dependencies = [
4141
"sqlparse>=0.5.3",
4242
"redis>=6.2.0",
4343
"xlsxwriter>=3.2.5",
44+
"python-calamine>=0.4.0",
4445
]
4546
[[tool.uv.index]]
4647
name = "default"

0 commit comments

Comments
 (0)